blob: 311f7b9e2074e542023f34801c6801ef7715ca9f [file] [log] [blame]
import re
from datetime import datetime, timedelta

import pyodbc
import pytz
import requests
from bs4 import BeautifulSoup
7
class TableParser:
    """Scrape one day of a room timetable and optionally store it in a DB.

    The page served at ``baseUrl`` is expected to contain a table with
    ``id="day_main"`` whose ``<th>`` cells name the rooms ("Aula X (n)") and
    whose rows are labelled with half-hour times ("08:00", "08:30", ...).
    """

    # Time zone in which the scraped wall-clock times are interpreted.
    TIMEZONE = 'Europe/Madrid'
    # A cell whose first CSS class is one of these holds no class/booking.
    EMPTY_CELL_CLASSES = ['row_labels', 'even_row', 'odd_row']

    # Extracts the room identifier from a column header such as "Aula 101 (40)".
    _ROOM_PATTERN = re.compile(r"Aula (\S+) ?\(\d*\)", re.IGNORECASE)
    # Each table row (one unit of ``rowspan``) stands for this many minutes.
    _SLOT_MINUTES = 30

    def __init__(self, baseUrl):
        """Remember the URL of the day-view page to scrape."""
        self.baseUrl = baseUrl

    @staticmethod
    def _slot_labels():
        """Return the row labels "08:00", "08:30", ..., "21:30"."""
        return ["{:02d}:{}".format(hour, minutes)
                for hour in range(8, 22)
                for minutes in ("00", "30")]

    def _is_booking(self, cell):
        """Tell whether a ``<td>`` holds a class rather than an empty slot."""
        classes = cell.get('class') or []
        # Cells without any class attribute are treated as empty instead of
        # raising KeyError.
        return bool(classes) and classes[0] not in self.EMPTY_CELL_CLASSES

    @staticmethod
    def _store(db, assignatura, aula, begins, ends):
        """Insert the class into ``classes`` unless an identical row exists."""
        lookup = db.cursor()
        lookup.execute("SELECT id FROM classes WHERE calendar_name = ? AND room = ? AND begins = ? AND ends = ?",
                       assignatura, aula, begins, ends)
        row = lookup.fetchone()
        if row:
            print("[WARNING] Ja estava a la DB (id " + str(row.id) + ")")
            return

        insert = db.cursor()
        insert.execute("INSERT INTO classes (calendar_name, room, begins, ends) VALUES (?, ?, ?, ?)",
                       assignatura, aula, begins, ends)

    def parse(self, year, month, day, area, db=None):
        """Fetch the timetable of one day/area, print its classes and store them.

        Args:
            year: Year of the day to scrape (int; also used to build datetimes).
            month: Month of the day to scrape (int).
            day: Day of the month to scrape (int).
            area: Value sent as the ``area`` query parameter.
            db: Optional database connection offering ``cursor()`` and
                ``commit()`` (pyodbc-style positional query parameters). When
                ``None`` the classes are only printed.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the page has no ``day_main`` table or a room header
                does not look like "Aula X (n)".
        """
        page = requests.get(self.baseUrl, params={
            'year': year,
            'month': month,
            'day': day,
            'area': area,
        })
        # Fail on HTTP errors instead of trying to parse an error page.
        page.raise_for_status()

        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.find(id="day_main")
        if table is None:
            raise ValueError("No element with id 'day_main' found in the page")

        # Loop invariants: the header cells and the time zone never change.
        headers = table.find_all("th")
        tz = pytz.timezone(self.TIMEZONE)
        storing = db is not None

        for hora in self._slot_labels():
            label = table.find(string=hora)
            if label is None:
                # This time slot is not present in the table; nothing to read.
                continue

            td_hora = label.find_next('td')
            column = 1

            # Walk the cells after the row label until another cell containing
            # the same label is reached (or the document ends).
            # NOTE(review): a cell with rowspan > 1 presumably also occupies the
            # following rows, which would then hold fewer <td> elements;
            # `column` does not compensate for that -- confirm against the
            # real markup.
            while td_hora is not None and hora not in td_hora.get_text():
                if self._is_booking(td_hora):
                    assignaturaRaw = td_hora.get_text().strip()
                    assignatura = assignaturaRaw.lower()

                    aulaRaw = headers[column].get_text().strip()
                    room_match = self._ROOM_PATTERN.match(aulaRaw)
                    if room_match is None:
                        raise ValueError(
                            "Unrecognised room header: " + repr(aulaRaw))
                    aula = room_match.group(1)

                    # A missing rowspan attribute means the HTML default of 1.
                    durada = int(td_hora.get("rowspan", 1)) * self._SLOT_MINUTES

                    hours, minutes = hora.split(':')
                    begins = tz.localize(
                        datetime(year, month, day, int(hours), int(minutes)))
                    ends = begins + timedelta(minutes=durada)

                    print(("Afegint " if storing else "") + assignaturaRaw
                          + ", " + hora
                          + ", " + str(durada) + "mins"
                          + ", " + aula)

                    if storing:
                        self._store(db, assignatura, aula, begins, ends)

                td_hora = td_hora.find_next('td')
                column += 1

        if storing:
            db.commit()