blob: 76eefd56792e4aa2e1fb6cc67376a4a157a0a85c [file] [log] [blame]
avm99963411e36a2020-09-27 23:32:48 +02001import requests
2from bs4 import BeautifulSoup
3import pyodbc
4import re
5from datetime import datetime, timedelta
6import pytz
7
class TableParser:
    """Scrape one day of a room timetable page and store it in a database.

    The page is fetched from ``baseUrl`` with ``year``/``month``/``day``/
    ``area`` query parameters. The element with id ``day_main`` is scanned
    row by row, one row per half-hour label, and every cell that is not
    marked with one of ``EMPTY_CELL_CLASSES`` is written to the ``classes``
    table unless an identical row already exists.
    """

    # Timezone used to localize the scraped start times.
    TIMEZONE = 'Europe/Madrid'
    # A cell whose first CSS class is one of these is not stored.
    EMPTY_CELL_CLASSES = ['row_labels', 'even_row', 'odd_row']
    # Seconds to wait for the HTTP server; without it a stalled server
    # would block the scraper forever.
    REQUEST_TIMEOUT = 30
    # Row labels looked up go from FIRST_HOUR:00 to (LAST_HOUR - 1):30.
    FIRST_HOUR = 8
    LAST_HOUR = 22
    # Minutes covered by one table row, i.e. by one unit of ``rowspan``.
    SLOT_MINUTES = 30
    # Extracts the room identifier from a column header ("Aula <id> (<n>)").
    ROOM_PATTERN = re.compile(r"Aula (\S+) ?\(\d*\)", re.IGNORECASE)

    def __init__(self, baseUrl):
        """Remember the URL of the day-view page to scrape."""
        self.baseUrl = baseUrl

    @classmethod
    def _hour_labels(cls):
        """Return the half-hour row labels ("08:00", "08:30", ...) to scan."""
        return [
            "{:02d}:{}".format(hour, minutes)
            for hour in range(cls.FIRST_HOUR, cls.LAST_HOUR)
            for minutes in ("00", "30")
        ]

    @classmethod
    def _room_from_header(cls, header_text):
        """Return the room id found in a column header, or None if absent."""
        match = cls.ROOM_PATTERN.match(header_text)
        return match.group(1) if match else None

    @staticmethod
    def _insert_if_missing(cursor, assignatura, aula, begins, ends):
        """Insert one row into ``classes`` unless an identical one exists.

        Parameters are passed as a single sequence, which is the DB-API
        form and is also accepted by pyodbc.

        Returns True when a row was inserted, False when it already existed.
        """
        values = (assignatura, aula, begins, ends)
        cursor.execute(
            "SELECT id FROM classes WHERE calendar_name = ? AND room = ? AND begins = ? AND ends = ?",
            values)
        row = cursor.fetchone()
        if row:
            # Only ``id`` is selected, so it is the first column of the row.
            print("[WARNING] Ja estava a la DB (id " + str(row[0]) + ")")
            return False

        cursor.execute(
            "INSERT INTO classes (calendar_name, room, begins, ends) VALUES (?, ?, ?, ?)",
            values)
        return True

    def parse(self, year, month, day, area, db):
        """Fetch the timetable for one day and area and store it in ``db``.

        Args:
            year, month, day: Date to request; also used to build the start
                datetime of every stored row.
            area: Value sent as the ``area`` query parameter.
            db: Open database connection providing ``cursor()`` and
                ``commit()``; qmark (``?``) parameter style is used.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the page has no element with id ``day_main``.
        """
        params = {
            'year': year,
            'month': month,
            'day': day,
            'area': area,
        }
        page = requests.get(self.baseUrl, params=params,
                            timeout=self.REQUEST_TIMEOUT)
        # Fail on HTTP errors instead of trying to parse an error page.
        page.raise_for_status()

        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.find(id="day_main")
        if table is None:
            raise ValueError(
                'No element with id "day_main" found at ' + str(page.url))

        # Loop invariants: column headers, timezone object and DB cursor.
        headers = table.find_all("th")
        tz = pytz.timezone(self.TIMEZONE)
        cursor = db.cursor()

        for hora in self._hour_labels():
            label = table.find(string=hora)
            if label is None:
                # The table has no row for this slot; nothing to read.
                continue

            hour, minute = (int(part) for part in hora.split(':'))
            td_hora = label.find_next('td')
            # Index into ``headers`` of the cell being visited; index 0 is
            # skipped because counting starts at 1.
            # NOTE(review): the index advances once per <td>. If cells
            # spanning from earlier rows are omitted from later rows, the
            # index would drift - confirm against the real markup.
            column = 1

            # Walk the cells until the row label text shows up again or the
            # document runs out of cells.
            while td_hora is not None and hora not in td_hora.get_text():
                classes = td_hora.get('class') or []
                if classes and classes[0] not in self.EMPTY_CELL_CLASSES:
                    assignaturaRaw = td_hora.get_text().strip()

                    aula = None
                    if column < len(headers):
                        aula = self._room_from_header(
                            headers[column].get_text().strip())

                    if aula is None:
                        print("[WARNING] No room header for column "
                              + str(column) + " at " + hora
                              + "; skipping " + assignaturaRaw)
                    else:
                        # A cell without ``rowspan`` covers a single slot.
                        rowspan = int(td_hora.get("rowspan") or 1)
                        durada = rowspan * self.SLOT_MINUTES

                        begins = tz.localize(
                            datetime(year, month, day, hour, minute))
                        ends = begins + timedelta(minutes=durada)

                        print("Afegint " + assignaturaRaw
                              + ", " + hora
                              + ", " + str(durada) + "mins"
                              + ", " + aula)

                        self._insert_if_missing(
                            cursor, assignaturaRaw.lower(), aula, begins, ends)

                td_hora = td_hora.find_next('td')
                column += 1

        db.commit()