# blob: 537f393716f557bfc574d4e862ca768c48260422 [file] [log] [blame]
import re
from datetime import datetime, timedelta

import pyodbc
import pytz
import requests
from bs4 import BeautifulSoup
7
class TableParser:
    """Scrape one day of a room timetable and optionally store it in a DB.

    The page at ``baseUrl`` is requested with ``year``, ``month``, ``day`` and
    ``area`` query parameters and is expected to contain an element with
    ``id="day_main"``. For every half-hour label found in it, the ``<td>``
    cells that follow are walked; a cell whose first CSS class is not listed
    in ``EMPTY_CELL_CLASSES`` is reported as a booked class.
    """

    # Time zone in which the times shown on the page are interpreted.
    TIMEZONE = 'Europe/Madrid'
    # A cell whose first CSS class is one of these is not treated as a booking.
    EMPTY_CELL_CLASSES = ['row_labels', 'even_row', 'odd_row']
    # Seconds to wait for the server before the request is abandoned.
    REQUEST_TIMEOUT = 30
    # First hour scanned (inclusive) and hour at which scanning stops
    # (exclusive).
    FIRST_HOUR = 8
    END_HOUR = 22
    # Minutes covered by one timetable row; also the step between labels.
    SLOT_MINUTES = 30

    def __init__(self, baseUrl):
        """Remember the URL of the page to scrape.

        Args:
            baseUrl: URL requested by ``parse``; the date and the area are
                sent to it as query parameters.
        """
        self.baseUrl = baseUrl

    @classmethod
    def _slot_labels(cls):
        """Return the ``HH:MM`` labels searched for, in chronological order."""
        return ["{:02d}:{:02d}".format(hour, minute)
                for hour in range(cls.FIRST_HOUR, cls.END_HOUR)
                for minute in range(0, 60, cls.SLOT_MINUTES)]

    @staticmethod
    def _store(db, assignatura, aula, begins, ends):
        """Insert one class into ``classes`` unless the same row exists."""
        cursor = db.cursor()
        cursor.execute("SELECT id FROM classes WHERE calendar_name = ? AND room = ? AND begins = ? AND ends = ?",
                       assignatura, aula, begins, ends)
        row = cursor.fetchone()
        if row:
            print("[WARNING] Ja estava a la DB (id " + str(row.id) + ")")
        else:
            cursor.execute("INSERT INTO classes (calendar_name, room, begins, ends) VALUES (?, ?, ?, ?)",
                           assignatura, aula, begins, ends)

    def parse(self, year, month, day, area, db=None):
        """Fetch the timetable of one day, print its classes and store them.

        Each class found is printed as ``name, HH:MM, <n>mins, room``.

        Args:
            year: Year of the date to fetch, as an integer.
            month: Month of the date to fetch, as an integer.
            day: Day of the month to fetch, as an integer.
            area: Value sent as the ``area`` query parameter.
            db: Optional database connection. It must provide ``cursor()``
                and ``commit()``, and its cursors are called as
                ``execute(sql, *params)``. When given, every class that is
                not already in the ``classes`` table is inserted, and the
                connection is committed once after the whole page has been
                processed.

        Raises:
            ValueError: If the page has no ``day_main`` element, or if a
                column header does not match ``Aula <name> (<digits>)``.
        """
        page = requests.get(
            self.baseUrl,
            params={'year': year, 'month': month, 'day': day, 'area': area},
            timeout=self.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.find(id="day_main")
        if table is None:
            raise ValueError(
                'No element with id "day_main" in the page fetched from {}'
                .format(self.baseUrl))

        # None of these change while the table is walked, so build them once.
        headers = table.find_all("th")
        room_pattern = re.compile(r"Aula (\S+) ?\(\d*\)", re.IGNORECASE)
        timezone = pytz.timezone(self.TIMEZONE)

        for hora in self._slot_labels():
            label = table.find(text=hora)
            if label is None:
                # This half hour is not shown on the page: nothing to read.
                continue

            cell = label.findNext('td')
            # Header 0 is skipped: the first cell after the label is matched
            # with header 1.
            # NOTE(review): `column` counts the cells met after the label, so
            # it presumably drifts when a booking that started in an earlier
            # row spans into this one (its cell would be absent here) --
            # confirm against the real markup.
            column = 1

            # Walk forward until a cell shows the label again.
            while hora not in cell.get_text():
                # `get` instead of indexing: a <td> without a class attribute
                # is simply not a booking.
                classes = cell.get('class')
                if classes and classes[0] not in self.EMPTY_CELL_CLASSES:
                    assignatura = cell.get_text().strip()

                    header = headers[column].get_text().strip()
                    match = room_pattern.match(header)
                    if match is None:
                        raise ValueError(
                            'Unexpected room header: {!r}'.format(header))
                    aula = match.group(1)

                    # A cell without an explicit rowspan covers one row.
                    durada = int(cell.get("rowspan", 1)) * self.SLOT_MINUTES

                    hour, minute = (int(part) for part in hora.split(':'))
                    begins_at = timezone.localize(
                        datetime(year, month, day, hour, minute))
                    ends_at = begins_at + timedelta(minutes=durada)
                    begins = int(begins_at.timestamp())
                    ends = int(ends_at.timestamp())

                    print(("Afegint " if db is not None else "") + assignatura
                          + ", " + hora
                          + ", " + str(durada) + "mins"
                          + ", " + aula)

                    if db is not None:
                        self._store(db, assignatura, aula, begins, ends)

                cell = cell.findNext('td')
                column += 1

        if db is not None:
            db.commit()