blob: faeefc329d92c9831751dd87feb3bd089145b6e6 [file] [log] [blame]
avm99963411e36a2020-09-27 23:32:48 +02001import requests
2from bs4 import BeautifulSoup
3import pyodbc
4import re
5from datetime import datetime, timedelta
6import pytz
7
class TableParser:
    """Scrape a day-view timetable page and optionally store its classes.

    The page is expected to contain an element with id ``day_main`` laid out
    as a grid: one ``<th>`` per column and one row per half-hour slot, where a
    class lasting several slots is a single ``<td>`` with a ``rowspan``.
    """

    # Timezone in which the times printed on the page are interpreted.
    TIMEZONE = 'Europe/Madrid'
    # A <td> whose first CSS class is one of these does not hold a class.
    EMPTY_CELL_CLASSES = ['row_labels', 'even_row', 'odd_row']

    def __init__(self, baseUrl):
        """Remember the URL of the timetable page that ``parse`` requests."""
        self.baseUrl = baseUrl

    @staticmethod
    def _slot_labels():
        """Return the half-hour row labels "08:00", "08:30", ..., "21:30"."""
        return ["{:02d}:{}".format(hour, minutes)
                for hour in range(8, 22)
                for minutes in ("00", "30")]

    @staticmethod
    def _store(db, subject, room, begins, ends, degree):
        """Insert the class into ``classes`` unless an identical row exists."""
        lookup = db.cursor()
        lookup.execute("SELECT id FROM classes WHERE calendar_name = ? AND room = ? AND begins = ? AND ends = ? AND degree = ?",
                       subject, room, begins, ends, degree)
        existing = lookup.fetchone()
        if existing:
            print("[WARNING] Ja estava a la DB (id " + str(existing.id) + ")")
            return

        insert = db.cursor()
        insert.execute("INSERT INTO classes (calendar_name, room, begins, ends, degree) VALUES (?, ?, ?, ?, ?)",
                       subject, room, begins, ends, degree)

    def parse(self, year, month, day, area, db=None):
        """Download one day of the timetable, print its classes and store them.

        Args:
            year, month, day: Date to request; also used to build the start
                timestamp of every class, so they must be integers accepted
                by ``datetime``.
            area: Value sent as the ``area`` query parameter.
            db: Optional connection exposing ``cursor()`` and ``commit()``.
                Queries use ``?`` placeholders passed as positional arguments
                and read ``row.id`` (presumably a pyodbc connection -- confirm
                against the caller). When ``None`` the classes are only
                printed.

        Returns:
            None. Every class found is printed; with ``db`` it is also
            inserted (if not already present) and the transaction committed.

        Note:
            A column header that does not match ``Aula <name> (<n>)`` is not
            handled and surfaces as ``AttributeError`` from ``.group``.
        """
        query = {
            'year': year,
            'month': month,
            'day': day,
            'area': area,
        }
        page = requests.get(self.baseUrl, params=query)
        soup = BeautifulSoup(page.content, 'html.parser')
        table = soup.find(id="day_main")

        slots = self._slot_labels()
        # occupied[r] lists the columns of row r that are already taken by a
        # rowspan cell started in an earlier row (such rows have no <td> of
        # their own for that column).
        occupied = [[] for _ in slots]

        # Loop invariants: header cells, room-name pattern and timezone.
        headers = table.find_all("th")
        room_pattern = re.compile(r"Aula (\S+) ?\(\d*\)", re.IGNORECASE)
        timezone = pytz.timezone(self.TIMEZONE)

        for row, hora in enumerate(slots):
            # Start at the first <td> after the row's time label and walk the
            # cells until one contains the time label again.
            cell = table.find(text=hora).findNext('td')
            column = 1

            while hora not in cell.get_text():
                # Skip columns covered by a rowspan from a previous row.
                while column in occupied[row]:
                    column += 1

                # A <td> without a class attribute is treated as empty
                # instead of raising KeyError.
                cell_classes = cell.get('class')
                if cell_classes and cell_classes[0] not in self.EMPTY_CELL_CLASSES:
                    subject = cell.get_text().strip()
                    degree = cell_classes[0]
                    room_header = headers[column].get_text().strip()
                    room = room_pattern.match(room_header).group(1)
                    # HTML defaults a missing rowspan to a single row.
                    row_span = int(cell.get("rowspan", 1))
                    duration = row_span * 30  # each row is a 30-minute slot

                    hour_text, minute_text = hora.split(':')
                    begins_at = timezone.localize(
                        datetime(year, month, day, int(hour_text), int(minute_text)))
                    begins = int(begins_at.timestamp())
                    ends = int((begins_at + timedelta(minutes=duration)).timestamp())

                    print(("Afegint " if db is not None else "") + subject
                          + ", " + hora
                          + ", " + str(duration) + "mins"
                          + ", " + room
                          + ", " + degree)

                    if db is not None:
                        self._store(db, subject, room, begins, ends, degree)

                    # NOTE(review): the upper bound ``row_span - 1`` is kept
                    # from the original; it leaves the last spanned row
                    # unmarked -- confirm against the real page markup.
                    for offset in range(1, row_span - 1):
                        # Guard on the index actually used so a span that
                        # runs past the last slot cannot raise IndexError.
                        if row + offset < len(occupied):
                            occupied[row + offset].append(column)

                cell = cell.findNext('td')
                column += 1

        if db is not None:
            db.commit()