Afegida integració amb MySQL per table-parser

S'ha encapsulat el codi que parseja les classes d'un dia en una classe
de Python (al fitxer TableParser.py), i s'ha creat el fitxer
cron-parse-tables.py que, quan es crida, utilitza la classe TableParser
per afegir a la base de dades les classes del dia següent.
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd1ac68
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+config.ini
+__pycache__/
diff --git a/TableParser.py b/TableParser.py
new file mode 100644
index 0000000..76eefd5
--- /dev/null
+++ b/TableParser.py
@@ -0,0 +1,109 @@
+import requests
+from bs4 import BeautifulSoup
+import pyodbc
+import re
+from datetime import datetime, timedelta
+import pytz
+
+class TableParser:
+    """Scrape the day view of a room timetable and store its bookings.
+
+    The page at ``baseUrl`` must contain an element with id ``day_main``
+    holding one row per half-hour slot; booked cells are written to the
+    ``classes`` table of the database passed to :meth:`parse`.
+    """
+
+    TIMEZONE = 'Europe/Madrid'
+    # First CSS class of the cells that do not hold a booking.
+    EMPTY_CELL_CLASSES = ['row_labels', 'even_row', 'odd_row']
+    # Seconds to wait for the HTTP response before giving up.
+    REQUEST_TIMEOUT = 30
+    # Captures the room code from a column header of the form "Aula X (N)".
+    ROOM_PATTERN = re.compile(r"Aula (\S+) ?\(\d*\)", re.IGNORECASE)
+
+    def __init__(self, baseUrl):
+        """Store the URL of the day-view page to query."""
+        self.baseUrl = baseUrl
+
+    @staticmethod
+    def _slot_labels():
+        """Return the half-hour row labels "08:00", "08:30", ..., "21:30"."""
+        return ["{:02d}:{}".format(hour, minute)
+                for hour in range(8, 22) for minute in ("00", "30")]
+
+    def parse(self, year, month, day, area, db):
+        """Fetch one day of bookings and insert the new ones into `db`.
+
+        Args:
+            year: Year of the day to fetch (int).
+            month: Month of the day to fetch (int).
+            day: Day of the month to fetch (int).
+            area: Value sent as the ``area`` query parameter.
+            db: Database connection offering ``cursor()`` and ``commit()``;
+                statements use ``?`` placeholders.
+
+        Raises:
+            requests.RequestException: The request failed, timed out or
+                returned an HTTP error status.
+            ValueError: The page has no element with id ``day_main``.
+            AttributeError: A slot label is missing from the table or a
+                column header does not match ``ROOM_PATTERN``.
+        """
+        page = requests.get(
+            self.baseUrl,
+            params={'year': year, 'month': month, 'day': day, 'area': area},
+            # Without a timeout an unresponsive server blocks the job forever.
+            timeout=self.REQUEST_TIMEOUT,
+        )
+        page.raise_for_status()
+        soup = BeautifulSoup(page.content, 'html.parser')
+        table = soup.find(id="day_main")
+        if table is None:
+            raise ValueError("no element with id 'day_main' in " + page.url)
+
+        # Loop invariants: header cells, target timezone and DB cursor.
+        headers = table.find_all("th")
+        tz = pytz.timezone(self.TIMEZONE)
+        cursor = db.cursor()
+
+        for hora in self._slot_labels():
+            td_hora = table.find(string=hora).find_next('td')
+            # NOTE(review): `column` counts the <td> cells met in this row.
+            # If the markup omits cells covered by a rowspan started in an
+            # earlier row, this index drifts from the header - please verify.
+            column = 1
+
+            # Walk the following cells until one contains the label again.
+            while td_hora is not None and hora not in td_hora.get_text():
+                # `has_attr` must run first: indexing a missing attribute raises.
+                if td_hora.has_attr('class') and td_hora['class'][0] not in self.EMPTY_CELL_CLASSES:
+                    assignaturaRaw = td_hora.get_text().strip()
+                    assignatura = assignaturaRaw.lower()
+                    aulaRaw = headers[column].get_text().strip()
+                    aula = self.ROOM_PATTERN.match(aulaRaw).group(1)
+                    # Each row is 30 minutes; no `rowspan` means a single row.
+                    durada = int(td_hora.get("rowspan", 1)) * 30
+
+                    hour, minute = (int(part) for part in hora.split(':'))
+                    begins = tz.localize(datetime(year, month, day, hour, minute))
+                    ends = begins + timedelta(minutes=durada)
+
+                    print("Afegint " + assignaturaRaw
+                            + ", " + hora
+                            + ", " + str(durada) + "mins"
+                            + ", " + aula)
+
+                    # Insert only when an identical booking is not stored yet.
+                    cursor.execute("SELECT id FROM classes WHERE calendar_name = ? AND room = ? AND begins = ? AND ends = ?",
+                            assignatura, aula, begins, ends)
+                    row = cursor.fetchone()
+                    if row:
+                        print("[WARNING] Ja estava a la DB (id " + str(row.id) + ")")
+                    else:
+                        cursor.execute("INSERT INTO classes (calendar_name, room, begins, ends) VALUES (?, ?, ?, ?)",
+                                assignatura, aula, begins, ends)
+
+                td_hora = td_hora.find_next('td')
+                column = column + 1
+
+        db.commit()
diff --git a/config.ini.default b/config.ini.default
new file mode 100644
index 0000000..59c6506
--- /dev/null
+++ b/config.ini.default
@@ -0,0 +1,5 @@
+[db]
+host = localhost
+database = covid_tracability
+user = user
+password = password
diff --git a/cron-parse-tables.py b/cron-parse-tables.py
new file mode 100644
index 0000000..364ecf4
--- /dev/null
+++ b/cron-parse-tables.py
@@ -0,0 +1,45 @@
+"""Cron entry point: add the next day's classes to the database.
+
+Connection settings come from the ``[db]`` section of ``config.ini``.
+"""
+import os
+import pyodbc
+import configparser
+from TableParser import TableParser
+import datetime
+
+# cron usually starts jobs outside the script's directory, so also look for
+# config.ini next to this file; a config.ini in the working directory is
+# read last and therefore still takes precedence.
+script_dir = os.path.dirname(os.path.abspath(__file__))
+config = configparser.ConfigParser()
+if not config.read([os.path.join(script_dir, 'config.ini'), 'config.ini']):
+    raise SystemExit('config.ini not found')
+
+db_host = config['db']['host']
+db_database = config['db']['database']
+db_user = config['db']['user']
+db_password = config['db']['password']
+
+connection_string = (
+    'DRIVER=MySQL ODBC 8.0 ANSI Driver;'
+    'SERVER=' + db_host + ';'
+    'DATABASE=' + db_database + ';'
+    'UID=' + db_user + ';'
+    'PWD=' + db_password + ';'
+    'charset=utf8mb4;'
+)
+
+db = pyodbc.connect(connection_string)
+try:
+    db.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
+    db.setencoding(encoding='utf-8')
+
+    tomorrow = datetime.date.today() + datetime.timedelta(days=1)
+
+    parser = TableParser('https://fme-intranet.upc.edu/appsext/mrbs/web/day.php')
+    for area in [2, 6]:
+        parser.parse(tomorrow.year, tomorrow.month, tomorrow.day, area, db)
+finally:
+    # Release the connection even when fetching or parsing fails.
+    db.close()
diff --git a/table-parser.py b/table-parser.py
deleted file mode 100644
index 63f3200..0000000
--- a/table-parser.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-
-url = "https://fme-intranet.upc.edu/appsext/mrbs/web/day.php?year=2020&month=9&day=17&area=2"
-page = requests.get(url)
-soup = BeautifulSoup(page.content, 'html.parser')
-table = soup.find(id="day_main")
-
-hores = []
-for h in range(8,22):
-    newhour = ""
-    if h < 10:
-        newhour += "0"
-    newhour += str(h);
-    
-    hores.append(newhour + ":00");
-    hores.append(newhour + ":30");
-
-for hora in hores:
-    td_hora = table.find(text=hora).findNext('td')
-    column = 1
-
-    while hora not in td_hora.get_text():
-        
-        if "CDATA" not in td_hora.get_text():
-            assignatura = td_hora.get_text().strip()
-            aula = table.find_all("th")[column].get_text().strip()
-            durada = int(td_hora.get("rowspan"))*30
-            
-            print(assignatura
-                    + ", " + hora
-                    + ", " + str(durada) + "mins"
-                    + ", " + aula)
-        
-        td_hora = td_hora.findNext('td')
-        column = column + 1