import requests
from bs4 import BeautifulSoup

# Day view of the MRBS booking intranet for one fixed date and area.
url = "https://fme-intranet.upc.edu/appsext/mrbs/web/day.php?year=2020&month=9&day=17&area=2"

# One table row stands for one half-hour slot, so rowspan * 30 is a duration.
SLOT_MINUTES = 30

# Seconds to wait for the server; without a timeout the request can hang forever.
REQUEST_TIMEOUT = 30


def half_hour_slots(first_hour=8, end_hour=22):
    """Return zero-padded "HH:00" / "HH:30" labels for first_hour <= hour < end_hour."""
    return [
        "{:02d}:{}".format(hour, minutes)
        for hour in range(first_hour, end_hour)
        for minutes in ("00", "30")
    ]


# Row labels looked up in the table: "08:00", "08:30", ..., "21:30".
hores = half_hour_slots(8, 22)


def free_columns(occupied, first=1):
    """Yield column indexes from ``first`` upwards, skipping those in ``occupied``.

    ``occupied`` holds the columns still covered by a cell that started in an
    earlier row with ``rowspan`` > 1; such columns have no ``<td>`` of their own
    in the current row.
    """
    column = first
    while True:
        if column not in occupied:
            yield column
        column += 1


def advance_spans(carried, started):
    """Return the column -> rows-still-covered map for the next table row.

    ``carried`` is the map that applied to the row just processed and
    ``started`` holds the spans opened by cells of that row (rowspan - 1 each).
    """
    following = {col: left - 1 for col, left in carried.items() if left > 1}
    following.update(started)
    return following


def format_booking(subject, slot, minutes, room):
    """Build the output line "<subject>, <slot>, <minutes>mins, <room>"."""
    return "{}, {}, {}mins, {}".format(subject, slot, minutes, room)


def fetch_day_table(page_url, timeout=REQUEST_TIMEOUT):
    """Download ``page_url`` and return its element with id ``day_main``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the page contains no ``day_main`` element.
    """
    page = requests.get(page_url, timeout=timeout)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find(id="day_main")
    if table is None:
        raise RuntimeError("no element with id 'day_main' in " + page_url)
    return table


def iter_bookings(table, slots):
    """Yield ``(subject, slot, minutes, room)`` for each booked cell of ``table``.

    For every label in ``slots`` the cells following that label are walked
    until the row ends or a cell repeats the label (presumably the right-hand
    row label -- confirm against the page). Cells whose text contains "CDATA"
    are skipped (presumably the script-only placeholder of a free slot --
    confirm against the page). The room name is the ``<th>`` text at the
    cell's column index, where columns covered by a rowspan from an earlier
    row are accounted for so that later cells keep their real column.
    """
    # Header texts do not change while walking, so collect them once.
    rooms = [th.get_text().strip() for th in table.find_all("th")]
    carried = {}
    for slot in slots:
        label = table.find(string=slot)
        if label is None:
            # This slot is not shown on the page; nothing to read for it.
            continue
        row = label.find_parent("tr")
        columns = free_columns(set(carried))
        started = {}
        cell = label.find_next("td")
        # Stay inside the label's own row so a missing closing label cannot
        # make the walk spill into the following rows.
        while cell is not None and cell.find_parent("tr") is row:
            text = cell.get_text()
            if slot in text:
                break
            column = next(columns)
            # A cell without the attribute spans exactly one row.
            rowspan = int(cell.get("rowspan") or 1)
            if rowspan > 1:
                started[column] = rowspan - 1
            if "CDATA" not in text:
                yield text.strip(), slot, rowspan * SLOT_MINUTES, rooms[column]
            cell = cell.find_next("td")
        carried = advance_spans(carried, started)


def main():
    """Fetch the configured day view and print one line per booking."""
    table = fetch_day_table(url)
    for subject, slot, minutes, room in iter_bookings(table, hores):
        print(format_booking(subject, slot, minutes, room))


if __name__ == "__main__":
    main()