Adds ability to generate graphs for custom areas
This commit adds the ability to generate individual PNG and SVG graphs
for custom areas, defined as the union of several "Àrees Bàsiques de
Salut" (ABS) in the config/customAreas.php file.
Change-Id: I27f34b4a8f520a38c55eed224554b470c5ba2938
diff --git a/cron/generate.bash b/cron/generate.bash
index 8e0b569..201740b 100644
--- a/cron/generate.bash
+++ b/cron/generate.bash
@@ -1,6 +1,14 @@
#!/bin/bash
+# Recreate the output folder from scratch, discarding previously generated graphs
+rm -rf ../output
+mkdir -p ../output
+
+# Generate graphs for the Catalonia health areas
php generateData.php
gnuplot generateGraphs.gnu
-mkdir -p ../output
mv /tmp/covid19graphgenerator-output.png ../output/graph.png
mv /tmp/covid19graphgenerator-output.svg ../output/graph.svg
+
+# Generate graphs for each custom area defined in
+# config/customAreas.php
+php generateCustomData.php
diff --git a/cron/generateCustomData.php b/cron/generateCustomData.php
new file mode 100644
index 0000000..5be2bc0
--- /dev/null
+++ b/cron/generateCustomData.php
@@ -0,0 +1,63 @@
+<?php
+if (php_sapi_name() != "cli")
+ exit();
+
+require_once(__DIR__."/../config/config.php");
+require_once(__DIR__."/includes/generation.php");
+
+// Returns the summed 2020 official population of the given ABS codes, fetched with
+// query() from the "ftq4-h9vk" resource, or null when no such value is returned.
+function habitants($abs) {
+ $quotedCodes = implode(",", array_map(function($code) { return "'".$code."'"; }, $abs));
+ $rows = query("SELECT sum(poblacio_oficial) AS habitants
+ WHERE
+ abs_codi in(".$quotedCodes.") AND
+ any = 2020", "ftq4-h9vk");
+ return ($rows[0]["habitants"] ?? null);
+}
+
+if (isset($conf["customAreas"])) {
+ // Process every configured custom area
+ foreach ($conf["customAreas"] as $area) {
+ // Nothing to do when the area has no ABS configured
+ if (count($area["abs"]) == 0) {
+ echo "[Warning] There aren't any ABS configured for ".$area["name"].".\n";
+ continue;
+ }
+
+ // Request the number of PCR-positive cases per day, summed over the area's ABS
+ $data = query("SELECT data, sum(numcasos) AS sum_numcasos
+ WHERE
+ resultatcoviddescripcio = 'Positiu PCR' AND
+ abscodi in(".implode(",", array_map(function($abs) { return "'".$abs."'"; }, $area["abs"])).")
+ GROUP BY data
+ ORDER BY data ASC
+ LIMIT 50000");
+
+ // Get the population; skip the area when the cases (query() gave no array) or the population are missing
+ $habitants = habitants($area["abs"]);
+ if (!is_array($data) || $habitants === null) {
+ echo "[Fatal error] Failed getting ".(is_array($data) ? "population" : "cases")." for ".$area["name"].".\n";
+ continue;
+ }
+
+ // Build the per-day summary rows
+ $summary = generateSummary($data, $habitants);
+
+ // Write them to a temporary file, one "data ia14 rho7" row per line
+ $file = tmpfile();
+ $fileName = stream_get_meta_data($file)['uri'];
+
+ foreach ($summary as $row)
+ fwrite($file, $row["data"]." ".$row["ia14"]." ".$row["rho7"]."\n");
+
+ // Run gnuplot to draw the graph; escapeshellarg() quotes each value as exactly one shell argument
+ shell_exec("gnuplot -c generateCustomGraph.gnu ".escapeshellarg($area["name"])." ".escapeshellarg($area["codename"])." ".escapeshellarg($fileName));
+ rename("/tmp/covid19graphgenerator-area-".$area["codename"]."-graph.png", __DIR__."/../output/area-".$area["codename"]."-graph.png");
+ rename("/tmp/covid19graphgenerator-area-".$area["codename"]."-graph.svg", __DIR__."/../output/area-".$area["codename"]."-graph.svg");
+
+ fclose($file);
+ }
+} else {
+ echo "[Warning] The config/customAreas.php file doesn't define the customAreas field.";
+}
diff --git a/cron/generateCustomGraph.gnu b/cron/generateCustomGraph.gnu
new file mode 100644
index 0000000..7f1e73f
--- /dev/null
+++ b/cron/generateCustomGraph.gnu
@@ -0,0 +1,17 @@
+# Call arguments (gnuplot -c): area title, area codename, path of the data file
+name = ARG1
+codeName = ARG2
+fileName = ARG3
+
+filesPrefix = '/tmp/covid19graphgenerator-'
+outputBase = filesPrefix.'area-'.codeName.'-graph'
+
+set terminal svg size 500, 500
+set output outputBase.'.svg'
+set pointsize 0.75
+load "includes/plotCustomGraph.gnu"
+
+set terminal png size 500, 500
+set output outputBase.'.png'
+set pointsize 1
+load "includes/plotCustomGraph.gnu"
diff --git a/cron/generateData.php b/cron/generateData.php
index 489b702..996fdcd 100644
--- a/cron/generateData.php
+++ b/cron/generateData.php
@@ -29,28 +29,7 @@
"Metropolità Nord" => "MetropolitaNord",
];
-// Funció per obtenir el nombre de casos nous al dia originalDay+translation
-// a partir de les dades de la regió sanitaria (dataRegio).
-function getSumDay($originalDay, $translation, &$dataRegio) {
- if ($translation >= 0)
- $day = (clone $originalDay)->add(new DateInterval("P".abs($translation)."D"));
- else
- $day = (clone $originalDay)->sub(new DateInterval("P".abs($translation)."D"));
-
- foreach ($dataRegio as $row) {
- $rowDay = new DateTime($row["data"]);
- if ($day == $rowDay) return $row["sum_numcasos"];
- }
-
- return 0;
-}
-
-// Funció per fer una consulta a la taula de dades
-function query($soql) {
- $url = "https://analisi.transparenciacatalunya.cat/resource/xuwf-dxjd.json?\$query=".urlencode($soql);
- $raw = file_get_contents($url);
- return json_decode($raw, true);
-}
+require_once(__DIR__."/includes/generation.php");
// Demanem una llista del nombre de casos cada dia a cada regió sanitària
$data = query("SELECT data, regiosanitariadescripcio AS regio, sum(numcasos) AS sum_numcasos
@@ -74,97 +53,8 @@
if (!in_array($regio, array_keys($CODENAME)))
die("[fatal error] No tenim contemplada la regió '".$regio."'.\n");
- $summary[$regio] = [];
-
- // Veiem quin és el primer i l'últim dia de la sèrie
- $oldestDay = new DateTime("today");
- $newestDay = new DateTime();
- $newestDay->setTimestamp(0);
-
- foreach ($dataRegio as $row) {
- $date = new DateTime($row["data"]);
- if ($date < $oldestDay) $oldestDay = $date;
- if ($date > $newestDay) $newestDay = $date;
- }
-
- // Si l'últim dia és avui, posem que sigui ahir, perquè no volem informació
- // incompleta sobre avui.
- if ($oldestDay == (new DateTime("today")))
- $oldestDay = new DateTime("yesterday");
-
- // Ara calculem les rhos.
- $rhos = [];
-
- // Considerem cada dia a partir de 6 dies després del primer dia, i fins al
- // dia anterior a l'últim dia (extrems inclosos)
- for ($currentDate = (clone $oldestDay)->add(new DateInterval("P6D"));
- $currentDate < $newestDay;
- $currentDate->add(new DateInterval("P1D"))) {
- // Calculem la rho (velocitat reproductiva efectiva) per aquell dia.
- // Fórmula: https://biocomsc.upc.edu/en/shared/avaluacio_risc.pdf
- $num = getSumDay($currentDate, 1, $dataRegio) +
- getSumDay($currentDate, 0, $dataRegio) +
- getSumDay($currentDate, -1, $dataRegio);
-
- $den = getSumDay($currentDate, -4, $dataRegio) +
- getSumDay($currentDate, -5, $dataRegio) +
- getSumDay($currentDate, -6, $dataRegio);
-
- if ($num != 0 && $den == 0) continue;
-
- $rho = ($num == 0 ? 0 : $num/$den);
-
- $rhos[] = [
- "data" => $currentDate->format("c"),
- "rho" => $rho
- ];
- }
-
- // Considerem cada dia a partir de 13 dies després del primer dia, i fins el
- // dia anterior a l'últim dia (extrems inclosos)
- for ($currentDate = (clone $oldestDay)->add(new DateInterval("P13D"));
- $currentDate < $newestDay;
- $currentDate->add(new DateInterval("P1D"))) {
- // Calculem Rho_7 i IA_14
- // Rho_7(t) := \sum_{i=0}^{7} Rho(t - i)
- // IA_14(t) := \sum_{i=0}^{14} N(t - i),
- // on N(j) és el nombre de casos nous confirmats per PCR el dia j.
- $sum = 0;
-
- $p13Date = (clone $currentDate)->sub(new DateInterval("P13D"));
- $p6Date = (clone $currentDate)->sub(new DateInterval("P6D"));
-
- foreach ($dataRegio as $row) {
- $date = new DateTime($row["data"]);
- if ($date >= $p13Date && $date <= $currentDate) {
- $sum += $row["sum_numcasos"];
- }
- }
-
- $rhoAverage = 0;
- $rhoCount = 0;
-
- foreach ($rhos as $row) {
- $date = new DateTime($row["data"]);
- if ($date >= $p6Date && $date <= $currentDate) {
- ++$rhoCount;
- $rhoAverage += $row["rho"];
- }
- }
-
- // Si no hem trobat rhos (rhoCount == 0) és perquè el numerador no era 0
- // però el denominador era sempre 0 al calcular les rhos. Aleshores, tot i
- // que no poguem calcular la rho_7 a causa de no poder calcular les rho_t
- // individuals, aquest fet ens indica que el creixement ha sigut altíssim,
- // i per tant posem una rho_7 de 1000000000, que se surt de la gràfica.
- $rhoAverage = ($rhoCount == 0 ? 1000000000 : $rhoAverage/$rhoCount);
-
- $summary[$regio][] = [
- "data" => $currentDate->format("d/m/y"),
- "ia14" => $sum*(1e5/$HABITANTS[$regio]),
- "rho7" => $rhoAverage
- ];
- }
+ // Generem les dades
+ $summary[$regio] = generateSummary($dataRegio, $HABITANTS[$regio]);
}
// Posem les dades a diversos fitxers per tal que les pugui llegir el gnuplot
diff --git a/cron/generateGraphs.gnu b/cron/generateGraphs.gnu
index abd2f83..665641d 100644
--- a/cron/generateGraphs.gnu
+++ b/cron/generateGraphs.gnu
@@ -4,10 +4,10 @@
set output '/tmp/covid19graphgenerator-output.svg'
set pointsize 0.75
-load "plot.gnu"
+load "includes/plotAllGraphs.gnu"
set terminal png size 1600, 1600
set output '/tmp/covid19graphgenerator-output.png'
set pointsize 1
-load "plot.gnu"
+load "includes/plotAllGraphs.gnu"
diff --git a/cron/includes/generation.php b/cron/includes/generation.php
new file mode 100644
index 0000000..623113f
--- /dev/null
+++ b/cron/includes/generation.php
@@ -0,0 +1,124 @@
+<?php
+if (php_sapi_name() != "cli")
+ exit();
+
+// Returns the "sum_numcasos" value of the row in dataRegio whose "data" day equals
+// originalDay shifted by translation days, or 0 when no row matches that day.
+function getSumDay($originalDay, $translation, &$dataRegio) {
+ // An inverted interval makes add() move backwards for negative translations
+ $offset = new DateInterval("P".abs($translation)."D");
+ $offset->invert = ($translation < 0 ? 1 : 0);
+ $target = (clone $originalDay)->add($offset);
+
+ foreach ($dataRegio as $entry) {
+ $entryDay = new DateTime($entry["data"]);
+ if ($target == $entryDay) return $entry["sum_numcasos"];
+ }
+
+ return 0;
+}
+
+// Runs a SoQL query against the given resource of analisi.transparenciacatalunya.cat
+// and returns the decoded JSON (as arrays), or null when the request fails.
+function query($soql, $resource = "xuwf-dxjd") {
+ $endpoint = "https://analisi.transparenciacatalunya.cat/resource/".$resource.".json";
+ $raw = file_get_contents($endpoint."?\$query=".urlencode($soql));
+ return ($raw === false ? null : json_decode($raw, true));
+}
+
+// Builds one ["data", "ia14", "rho7"] row per day from the daily case rows in dataRegio; ia14 is scaled to cases per 1e5 of the given habitants.
+function generateSummary(&$dataRegio, $habitants){
+ $summary = [];
+
+ // Find the first and the last day of the series
+ $oldestDay = new DateTime("today");
+ $newestDay = new DateTime();
+ $newestDay->setTimestamp(0);
+
+ foreach ($dataRegio as $row) {
+ $date = new DateTime($row["data"]);
+ if ($date < $oldestDay) $oldestDay = $date;
+ if ($date > $newestDay) $newestDay = $date;
+ }
+
+ // If the last day is today, use yesterday instead, because we don't want
+ // incomplete information about today.
+ if ($newestDay == (new DateTime("today")))
+ $newestDay = new DateTime("yesterday");
+
+ // Now compute the daily rhos.
+ $rhos = [];
+
+ // Visit every day from 6 days after the first day up to the day before the
+ // last day (both ends included)
+ for ($currentDate = (clone $oldestDay)->add(new DateInterval("P6D"));
+ $currentDate < $newestDay;
+ $currentDate->add(new DateInterval("P1D"))) {
+ // Compute rho (effective reproduction rate) for that day.
+ // Formula: https://biocomsc.upc.edu/en/shared/avaluacio_risc.pdf
+ $num = getSumDay($currentDate, 1, $dataRegio) +
+ getSumDay($currentDate, 0, $dataRegio) +
+ getSumDay($currentDate, -1, $dataRegio);
+
+ $den = getSumDay($currentDate, -4, $dataRegio) +
+ getSumDay($currentDate, -5, $dataRegio) +
+ getSumDay($currentDate, -6, $dataRegio);
+
+ if ($num != 0 && $den == 0) continue;
+
+ $rho = ($num == 0 ? 0 : $num/$den);
+
+ $rhos[] = [
+ "data" => $currentDate->format("c"),
+ "rho" => $rho
+ ];
+ }
+
+ // Visit every day from 13 days after the first day up to the day before the
+ // last day (both ends included)
+ for ($currentDate = (clone $oldestDay)->add(new DateInterval("P13D"));
+ $currentDate < $newestDay;
+ $currentDate->add(new DateInterval("P1D"))) {
+ // Compute Rho_7 and IA_14:
+ // Rho_7(t) := mean of the available Rho(t - i) for i = 0..6
+ // IA_14(t) := \sum_{i=0}^{13} N(t - i), scaled to 1e5 inhabitants,
+ // where N(j) is the number of new PCR-confirmed cases on day j.
+ $sum = 0;
+
+ $p13Date = (clone $currentDate)->sub(new DateInterval("P13D"));
+ $p6Date = (clone $currentDate)->sub(new DateInterval("P6D"));
+
+ foreach ($dataRegio as $row) {
+ $date = new DateTime($row["data"]);
+ if ($date >= $p13Date && $date <= $currentDate) {
+ $sum += $row["sum_numcasos"];
+ }
+ }
+
+ $rhoAverage = 0;
+ $rhoCount = 0;
+
+ foreach ($rhos as $row) {
+ $date = new DateTime($row["data"]);
+ if ($date >= $p6Date && $date <= $currentDate) {
+ ++$rhoCount;
+ $rhoAverage += $row["rho"];
+ }
+ }
+
+ // No rhos found (rhoCount == 0) means that the numerator was non-zero while
+ // the denominator was always 0 when computing the rhos. Rho_7 cannot be
+ // computed from the individual rho_t values then, but this signals extremely
+ // high growth, so rho_7 is set to 1000000000, which falls outside the
+ // plotted range.
+ $rhoAverage = ($rhoCount == 0 ? 1000000000 : $rhoAverage/$rhoCount);
+
+ $summary[] = [
+ "data" => $currentDate->format("d/m/y"),
+ "ia14" => $sum*(1e5/$habitants),
+ "rho7" => $rhoAverage
+ ];
+ }
+
+ return $summary;
+}
diff --git a/cron/plot.gnu b/cron/includes/plotAllGraphs.gnu
similarity index 71%
rename from cron/plot.gnu
rename to cron/includes/plotAllGraphs.gnu
index 92db98a..ae0914e 100644
--- a/cron/plot.gnu
+++ b/cron/includes/plotAllGraphs.gnu
@@ -20,8 +20,9 @@
do for [i = 1:n] {
lastUpdated = system("tail -n 1 ".filesPrefix.fileNames[i]." | awk '{print $1;}'")
- set title prettyNames[i]."\n{/*0.4 Última dada (punt negre): ".lastUpdated."}"
- plot 6*x w filledcurve y1=0 lt rgb "#ff9494", 100/x w filledcurve y1=0 lt rgb "#ffe494", 70/x w filledcurve y1=0 lt rgb "#dbff94", 30/x w filledcurve y1=0 lt rgb "#a0ff94", filesPrefix.fileNames[i] u 2:3 w lp pt 6 lt rgb "black", "< tail -n 1 ".filesPrefix.fileNames[i] u 2:3 w lp pt 7 lt rgb "black"
+ graphTitle = prettyNames[i]
+ graphDataFile = filesPrefix.fileNames[i]
+ load "includes/plotSingleGraph.gnu"
}
unset multiplot
diff --git a/cron/includes/plotCustomGraph.gnu b/cron/includes/plotCustomGraph.gnu
new file mode 100644
index 0000000..acf66aa
--- /dev/null
+++ b/cron/includes/plotCustomGraph.gnu
@@ -0,0 +1,19 @@
+set xlabel "Casos actius per 10^5 habitants"
+set ylabel "Mitjana taxa de creixement darrers 7 dies"
+set title font "Helvetica,20"
+# Fixed axis ranges, and the number of samples used for plotted functions
+set yrange[0:5]
+set xrange[0:800]
+set samples 400
+
+set key off
+set tics out scale 0.5,0.2
+
+min(a, b) = (a < b ? a : b)
+
+# The different colored areas correspond to the classification of the EPG values defined on page 8 at https://biocomsc.upc.edu/en/shared/20200506_report_web_51.pdf
+# lastUpdated, graphTitle and graphDataFile are read by includes/plotSingleGraph.gnu; lastUpdated is the first field of the data file's last line
+lastUpdated = system("tail -n 1 ".fileName." | awk '{print $1;}'")
+graphTitle = name
+graphDataFile = fileName
+load "includes/plotSingleGraph.gnu"
diff --git a/cron/includes/plotSingleGraph.gnu b/cron/includes/plotSingleGraph.gnu
new file mode 100644
index 0000000..152fd11
--- /dev/null
+++ b/cron/includes/plotSingleGraph.gnu
@@ -0,0 +1,2 @@
+set title graphTitle."\n{/*0.4 Última dada (punt negre): ".lastUpdated."}"
+plot 6*x w filledcurve y1=0 lt rgb "#ff9494", 100/x w filledcurve y1=0 lt rgb "#ffe494", 70/x w filledcurve y1=0 lt rgb "#dbff94", 30/x w filledcurve y1=0 lt rgb "#a0ff94", graphDataFile u 2:3 w lp pt 6 lt rgb "black", "< tail -n 1 ".graphDataFile u 2:3 w lp pt 7 lt rgb "black"