Adds ability to generate graphs for custom areas

This commit adds the ability to generate individual PNG and SVG graphs
for custom areas, defined as the union of several "Àrees Bàsiques de
Salut" (ABS) in the config/customAreas.php file.

Change-Id: I27f34b4a8f520a38c55eed224554b470c5ba2938
diff --git a/README.md b/README.md
index b0f938c..0fa3394 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,8 @@
 # covid-19
 This is the code for https://covid-19.sandbox.avm99963.com, which contains graphs which determine the level of risk of each Catalan health area due to the COVID-19, based on the work of the [BIOCOMSC](https://biocomsc.upc.edu/en/covid-19/daily-report) group at the Polytechnic University of Catalonia (UPC).
 
+It can also be used to generate risk graphs for custom areas by defining these areas in the `config/customAreas.php` file.
+
 **DISCLAIMER**: The data shown in the website might be wrong due to a wrong implementation.
 
 ## Installation
@@ -26,6 +28,10 @@
 * `index.html`: a web accessible document which includes a disclaimer text, a key for the graphs, and the generated graphs themselves.
 * `cron` folder: a non-web accessible folder which contains programs which ultimately generate the graphs.
 * `cron/generate.bash`: a Bash script which orchestrates all the other programs in the folder in order to generate the graphs.
-* `cron/generateData.php`: a PHP script which extracts the Covid-19 data from the *Generalitat de Catalunya*'s API and analyzes that data to generate the ρ<sub>7</sub> and IA<sub>14</sub> values needed by `cron/generateGraphs.gnu`.
+* `cron/generateData.php`: a PHP script which extracts the Covid-19 data from the *Generalitat de Catalunya*'s API and analyzes that data to generate the ρ<sub>7</sub> and IA<sub>14</sub> values needed by `cron/generateGraphs.gnu` to generate the graphs for each health area.
 * `cron/generateGraphs.gnu`: a gnuplot script which generates the graphs with the data which has been provided by the `cron/generateData.php`. It uses the helper script `cron/plot.gnu`.
+* `cron/generateCustomData.php`: a PHP script analogous to `cron/generateData.php` which generates the data for custom areas defined in `config/customAreas.php`.
+* `cron/generateCustomGraph.gnu`: a gnuplot script analogous to `cron/generateGraphs.gnu` which generates the graph of a custom area using the data provided by the previous script.
+* `config/config.php`: a file where general settings can be set.
+* `config/customAreas.php`: a file where the custom areas can be defined.
 * `output` folder: a folder created by the `generate.bash` script where the generated graphs are saved.
diff --git a/config/config.php b/config/config.php
new file mode 100644
index 0000000..f137ba8
--- /dev/null
+++ b/config/config.php
@@ -0,0 +1,4 @@
+<?php
+$conf = [];
+
+require_once(__DIR__."/customAreas.php");
diff --git a/config/customAreas.php b/config/customAreas.php
new file mode 100644
index 0000000..3498ce0
--- /dev/null
+++ b/config/customAreas.php
@@ -0,0 +1,57 @@
+<?php
+// Custom areas configuration
+
+// A list of the ABS codes can be found here:
+// https://catsalut.gencat.cat/web/.content/minisite/catsalut/proveidors_professionals/registres_catalegs/documents/poblacio-referencia.pdf
+$conf["customAreas"] = [
+  [
+    "name" => "Santa Coloma de Gramenet",
+    "abs" => [222, 223, 224, 225, 226, 368],
+    "codename" => "santa-coloma-de-gramenet"
+  ],
+  [
+    "name" => "Districte 1 - Sta. Coloma de Grt.",
+    "abs" => [222],
+    "codename" => "districte-1-santa-coloma-de-gramenet"
+  ],
+  [
+    "name" => "Districte 2 - Sta. Coloma de Grt.",
+    "abs" => [223],
+    "codename" => "districte-2-santa-coloma-de-gramenet"
+  ],
+  [
+    "name" => "Districte 3 - Sta. Coloma de Grt.",
+    "abs" => [224],
+    "codename" => "districte-3-santa-coloma-de-gramenet"
+  ],
+  [
+    "name" => "Districte 4 - Sta. Coloma de Grt.",
+    "abs" => [225],
+    "codename" => "districte-4-santa-coloma-de-gramenet"
+  ],
+  [
+    "name" => "Districte 5 - Sta. Coloma de Grt.",
+    "abs" => [226],
+    "codename" => "districte-5-santa-coloma-de-gramenet"
+  ],
+  [
+    "name" => "Districte 6 - Sta. Coloma de Grt.",
+    "abs" => [368],
+    "codename" => "districte-6-santa-coloma-de-gramenet"
+  ],
+  [
+    "name" => "Barcelona",
+    "abs" => ["016", "017", "018", "019", "020", "021", "022", "023", "024", "025", "027", "028", "029", "030", "031", "032", "033", "034", "035", "036", "038", "383", "400", "039", "040", "041", "042", "043", "044", "045", "046", "047", "048", "049", "050", "358", "051", "052", "053", "054", "055", "056", "326", "059", "062", "063", "064", "327", "385", "402", "403", "065", "067", "069", "070", "395", "396", "071", "072", "073", "074", "075", "076", "077", "078", "079", "331"],
+    "codename" => "barcelona"
+  ],
+  [
+    "name" => "Hospitalet del Llobregat",
+    "abs" => ["288", "289", "290", "291", "292", "293", "294", "295", "296", "297", "298", "299"],
+    "codename" => "hospitalet-del-llobregat"
+  ],
+  [
+    "name" => "Anoia rural",
+    "abs" => ["131"],
+    "codename" => "anoia-rural"
+  ]
+];
diff --git a/cron/generate.bash b/cron/generate.bash
index 8e0b569..201740b 100644
--- a/cron/generate.bash
+++ b/cron/generate.bash
@@ -1,6 +1,14 @@
 #!/bin/bash
+# Recreate the output folder from scratch, removing any previously generated graphs
+rm -rf ../output
+mkdir -p ../output
+
+# Generate graphs for the Catalonia health areas
 php generateData.php
 gnuplot generateGraphs.gnu
-mkdir -p ../output
 mv /tmp/covid19graphgenerator-output.png ../output/graph.png
 mv /tmp/covid19graphgenerator-output.svg ../output/graph.svg
+
+# Generate graphs for each custom area defined in
+# config/customAreas.php
+php generateCustomData.php
diff --git a/cron/generateCustomData.php b/cron/generateCustomData.php
new file mode 100644
index 0000000..5be2bc0
--- /dev/null
+++ b/cron/generateCustomData.php
@@ -0,0 +1,63 @@
+<?php
+if (php_sapi_name() != "cli")
+  exit();
+
+require_once(__DIR__."/../config/config.php");
+require_once(__DIR__."/includes/generation.php");
+
+// Returns the summed `poblacio_oficial` for the year 2020 of the given ABS codes, queried
+// from the "ftq4-h9vk" resource, or null when the response doesn't contain that value.
+function habitants($abs) {
+  $data = query("SELECT sum(poblacio_oficial) AS habitants
+  WHERE
+    abs_codi in(".implode(",", array_map(function($abs) { return "'".$abs."'"; }, $abs)).") AND
+    any = 2020", "ftq4-h9vk");
+
+  return ($data[0]["habitants"] ?? null);
+}
+
+if (isset($conf["customAreas"])) {
+  foreach ($conf["customAreas"] as $area) {
+    // Skip the custom areas which don't have any ABS configured
+    if (count($area["abs"]) == 0) {
+      echo "[Warning] There aren't any ABS configured for ".$area["name"].".\n";
+      continue;
+    }
+
+    // Request the number of PCR-positive cases per day, summed over the ABS of the area
+    $data = query("SELECT data, sum(numcasos) AS sum_numcasos
+    WHERE
+      resultatcoviddescripcio = 'Positiu PCR' AND
+      abscodi in(".implode(",", array_map(function($abs) { return "'".$abs."'"; }, $area["abs"])).")
+    GROUP BY data
+    ORDER BY data ASC
+    LIMIT 50000");
+    if (!is_array($data)) {
+      echo "[Fatal error] Failed getting cases for ".$area["name"].".\n";
+      continue;
+    }
+
+    // Get the population of the ABS of the area (needed to compute the IA14)
+    $habitants = habitants($area["abs"]);
+    if ($habitants === null) {
+      echo "[Fatal error] Failed getting population for ".$area["name"].".\n";
+      continue;
+    }
+
+    // Generate the data and write it to a temporary file (tmpfile() removes it on fclose)
+    $summary = generateSummary($data, $habitants);
+    $file = tmpfile();
+    $fileName = stream_get_meta_data($file)['uri'];
+    foreach ($summary as $row)
+      fwrite($file, $row["data"]." ".$row["ia14"]." ".$row["rho7"]."\n");
+
+    // Call gnuplot to draw the graph; escapeshellarg() already quotes each argument, so no extra quotes are added
+    shell_exec("gnuplot -c generateCustomGraph.gnu ".escapeshellarg($area["name"])." ".escapeshellarg($area["codename"])." ".escapeshellarg($fileName));
+    rename("/tmp/covid19graphgenerator-area-".$area["codename"]."-graph.png", __DIR__."/../output/area-".$area["codename"]."-graph.png");
+    rename("/tmp/covid19graphgenerator-area-".$area["codename"]."-graph.svg", __DIR__."/../output/area-".$area["codename"]."-graph.svg");
+
+    fclose($file);
+  }
+} else {
+  echo "[Warning] The config/customAreas.php file doesn't define the customAreas field.\n";
+}
diff --git a/cron/generateCustomGraph.gnu b/cron/generateCustomGraph.gnu
new file mode 100644
index 0000000..7f1e73f
--- /dev/null
+++ b/cron/generateCustomGraph.gnu
@@ -0,0 +1,17 @@
+name = ARG1      # title of the graph (read by includes/plotCustomGraph.gnu)
+codeName = ARG2  # identifier used in the names of the generated images
+fileName = ARG3  # data file to plot (read by includes/plotCustomGraph.gnu)
+
+filesPrefix = '/tmp/covid19graphgenerator-'
+
+set terminal svg size 500, 500
+set output filesPrefix.'area-'.codeName.'-graph.svg'
+
+set pointsize 0.75
+load "includes/plotCustomGraph.gnu"
+
+set terminal png size 500, 500
+set output filesPrefix.'area-'.codeName.'-graph.png'
+
+set pointsize 1
+load "includes/plotCustomGraph.gnu"
diff --git a/cron/generateData.php b/cron/generateData.php
index 489b702..996fdcd 100644
--- a/cron/generateData.php
+++ b/cron/generateData.php
@@ -29,28 +29,7 @@
   "Metropolità Nord" => "MetropolitaNord",
 ];
 
-// Funció per obtenir el nombre de casos nous al dia originalDay+translation
-// a partir de les dades de la regió sanitaria (dataRegio).
-function getSumDay($originalDay, $translation, &$dataRegio) {
-  if ($translation >= 0)
-    $day = (clone $originalDay)->add(new DateInterval("P".abs($translation)."D"));
-  else
-    $day = (clone $originalDay)->sub(new DateInterval("P".abs($translation)."D"));
-
-  foreach ($dataRegio as $row) {
-    $rowDay = new DateTime($row["data"]);
-    if ($day == $rowDay) return $row["sum_numcasos"];
-  }
-
-  return 0;
-}
-
-// Funció per fer una consulta a la taula de dades
-function query($soql) {
-  $url = "https://analisi.transparenciacatalunya.cat/resource/xuwf-dxjd.json?\$query=".urlencode($soql);
-  $raw = file_get_contents($url);
-  return json_decode($raw, true);
-}
+require_once(__DIR__."/includes/generation.php");
 
 // Demanem una llista del nombre de casos cada dia a cada regió sanitària
 $data = query("SELECT data, regiosanitariadescripcio AS regio, sum(numcasos) AS sum_numcasos
@@ -74,97 +53,8 @@
   if (!in_array($regio, array_keys($CODENAME)))
     die("[fatal error] No tenim contemplada la regió '".$regio."'.\n");
 
-  $summary[$regio] = [];
-
-  // Veiem quin és el primer i l'últim dia de la sèrie
-  $oldestDay = new DateTime("today");
-  $newestDay = new DateTime();
-  $newestDay->setTimestamp(0);
-
-  foreach ($dataRegio as $row) {
-    $date = new DateTime($row["data"]);
-    if ($date < $oldestDay) $oldestDay = $date;
-    if ($date > $newestDay) $newestDay = $date;
-  }
-
-  // Si l'últim dia és avui, posem que sigui ahir, perquè no volem informació
-  // incompleta sobre avui.
-  if ($oldestDay == (new DateTime("today")))
-    $oldestDay = new DateTime("yesterday");
-
-  // Ara calculem les rhos.
-  $rhos = [];
-
-  // Considerem cada dia a partir de 6 dies després del primer dia, i fins al
-  // dia anterior a l'últim dia (extrems inclosos)
-  for ($currentDate = (clone $oldestDay)->add(new DateInterval("P6D"));
-  $currentDate < $newestDay;
-  $currentDate->add(new DateInterval("P1D"))) {
-    // Calculem la rho (velocitat reproductiva efectiva) per aquell dia.
-    // Fórmula: https://biocomsc.upc.edu/en/shared/avaluacio_risc.pdf
-    $num = getSumDay($currentDate, 1, $dataRegio) +
-           getSumDay($currentDate, 0, $dataRegio) +
-           getSumDay($currentDate, -1, $dataRegio);
-
-    $den = getSumDay($currentDate, -4, $dataRegio) +
-           getSumDay($currentDate, -5, $dataRegio) +
-           getSumDay($currentDate, -6, $dataRegio);
-
-    if ($num != 0 && $den == 0) continue;
-
-    $rho = ($num == 0 ? 0 : $num/$den);
-
-    $rhos[] = [
-      "data" => $currentDate->format("c"),
-      "rho" => $rho
-    ];
-  }
-
-  // Considerem cada dia a partir de 13 dies després del primer dia, i fins el
-  // dia anterior a l'últim dia (extrems inclosos)
-  for ($currentDate = (clone $oldestDay)->add(new DateInterval("P13D"));
-    $currentDate < $newestDay;
-    $currentDate->add(new DateInterval("P1D"))) {
-    // Calculem Rho_7 i IA_14
-    // Rho_7(t) := \sum_{i=0}^{7} Rho(t - i)
-    // IA_14(t) := \sum_{i=0}^{14} N(t - i),
-    //   on N(j) és el nombre de casos nous confirmats per PCR el dia j.
-    $sum = 0;
-
-    $p13Date = (clone $currentDate)->sub(new DateInterval("P13D"));
-    $p6Date = (clone $currentDate)->sub(new DateInterval("P6D"));
-
-    foreach ($dataRegio as $row) {
-      $date = new DateTime($row["data"]);
-      if ($date >= $p13Date && $date <= $currentDate) {
-        $sum += $row["sum_numcasos"];
-      }
-    }
-
-    $rhoAverage = 0;
-    $rhoCount = 0;
-
-    foreach ($rhos as $row) {
-      $date = new DateTime($row["data"]);
-      if ($date >= $p6Date && $date <= $currentDate) {
-        ++$rhoCount;
-        $rhoAverage += $row["rho"];
-      }
-    }
-
-    // Si no hem trobat rhos (rhoCount == 0) és perquè el numerador no era 0
-    // però el denominador era sempre 0 al calcular les rhos. Aleshores, tot i
-    // que no poguem calcular la rho_7 a causa de no poder calcular les rho_t
-    // individuals, aquest fet ens indica que el creixement ha sigut altíssim,
-    // i per tant posem una rho_7 de 1000000000, que se surt de la gràfica.
-    $rhoAverage = ($rhoCount == 0 ? 1000000000 : $rhoAverage/$rhoCount);
-
-    $summary[$regio][] = [
-      "data" => $currentDate->format("d/m/y"),
-      "ia14" => $sum*(1e5/$HABITANTS[$regio]),
-      "rho7" => $rhoAverage
-    ];
-  }
+  // Generem les dades
+  $summary[$regio] = generateSummary($dataRegio, $HABITANTS[$regio]);
 }
 
 // Posem les dades a diversos fitxers per tal que les pugui llegir el gnuplot
diff --git a/cron/generateGraphs.gnu b/cron/generateGraphs.gnu
index abd2f83..665641d 100644
--- a/cron/generateGraphs.gnu
+++ b/cron/generateGraphs.gnu
@@ -4,10 +4,10 @@
 set output '/tmp/covid19graphgenerator-output.svg'
 
 set pointsize 0.75
-load "plot.gnu"
+load "includes/plotAllGraphs.gnu"
 
 set terminal png size 1600, 1600
 set output '/tmp/covid19graphgenerator-output.png'
 
 set pointsize 1
-load "plot.gnu"
+load "includes/plotAllGraphs.gnu"
diff --git a/cron/includes/generation.php b/cron/includes/generation.php
new file mode 100644
index 0000000..623113f
--- /dev/null
+++ b/cron/includes/generation.php
@@ -0,0 +1,124 @@
+<?php
+if (php_sapi_name() != "cli")
+  exit();
+
+// Returns the number of new cases on the day originalDay+translation (in days), taken
+// from the rows of the region's data (dataRegio). Returns 0 when no row matches that day.
+function getSumDay($originalDay, $translation, &$dataRegio) {
+  if ($translation >= 0)
+    $day = (clone $originalDay)->add(new DateInterval("P".abs($translation)."D"));
+  else
+    $day = (clone $originalDay)->sub(new DateInterval("P".abs($translation)."D"));
+
+  foreach ($dataRegio as $row) {
+    $rowDay = new DateTime($row["data"]);
+    if ($day == $rowDay) return $row["sum_numcasos"];
+  }
+
+  return 0;
+}
+
+// Sends the query $soql to the given resource of analisi.transparenciacatalunya.cat and returns the JSON response decoded as associative arrays, or null if the request fails
+function query($soql, $resource = "xuwf-dxjd") {
+  $url = "https://analisi.transparenciacatalunya.cat/resource/".$resource.".json?\$query=".urlencode($soql);
+  $raw = file_get_contents($url);
+  if ($raw === false) return null;
+  return json_decode($raw, true);
+}
+
+// Returns a list of ["data", "ia14", "rho7"] rows computed from the daily cases of a region (dataRegio) and its population (habitants)
+function generateSummary(&$dataRegio, $habitants){
+  $summary = [];
+
+  // Find the first and the last day of the series
+  $oldestDay = new DateTime("today");
+  $newestDay = new DateTime();
+  $newestDay->setTimestamp(0);
+
+  foreach ($dataRegio as $row) {
+    $date = new DateTime($row["data"]);
+    if ($date < $oldestDay) $oldestDay = $date;
+    if ($date > $newestDay) $newestDay = $date;
+  }
+
+  // If the last day is today, use yesterday instead, because we don't want
+  // incomplete information about today.
+  if ($newestDay == (new DateTime("today")))
+    $newestDay = new DateTime("yesterday");
+
+  // Now compute the rhos.
+  $rhos = [];
+
+  // Consider every day from 6 days after the first day up to the day before
+  // the last day (both ends included)
+  for ($currentDate = (clone $oldestDay)->add(new DateInterval("P6D"));
+  $currentDate < $newestDay;
+  $currentDate->add(new DateInterval("P1D"))) {
+    // Compute the rho (effective reproductive rate) for that day.
+    // Formula: https://biocomsc.upc.edu/en/shared/avaluacio_risc.pdf
+    $num = getSumDay($currentDate, 1, $dataRegio) +
+           getSumDay($currentDate, 0, $dataRegio) +
+           getSumDay($currentDate, -1, $dataRegio);
+
+    $den = getSumDay($currentDate, -4, $dataRegio) +
+           getSumDay($currentDate, -5, $dataRegio) +
+           getSumDay($currentDate, -6, $dataRegio);
+
+    if ($num != 0 && $den == 0) continue; // rho is undefined (division by zero): skip the day
+
+    $rho = ($num == 0 ? 0 : $num/$den);
+
+    $rhos[] = [
+      "data" => $currentDate->format("c"),
+      "rho" => $rho
+    ];
+  }
+
+  // Consider every day from 13 days after the first day up to the day before
+  // the last day (both ends included)
+  for ($currentDate = (clone $oldestDay)->add(new DateInterval("P13D"));
+    $currentDate < $newestDay;
+    $currentDate->add(new DateInterval("P1D"))) {
+    // Compute Rho_7 and IA_14
+    // Rho_7(t) := average of the available Rho(t - i) for i = 0, ..., 6
+    // IA_14(t) := (10^5 / habitants) * \sum_{i=0}^{13} N(t - i),
+    //   where N(j) is the number of new PCR-confirmed cases on day j.
+    $sum = 0;
+
+    $p13Date = (clone $currentDate)->sub(new DateInterval("P13D"));
+    $p6Date = (clone $currentDate)->sub(new DateInterval("P6D"));
+
+    foreach ($dataRegio as $row) {
+      $date = new DateTime($row["data"]);
+      if ($date >= $p13Date && $date <= $currentDate) {
+        $sum += $row["sum_numcasos"];
+      }
+    }
+
+    $rhoAverage = 0;
+    $rhoCount = 0;
+
+    foreach ($rhos as $row) {
+      $date = new DateTime($row["data"]);
+      if ($date >= $p6Date && $date <= $currentDate) {
+        ++$rhoCount;
+        $rhoAverage += $row["rho"];
+      }
+    }
+
+    // If no rhos were found (rhoCount == 0) it is because the numerator was not
+    // 0 but the denominator was always 0 when computing the rhos. Then, even
+    // though rho_7 cannot be computed because the individual rho_t could not be
+    // computed, this tells us that the growth has been extremely high, so we
+    // set a rho_7 of 1000000000, which falls outside of the graph.
+    $rhoAverage = ($rhoCount == 0 ? 1000000000 : $rhoAverage/$rhoCount);
+
+    $summary[] = [
+      "data" => $currentDate->format("d/m/y"),
+      "ia14" => $sum*(1e5/$habitants),
+      "rho7" => $rhoAverage
+    ];
+  }
+
+  return $summary;
+}
diff --git a/cron/plot.gnu b/cron/includes/plotAllGraphs.gnu
similarity index 71%
rename from cron/plot.gnu
rename to cron/includes/plotAllGraphs.gnu
index 92db98a..ae0914e 100644
--- a/cron/plot.gnu
+++ b/cron/includes/plotAllGraphs.gnu
@@ -20,8 +20,9 @@
 
 do for [i = 1:n] {
   lastUpdated = system("tail -n 1 ".filesPrefix.fileNames[i]." | awk '{print $1;}'")
-  set title prettyNames[i]."\n{/*0.4 Última dada (punt negre): ".lastUpdated."}"
-  plot 6*x w filledcurve y1=0 lt rgb "#ff9494", 100/x w filledcurve y1=0 lt rgb "#ffe494", 70/x w filledcurve y1=0 lt rgb "#dbff94", 30/x w filledcurve y1=0 lt rgb "#a0ff94", filesPrefix.fileNames[i] u 2:3 w lp pt 6 lt rgb "black", "< tail -n 1 ".filesPrefix.fileNames[i] u 2:3 w lp pt 7 lt rgb "black"
+  graphTitle = prettyNames[i]
+  graphDataFile = filesPrefix.fileNames[i]
+  load "includes/plotSingleGraph.gnu"
 }
 
 unset multiplot
diff --git a/cron/includes/plotCustomGraph.gnu b/cron/includes/plotCustomGraph.gnu
new file mode 100644
index 0000000..acf66aa
--- /dev/null
+++ b/cron/includes/plotCustomGraph.gnu
@@ -0,0 +1,19 @@
+set xlabel "Casos actius per 10^5 habitants"
+set ylabel "Mitjana taxa de creixement darrers 7 dies"
+set title font "Helvetica,20"
+
+set yrange[0:5]
+set xrange[0:800]
+set samples 400
+
+set key off
+set tics out scale 0.5,0.2
+
+min(a, b) = (a < b ? a : b)
+
+# The different colored areas correspond to the classification of the EPG values defined on page 8 at https://biocomsc.upc.edu/en/shared/20200506_report_web_51.pdf
+
+lastUpdated = system("tail -n 1 ".fileName." | awk '{print $1;}'")
+graphTitle = name
+graphDataFile = fileName
+load "includes/plotSingleGraph.gnu"
diff --git a/cron/includes/plotSingleGraph.gnu b/cron/includes/plotSingleGraph.gnu
new file mode 100644
index 0000000..152fd11
--- /dev/null
+++ b/cron/includes/plotSingleGraph.gnu
@@ -0,0 +1,2 @@
+set title graphTitle."\n{/*0.4 Última dada (punt negre): ".lastUpdated."}"
+plot 6*x w filledcurve y1=0 lt rgb "#ff9494", 100/x w filledcurve y1=0 lt rgb "#ffe494", 70/x w filledcurve y1=0 lt rgb "#dbff94", 30/x w filledcurve y1=0 lt rgb "#a0ff94", graphDataFile u 2:3 w lp pt 6 lt rgb "black", "< tail -n 1 ".graphDataFile u 2:3 w lp pt 7 lt rgb "black"