Fix isoLangs generator

The isoLangs generator didn't detect a new set of languages which were
recently added to Google Translate. This is because the
https://translate.google.com/translate_a/l?client=chrome endpoint no
longer returns the full list of languages.

Bug: translateselectedtext:8
Change-Id: If529a6001bc8c80db997e254249dbebd1245f1f3
diff --git a/tools/langs/README.md b/tools/langs/README.md
new file mode 100644
index 0000000..684befc
--- /dev/null
+++ b/tools/langs/README.md
@@ -0,0 +1,13 @@
+# Language list generator
+The tool found in this folder serves the purpose of updating the `isoLangs`
+object found in `//src/js/common.js`, which contains a list of possible
+target translation languages.
+
+Run `go run langs.go` in order to obtain the updated `isoLangs` object, and copy
+it to `//src/js/common.js`. If it fails, run it again until it works (it might
+take 3 or 4 attempts).
+
+If the way the languages are encoded into `https://translate.google.com/`
+changes, this tool will no longer work, but it probably can be slightly adapted
+to make it work again. It is expected that this tool will not work in the
+future.
diff --git a/tools/langs/isoLangs.json b/tools/langs/isoLangs.json
index 975d984..78ea48d 100644
--- a/tools/langs/isoLangs.json
+++ b/tools/langs/isoLangs.json
@@ -75,8 +75,8 @@
     "name": "Bengali",
     "nativeName": "বাংলা"
   },
-  "bh": {
-    "name": "Bihari",
+  "bho": {
+    "name": "Bhojpuri",
     "nativeName": "भोजपुरी"
   },
   "bi": {
@@ -103,6 +103,10 @@
     "name": "Catalan; Valencian",
     "nativeName": "Català"
   },
+  "ckb": {
+    "name": "Central Kurdish",
+    "nativeName": "کوردیی ناوەندی"
+  },
   "ch": {
     "name": "Chamorro",
     "nativeName": "Chamoru"
@@ -149,7 +153,11 @@
   },
   "dv": {
     "name": "Divehi; Dhivehi; Maldivian;",
-    "nativeName": ""
+    "nativeName": "ދިވެހި"
+  },
+  "doi": {
+    "name": "Dogri",
+    "nativeName": "𑠖𑠵𑠌𑠤𑠮"
   },
   "nl": {
     "name": "Dutch",
@@ -275,6 +283,10 @@
     "name": "Icelandic",
     "nativeName": "Íslenska"
   },
+  "ilo": {
+    "name": "Ilocano",
+    "nativeName": "Ilokano"
+  },
   "it": {
     "name": "Italian",
     "nativeName": "Italiano"
@@ -335,10 +347,18 @@
     "name": "Kongo",
     "nativeName": "KiKongo"
   },
+  "gom": {
+    "name": "Konkani",
+    "nativeName": "कोंकणी"
+  },
   "ko": {
     "name": "Korean",
     "nativeName": "한국어 (韓國語), 조선말 (朝鮮語)"
   },
+  "kri": {
+    "name": "Krio",
+    "nativeName": "Krio"
+  },
   "ku": {
     "name": "Kurdish",
     "nativeName": "Kurdî, كوردی‎"
@@ -391,6 +411,10 @@
     "name": "Macedonian",
     "nativeName": "македонски јазик"
   },
+  "mai": {
+    "name": "Maithili",
+    "nativeName": "मैथिली"
+  },
   "mg": {
     "name": "Malagasy",
     "nativeName": "Malagasy fiteny"
@@ -419,6 +443,14 @@
     "name": "Marshallese",
     "nativeName": "Kajin M̧ajeļ"
   },
+  "mni-Mtei": {
+    "name": "Meiteilon (Manipuri)",
+    "nativeName": "ꯃꯤꯇꯩꯂꯣꯟ"
+  },
+  "lus": {
+    "name": "Mizo",
+    "nativeName": "Mizo ṭawng"
+  },
   "mn": {
     "name": "Mongolian",
     "nativeName": "монгол"
@@ -459,6 +491,10 @@
     "name": "Nuosu",
     "nativeName": "ꆈꌠ꒿ Nuosuhxop"
   },
+  "nso": {
+    "name": "Sepedi",
+    "nativeName": "Pedi"
+  },
   "nr": {
     "name": "South Ndebele",
     "nativeName": "isiNdebele"
diff --git a/tools/langs/langs.go b/tools/langs/langs.go
index acfb811..f7f3a2d 100644
--- a/tools/langs/langs.go
+++ b/tools/langs/langs.go
@@ -7,11 +7,14 @@
 	"log"
 	"net/http"
 	"os"
+	"regexp"
 	"strings"
 )
 
 const isoLangsFileName = "isoLangs.json"
 
+var initDataRe = regexp.MustCompile(`AF_initDataCallback\(.*data: ?(.+), ?sideChannel`)
+
 type Language struct {
 	CodeName   string `json:"-"`
 	Name       string `json:"name"`
@@ -55,31 +58,43 @@
 		log.Fatalf("Couldn't unmarshal JSON file %v, error: %v", isoLangsFileName, err)
 	}
 
-	resp, err := http.Get("http://translate.google.com/translate_a/l?client=chrome")
+	resp, err := http.Get("http://translate.google.com/")
 	if err != nil {
-		log.Fatalf("Couldn't get current Google Translate languages from server, error: %v", err)
+		log.Fatalf("Couldn't get current Google Translate page from server, error: %v", err)
 	}
 	defer resp.Body.Close()
 
 	gTranslateRawData, err := ioutil.ReadAll(resp.Body)
 	if err != nil {
-		log.Fatalf("Couldn't read body data from Google Translate languages request, error: %v", err)
+		log.Fatalf("Couldn't read body data from Google Translate request, error: %v", err)
 	}
 
-	var gTranslateJson map[string]interface{}
-	if err := json.Unmarshal(gTranslateRawData, &gTranslateJson); err != nil {
+  initDataMatches := initDataRe.FindSubmatch(gTranslateRawData)
+  if len(initDataMatches) < 2 {
+    log.Fatalln("Couldn't find languages information in Google Translate homepage.")
+  }
+  initDataRaw := initDataMatches[1]
+
+	var gTranslateJson []interface{}
+	if err := json.Unmarshal(initDataRaw, &gTranslateJson); err != nil {
 		log.Fatalf("Couldn't unmarshal JSON data from the Google Translate languages request, error: %v", err)
 	}
 
-	gTranslateLangs := gTranslateJson["tl"].(map[string]interface{})
+	gTranslateLangs := gTranslateJson[1].([]interface{})
 	langs := make(map[string]Language, len(gTranslateLangs))
 
-	for langCode, name := range gTranslateLangs {
+	for _, lang := range gTranslateLangs {
+		langSlice := lang.([]interface{})
+		if len(langSlice) < 2 {
+			log.Fatalln("A Google Translate language entry is malformed.")
+		}
+		langCode := langSlice[0].(string)
+    name := langSlice[1].(string)
 		isoLang, err := getLanguage(isoLangs, langCode)
 		if err != nil {
 			log.Fatalf("Didn't find language '%v' in isoLangs, error: %v", langCode, err)
 		}
-		isoLang.Name = name.(string)
+		isoLang.Name = name
 		langs[langCode] = isoLang
 	}