Fix isoLangs generator
The isoLangs generator didn't detect a new set of languages which were
recently added to Google Translate. This is because the
https://translate.google.com/translate_a/l?client=chrome endpoint no
longer returns the full list of languages.
Bug: translateselectedtext:8
Change-Id: If529a6001bc8c80db997e254249dbebd1245f1f3
diff --git a/tools/langs/README.md b/tools/langs/README.md
new file mode 100644
index 0000000..684befc
--- /dev/null
+++ b/tools/langs/README.md
@@ -0,0 +1,13 @@
+# Language list generator
+The tool found in this folder serves the purpose of updating the `isoLangs`
+object found in `//src/js/common.js`, which contains a list of possible
+target translation languages.
+
+Run `go run langs.go` in order to obtain the updated `isoLangs` object, and copy
+it to `//src/js/common.js`. If it fails, run it again until it works (it might
+take 3 or 4 attempts).
+
+If the way the languages are encoded into `https://translate.google.com/`
+changes, this tool will no longer work, but it probably can be slightly adapted
+to make it work again. It is expected that this tool will not work in the
+future.
diff --git a/tools/langs/isoLangs.json b/tools/langs/isoLangs.json
index 975d984..78ea48d 100644
--- a/tools/langs/isoLangs.json
+++ b/tools/langs/isoLangs.json
@@ -75,8 +75,8 @@
"name": "Bengali",
"nativeName": "বাংলা"
},
- "bh": {
- "name": "Bihari",
+ "bho": {
+ "name": "Bhojpuri",
"nativeName": "भोजपुरी"
},
"bi": {
@@ -103,6 +103,10 @@
"name": "Catalan; Valencian",
"nativeName": "Català"
},
+ "ckb": {
+ "name": "Central Kurdish",
+ "nativeName": "کوردیی ناوەندی"
+ },
"ch": {
"name": "Chamorro",
"nativeName": "Chamoru"
@@ -149,7 +153,11 @@
},
"dv": {
"name": "Divehi; Dhivehi; Maldivian;",
- "nativeName": ""
+ "nativeName": "ދިވެހި"
+ },
+ "doi": {
+ "name": "Dogri",
+ "nativeName": "𑠖𑠵𑠌𑠤𑠮"
},
"nl": {
"name": "Dutch",
@@ -275,6 +283,10 @@
"name": "Icelandic",
"nativeName": "Íslenska"
},
+ "ilo": {
+ "name": "Ilocano",
+ "nativeName": "Ilokano"
+ },
"it": {
"name": "Italian",
"nativeName": "Italiano"
@@ -335,10 +347,18 @@
"name": "Kongo",
"nativeName": "KiKongo"
},
+ "gom": {
+ "name": "Konkani",
+ "nativeName": "कोंकणी"
+ },
"ko": {
"name": "Korean",
"nativeName": "한국어 (韓國語), 조선말 (朝鮮語)"
},
+ "kri": {
+ "name": "Krio",
+ "nativeName": "Krio"
+ },
"ku": {
"name": "Kurdish",
"nativeName": "Kurdî, كوردی"
@@ -391,6 +411,10 @@
"name": "Macedonian",
"nativeName": "македонски јазик"
},
+ "mai": {
+ "name": "Maithili",
+ "nativeName": "मैथिली"
+ },
"mg": {
"name": "Malagasy",
"nativeName": "Malagasy fiteny"
@@ -419,6 +443,14 @@
"name": "Marshallese",
"nativeName": "Kajin M̧ajeļ"
},
+ "mni-Mtei": {
+ "name": "Meiteilon (Manipuri)",
+ "nativeName": "ꯃꯤꯇꯩꯂꯣꯟ"
+ },
+ "lus": {
+ "name": "Mizo",
+ "nativeName": "Mizo ṭawng"
+ },
"mn": {
"name": "Mongolian",
"nativeName": "монгол"
@@ -459,6 +491,10 @@
"name": "Nuosu",
"nativeName": "ꆈꌠ꒿ Nuosuhxop"
},
+ "nso": {
+ "name": "Sepedi",
+ "nativeName": "Pedi"
+ },
"nr": {
"name": "South Ndebele",
"nativeName": "isiNdebele"
diff --git a/tools/langs/langs.go b/tools/langs/langs.go
index acfb811..f7f3a2d 100644
--- a/tools/langs/langs.go
+++ b/tools/langs/langs.go
@@ -7,11 +7,14 @@
"log"
"net/http"
"os"
+ "regexp"
"strings"
)
const isoLangsFileName = "isoLangs.json"
+var initDataRe = regexp.MustCompile(`AF_initDataCallback\(.*data: ?(.+), ?sideChannel`)
+
type Language struct {
CodeName string `json:"-"`
Name string `json:"name"`
@@ -55,31 +58,43 @@
log.Fatalf("Couldn't unmarshal JSON file %v, error: %v", isoLangsFileName, err)
}
- resp, err := http.Get("http://translate.google.com/translate_a/l?client=chrome")
+ resp, err := http.Get("http://translate.google.com/")
if err != nil {
- log.Fatalf("Couldn't get current Google Translate languages from server, error: %v", err)
+ log.Fatalf("Couldn't get current Google Translate page from server, error: %v", err)
}
defer resp.Body.Close()
gTranslateRawData, err := ioutil.ReadAll(resp.Body)
if err != nil {
- log.Fatalf("Couldn't read body data from Google Translate languages request, error: %v", err)
+ log.Fatalf("Couldn't read body data from Google Translate request, error: %v", err)
}
- var gTranslateJson map[string]interface{}
- if err := json.Unmarshal(gTranslateRawData, &gTranslateJson); err != nil {
+ initDataMatches := initDataRe.FindSubmatch(gTranslateRawData)
+ if len(initDataMatches) < 2 {
+ log.Fatalln("Couldn't find languages information in Google Translate homepage.")
+ }
+ initDataRaw := initDataMatches[1]
+
+ var gTranslateJson []interface{}
+ if err := json.Unmarshal(initDataRaw, &gTranslateJson); err != nil {
log.Fatalf("Couldn't unmarshal JSON data from the Google Translate languages request, error: %v", err)
}
- gTranslateLangs := gTranslateJson["tl"].(map[string]interface{})
+ gTranslateLangs := gTranslateJson[1].([]interface{})
langs := make(map[string]Language, len(gTranslateLangs))
- for langCode, name := range gTranslateLangs {
+ for _, lang := range gTranslateLangs {
+ langSlice := lang.([]interface{})
+ if len(langSlice) < 2 {
+ log.Fatalln("A Google Translate language entry is malformed.")
+ }
+ langCode := langSlice[0].(string)
+ name := langSlice[1].(string)
isoLang, err := getLanguage(isoLangs, langCode)
if err != nil {
log.Fatalf("Didn't find language '%v' in isoLangs, error: %v", langCode, err)
}
- isoLang.Name = name.(string)
+ isoLang.Name = name
langs[langCode] = isoLang
}