blob: 2be8801c76b22c5db318167da9856f887bf1378e [file] [log] [blame]
Adrià Vilanova Martínez81a703d2022-03-04 01:03:01 +01001package main
2
3import (
4 "bufio"
5 "encoding/json"
6 "flag"
7 "fmt"
8 "io"
9 "io/ioutil"
10 "log"
11 "math/rand"
12 "net/http"
13 "net/url"
14 "os"
15 "strconv"
16 "strings"
17 "time"
18)
19
// Constants used to build batchexecute requests.
const (
	// DefaultBL is the value sent in the "bl" query parameter of every
	// batchexecute request.
	DefaultBL = "boq_groupsfrontendserver_20220224.07_p0"

	// IDs for API requests:

	// RPCIDListConversations identifies
	// /GroupsFrontendConversationService.ListConversations.
	RPCIDListConversations = "Dq0xse"
	// RPCIDListConversationMessages identifies
	// /GroupsFrontendConversationService.ListConversationMessages.
	RPCIDListConversationMessages = "H08Fi"
)
25
// Command-line flags.
var (
	group         = flag.String("group", "", "Email of the group you want to export.")
	getList       = flag.Bool("getList", false, "Get a list of threads and write it to the file specified in --file (one of --getList or --getThreads is required).")
	getThreads    = flag.Bool("getThreads", false, "Retrieve all the threads specified in the thread list passed via STDIN (one of --getList or --getThreads is required).")
	fileName      = flag.String("file", "threads.txt", "File where thread IDs will be written when running with --getList.")
	folderName    = flag.String("folder", "threads", "Folder where threads will be saved when running with --getThreads.")
	authenticated = flag.Bool("authenticated", false, "Whether you want to take out the forum with authentication.")
	cookies       = flag.String("cookies", "", "Cookies (if you want to take out the forum authenticated).")
	fsid          = flag.String("fsid", "", "f.sid value (if you want to take out the forum authenticated).")
	at            = flag.String("at", "", "at value (if you want to take out the forum authenticated).")
)

// State shared by every request.
var (
	// realCookies holds the parsed form of the --cookies flag; the cookies are
	// attached to each request when --authenticated is set.
	realCookies []*http.Cookie

	// reqId is the value sent in the "_reqid" query parameter. It starts at a
	// random value and is advanced after every request.
	reqId = rand.Intn(999999)
)
40
// Request is a single RPC call to be sent as part of a batch request.
type Request struct {
	// Rpc is the RPC ID.
	Rpc string
	// Request is the request payload, encoded as a string.
	Request string
}
45
// Response is the result of a single RPC call extracted from a batch response.
type Response struct {
	// Rpc is the RPC ID.
	Rpc string
	// Data is the response payload.
	Data string
	// Index is the order index (can be a number encoded as a string or
	// "generic").
	Index string
	// Ok reports whether the request finished successfully, in which case Data
	// and Index are filled.
	Ok bool
}
52
// ConversationListResponse is one page of a group's thread list.
type ConversationListResponse struct {
	// PaginationToken is the token for the next page (empty when there is
	// none).
	PaginationToken string
	// IDs is the list of thread IDs in this page.
	IDs []string
}
57
// ConversationMessagesResponse is one page of the messages of a thread.
type ConversationMessagesResponse struct {
	// PaginationToken is the token for the next page (empty when there is
	// none).
	PaginationToken string
	// Data is the thread data encoded as PB+JSON.
	Data string
}
62
63func batchRequest(requests *[]Request) (*[]Response, error) {
64 var requestsArray [][]interface{}
65 var RPCIdsSlice []string
66 for i, r := range *requests {
67 requestArray := make([]interface{}, 4)
68 requestArray[0] = r.Rpc
69 requestArray[1] = r.Request
70 requestArray[2] = nil
71 requestArray[3] = strconv.Itoa(i + 1)
72 requestsArray = append(requestsArray, requestArray)
73 RPCIdsSlice = append(RPCIdsSlice, r.Rpc)
74 }
75 freq, err := json.Marshal(requestsArray)
76 if err != nil {
77 return nil, err
78 }
79 freqString := "[" + string(freq) + "]"
80
81 v := url.Values{}
82 v.Set("f.req", freqString)
83 if *authenticated {
84 v.Set("at", *at)
85 }
86 RPCIds := url.QueryEscape(strings.Join(RPCIdsSlice, ","))
87 reqUrl := "https://groups.google.com/_/GroupsFrontendUi/data/batchexecute?rpcids=" + RPCIds + "&bl=" + DefaultBL + "&hl=en&_reqid=" + strconv.Itoa(reqId)
88 if *authenticated {
89 reqUrl += "&f.sid=" + url.QueryEscape(*fsid)
90 }
91
92 req, err := http.NewRequest("POST", reqUrl, strings.NewReader(v.Encode()))
93 if err != nil {
94 return nil, err
95 }
96 req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
97 if *authenticated {
98 for _, c := range realCookies {
99 req.AddCookie(c)
100 }
101 }
102 c := &http.Client{}
103 resp, err := c.Do(req)
104 reqId += 100000
105 if err != nil {
106 return nil, err
107 }
108
109 if resp.StatusCode != 200 {
110 return nil, fmt.Errorf("Status code is %v", resp.StatusCode)
111 }
112
113 var respBody [][]interface{}
114 io.CopyN(ioutil.Discard, resp.Body, 6) // Discard first 6 bytes
115 err = json.NewDecoder(resp.Body).Decode(&respBody)
116 if err != nil {
117 return nil, err
118 }
119
120 var responses []Response
121 for _, r := range respBody {
122 if len(r) < 7 || r[0] != "wrb.fr" {
123 continue
124 }
125
126 rpc, ok1 := r[1].(string)
127 data, ok2 := r[2].(string)
128 index, ok3 := r[6].(string)
129 if !ok1 {
130 return nil, fmt.Errorf("Couldn't parse the response (expected a string with the rpc ID).")
131 }
132
133 var response Response
134 if !ok2 || !ok3 {
135 response = Response{
136 Rpc: rpc,
137 Ok: false,
138 }
139 } else {
140 response = Response{
141 Rpc: rpc,
142 Data: data,
143 Index: index,
144 Ok: true,
145 }
146 }
147 responses = append(responses, response)
148 }
149
150 return &responses, nil
151}
152
153func getConversations(group string, paginationToken string, num int) (*ConversationListResponse, error) {
154 request := make([]interface{}, 3)
155 request[0] = group
156 request[1] = num
157 request[2] = paginationToken
158 reqText, err := json.Marshal(request)
159 if err != nil {
160 return nil, err
161 }
162
163 requests := []Request{
164 Request{
165 Rpc: RPCIDListConversations,
166 Request: string(reqText),
167 },
168 }
169
170 resp, err := batchRequest(&requests)
171 if err != nil {
172 return nil, fmt.Errorf("An error occurred while requesting the conversation list: %v\n", err)
173 }
174
175 for _, r := range *resp {
176 if r.Rpc == RPCIDListConversations {
177 if !r.Ok {
178 return nil, fmt.Errorf("The server didn't fulfill the request successfully (maybe you don't have permission to view the group?)")
179 }
180
181 var body []interface{}
182 err = json.Unmarshal([]byte(r.Data), &body)
183 if err != nil {
184 return nil, fmt.Errorf("While parsing conversation list response: %v", err)
185 }
186 if len(body) < 3 {
187 return nil, fmt.Errorf("While parsing conversation list response: body isn't long enough")
188 }
189
190 var resp ConversationListResponse
191
192 // Retrieve thread IDs
193 var IDs []string
194 threads, ok := body[2].([]interface{})
195 if !ok {
196 return nil, fmt.Errorf("The conversation list response doesn't comply with the protobuf model we have seen (body[2] should be an array).")
197 }
198 for _, t := range threads {
199 ta, ok := t.([]interface{})
200 if !ok {
201 return nil, fmt.Errorf("The conversation list response doesn't comply with the protobuf model we have seen (body[2][i] should be an array).")
202 }
203 if len(ta) < 1 {
204 return nil, fmt.Errorf("While parsing conversation list response: thread isn't long enough")
205 }
206 info, ok := ta[0].([]interface{})
207 if !ok {
208 return nil, fmt.Errorf("The conversation list response doesn't comply with the protobuf model we have seen (body[2][i] should be an array).")
209 }
210 if len(info) < 2 {
211 return nil, fmt.Errorf("While parsing conversation list response: thread info isn't long enough")
212 }
213 threadId, ok := info[1].(string)
214 if !ok {
215 return nil, fmt.Errorf("The conversation list response doesn't comply with the protobuf model we have seen (body[2][i][0][1] should be a string).")
216 }
217 IDs = append(IDs, threadId)
218 }
219 resp.IDs = IDs
220
221 // Retrieve pagination token
222 if len(body) >= 4 {
223 paginationToken, ok := body[3].(string)
224 if ok {
225 resp.PaginationToken = paginationToken
226 }
227 }
228
229 return &resp, nil
230 }
231 }
232
233 return nil, fmt.Errorf("The server didn't return the conversations list correctly, or we couldn't find it.")
234}
235
236func getAllConversations(group string) (*[]string, error) {
237 paginationToken := ""
238 totalRetrieved := 0
239 var IDs []string
240 for {
241 resp, err := getConversations(group, paginationToken, 50)
242 if err != nil {
243 return nil, err
244 }
245 totalRetrieved += len(resp.IDs)
246 log.Printf("Retrieved %v posts (total: %v)...\n", len(resp.IDs), totalRetrieved)
247
248 IDs = append(IDs, resp.IDs...)
249
250 if resp.PaginationToken == "" {
251 break
252 }
253 paginationToken = resp.PaginationToken
254 time.Sleep(time.Second) // Sleep for a second to prevent overwhelming the server
255 }
256 return &IDs, nil
257}
258
259func getConversation(group string, id string, paginationToken string, num int) (*ConversationMessagesResponse, error) {
260 request := make([]interface{}, 4)
261 request[0] = group
262 request[1] = id
263 if paginationToken == "" {
264 request[2] = num
265 request[3] = nil
266 request = append(request, nil, 2)
267 } else {
268 request[2] = nil
269 request[3] = paginationToken
270 }
271 reqText, err := json.Marshal(request)
272 if err != nil {
273 return nil, err
274 }
275
276 requests := []Request{
277 Request{
278 Rpc: RPCIDListConversationMessages,
279 Request: string(reqText),
280 },
281 }
282
283 resp, err := batchRequest(&requests)
284 if err != nil {
285 return nil, fmt.Errorf("An error occurred while requesting the conversation messages: %v\n", err)
286 }
287
288 for _, r := range *resp {
289 if r.Rpc == RPCIDListConversationMessages {
290 if !r.Ok {
291 return nil, fmt.Errorf("The server didn't fulfill the request successfully (maybe you don't have permission to view the group?)")
292 }
293
294 if r.Data == "" || r.Data == "[]" {
295 return nil, fmt.Errorf("No data was returned for the thread.")
296 }
297
298 var resp ConversationMessagesResponse
299 resp.Data = r.Data
300
301 // Get pagination token
302 var body []interface{}
303 err = json.Unmarshal([]byte(r.Data), &body)
304 if err != nil {
305 return nil, fmt.Errorf("While parsing conversation list response: %v", err)
306 }
307
308 if len(body) >= 4 {
309 paginationToken, ok := body[3].(string)
310 if ok {
311 resp.PaginationToken = paginationToken
312 }
313 }
314
315 return &resp, nil
316 }
317 }
318
319 return nil, fmt.Errorf("The server didn't return the conversations list correctly, or we couldn't find it.")
320}
321
322func downloadThread(group string, id string, chFailedIDs chan string, chFinishedIDs chan string) {
323 i := 0
324 paginationToken := ""
325 for {
326 resp, err := getConversation(group, id, paginationToken, 100)
327 if err != nil {
328 log.Printf("Error downloading thread %v: %v", id, err)
329 chFailedIDs <- id
330 return
331 }
332
333 name := id + ".json"
334 if i > 0 {
335 name = id + "_" + strconv.Itoa(i) + ".json"
336 }
337 fullName := *folderName + "/" + name
338
339 err = os.WriteFile(fullName, []byte(resp.Data), 0644)
340 if err != nil {
341 log.Printf("Error downloading thread %v: couldn't write file \"%v\": %v", id, fullName, err)
342 }
343
344 if resp.PaginationToken == "" {
345 break
346 }
347 paginationToken = resp.PaginationToken
348 i++
349 }
350
351 chFinishedIDs <- id
352}
353
354func queueDownloadIfNeeded(group string, chFailedIDs chan string, chFinishedIDs chan string, IDs *[]string, nextIndex *int) {
355 if *nextIndex < len(*IDs) {
356 time.Sleep(50 * time.Millisecond)
357 go downloadThread(group, (*IDs)[*nextIndex], chFailedIDs, chFinishedIDs)
358 *nextIndex++
359 }
360}
361
362func main() {
363 flag.Parse()
364 if *group == "" {
365 log.Fatalln("A Google Group wasn't provided via the \"--group\" flag.")
366 }
367
368 if (*getList && *getThreads) || (!*getList && !*getThreads) {
369 log.Fatalln("Please specify one of --getList or --getThreads (but not both).")
370 }
371
372 if *authenticated {
373 if *cookies == "" || *fsid == "" || *at == "" {
374 log.Fatalln("If you specify --authenticated, you should also specify --cookies, --fsid and --at.")
375 }
376
377 rawRequest := fmt.Sprintf("GET / HTTP/1.0\nCookie: %s\n\n", *cookies)
378 req, err := http.ReadRequest(bufio.NewReader(strings.NewReader(rawRequest)))
379 if err == nil {
380 realCookies = req.Cookies()
381 }
382 }
383
384 if *getList {
385 log.Printf("Getting list of thread IDs for group %s...\n", *group)
386
387 file, err := os.Create(*fileName)
388 if err != nil {
389 log.Fatalf("Couldn't create file \"%v\"", *fileName)
390 }
391
392 // Get a list of conversation IDs
393 convs, err := getAllConversations(*group)
394 if err != nil {
395 log.Fatalf("Error calling getAllConversations: %v\n", err)
396 }
397
398 // Save those to the file, one by line
399 for _, id := range *convs {
400 io.WriteString(file, id+"\n")
401 }
402 }
403
404 if *getThreads {
405 log.Printf("Starting actual takeout for group %s...\n", *group)
406 scanner := bufio.NewScanner(os.Stdin)
407 var IDs []string
408 for scanner.Scan() {
409 id := scanner.Text()
410 IDs = append(IDs, id)
411 }
412 log.Printf("Total: %v threads. Beginning to download them...\n", len(IDs))
413
414 chFailedIDs := make(chan string)
415 chFinishedIDs := make(chan string)
416
417 nextIndex := -1
418 for i, id := range IDs {
419 go downloadThread(*group, id, chFailedIDs, chFinishedIDs)
420 nextIndex = i
421 if i > 10 {
422 break
423 }
424 }
425
426 failedThreads := make([]string, 0)
427 for i := 0; i < len(IDs); i++ {
428 select {
429 case id := <-chFailedIDs:
430 failedThreads = append(failedThreads, id)
431 queueDownloadIfNeeded(*group, chFailedIDs, chFinishedIDs, &IDs, &nextIndex)
432 case id := <-chFinishedIDs:
433 log.Printf("Finished downloading thread %v successfully\n", id)
434 queueDownloadIfNeeded(*group, chFailedIDs, chFinishedIDs, &IDs, &nextIndex)
435 }
436 }
437
438 log.Printf("Failed threads: %v", failedThreads)
439 }
440}