blob: 40e3ee4b6930c4d2d013886658780dcde8d4379d [file] [log] [blame]
Dusan Kasan17e497e2017-04-10 22:44:22 +02001package parsemail
2
3import (
Dusan Kasan17e497e2017-04-10 22:44:22 +02004 "bytes"
Dusan Kasan4595dfe2017-04-13 00:38:24 +02005 "encoding/base64"
Dusan Kasan4595dfe2017-04-13 00:38:24 +02006 "fmt"
7 "io"
8 "io/ioutil"
9 "mime"
10 "mime/multipart"
11 "net/mail"
12 "strings"
13 "time"
Dusan Kasan17e497e2017-04-10 22:44:22 +020014)
15
// MIME media types the parser dispatches on when walking a message.
const (
	contentTypeMultipartMixed       = "multipart/mixed"
	contentTypeMultipartAlternative = "multipart/alternative"
	contentTypeMultipartRelated     = "multipart/related"
	contentTypeTextHtml             = "text/html"
	contentTypeTextPlain            = "text/plain"
)
Dusan Kasan17e497e2017-04-10 22:44:22 +020021
Dusan Kasan45ca2642017-04-18 10:39:35 +020022// Parse an email message read from io.Reader into parsemail.Email struct
Dusan Kasanb49ceb62017-04-13 00:00:36 +020023func Parse(r io.Reader) (email Email, err error) {
Dusan Kasan4595dfe2017-04-13 00:38:24 +020024 msg, err := mail.ReadMessage(r)
Dusan Kasan17e497e2017-04-10 22:44:22 +020025 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020026 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020027 }
28
Dusan Kasanb49ceb62017-04-13 00:00:36 +020029 email, err = createEmailFromHeader(msg.Header)
Dusan Kasan17e497e2017-04-10 22:44:22 +020030 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020031 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020032 }
33
Dusan Kasanb49ceb62017-04-13 00:00:36 +020034 contentType, params, err := parseContentType(msg.Header.Get("Content-Type"))
Dusan Kasan17e497e2017-04-10 22:44:22 +020035 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020036 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020037 }
38
Dusan Kasanb49ceb62017-04-13 00:00:36 +020039 switch contentType {
Dusan Kasan45ca2642017-04-18 10:39:35 +020040 case contentTypeMultipartMixed:
Dusan Kasan17e497e2017-04-10 22:44:22 +020041 email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"])
Dusan Kasan45ca2642017-04-18 10:39:35 +020042 case contentTypeMultipartAlternative:
Dusan Kasan17e497e2017-04-10 22:44:22 +020043 email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartAlternative(msg.Body, params["boundary"])
Dusan Kasan45ca2642017-04-18 10:39:35 +020044 case contentTypeTextPlain:
Dusan Kasan17e497e2017-04-10 22:44:22 +020045 message, _ := ioutil.ReadAll(msg.Body)
46 email.TextBody = strings.TrimSuffix(string(message[:]), "\n")
Dusan Kasan45ca2642017-04-18 10:39:35 +020047 case contentTypeTextHtml:
Dusan Kasan17e497e2017-04-10 22:44:22 +020048 message, _ := ioutil.ReadAll(msg.Body)
49 email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n")
Dusan Kasanb49ceb62017-04-13 00:00:36 +020050 default:
Dusan Kasan45ca2642017-04-18 10:39:35 +020051 err = fmt.Errorf("Unknown top level mime type: %s", contentType)
Dusan Kasan17e497e2017-04-10 22:44:22 +020052 }
53
Dusan Kasanb49ceb62017-04-13 00:00:36 +020054 return
55}
56
57func createEmailFromHeader(header mail.Header) (email Email, err error) {
Dusan Kasane668cf22017-04-18 12:56:51 +020058 hp := headerParser{header: &header}
59
Dusan Kasanf4376a62017-05-23 21:03:55 +020060 email.Subject = decodeMimeSentence(header.Get("Subject"))
Dusan Kasane668cf22017-04-18 12:56:51 +020061 email.From = hp.parseAddressList(header.Get("From"))
62 email.Sender = hp.parseAddress(header.Get("Sender"))
63 email.ReplyTo = hp.parseAddressList(header.Get("Reply-To"))
64 email.To = hp.parseAddressList(header.Get("To"))
65 email.Cc = hp.parseAddressList(header.Get("Cc"))
66 email.Bcc = hp.parseAddressList(header.Get("Bcc"))
67 email.Date = hp.parseTime(header.Get("Date"))
68 email.ResentFrom = hp.parseAddressList(header.Get("Resent-From"))
69 email.ResentSender = hp.parseAddress(header.Get("Resent-Sender"))
70 email.ResentTo = hp.parseAddressList(header.Get("Resent-To"))
71 email.ResentCc = hp.parseAddressList(header.Get("Resent-Cc"))
72 email.ResentBcc = hp.parseAddressList(header.Get("Resent-Bcc"))
73 email.ResentMessageID = hp.parseMessageId(header.Get("Resent-Message-ID"))
74 email.MessageID = hp.parseMessageId(header.Get("Message-ID"))
75 email.InReplyTo = hp.parseMessageIdList(header.Get("In-Reply-To"))
76 email.References = hp.parseMessageIdList(header.Get("References"))
77 email.ResentDate = hp.parseTime(header.Get("Resent-Date"))
Dusan Kasanb49ceb62017-04-13 00:00:36 +020078
Dusan Kasane668cf22017-04-18 12:56:51 +020079 if hp.err != nil {
80 err = hp.err
Dusan Kasanb49ceb62017-04-13 00:00:36 +020081 return
82 }
83
Dusan Kasanb49ceb62017-04-13 00:00:36 +020084 //decode whole header for easier access to extra fields
85 //todo: should we decode? aren't only standard fields mime encoded?
86 email.Header, err = decodeHeaderMime(header)
87 if err != nil {
88 return
89 }
90
91 return
92}
93
94func parseContentType(contentTypeHeader string) (contentType string, params map[string]string, err error) {
95 if contentTypeHeader == "" {
Dusan Kasan45ca2642017-04-18 10:39:35 +020096 contentType = contentTypeTextPlain
Dusan Kasanb49ceb62017-04-13 00:00:36 +020097 return
98 }
99
100 return mime.ParseMediaType(contentTypeHeader)
101}
102
Kevin Chen5dc5bc82018-05-03 22:07:35 -0400103func parseMultipartRelated(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) {
104 pmr := multipart.NewReader(msg, boundary)
105 for {
106 part, err := pmr.NextPart()
107
108 if err == io.EOF {
109 break
110 } else if err != nil {
111 return textBody, htmlBody, embeddedFiles, err
112 }
113
114 contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
115 if err != nil {
116 return textBody, htmlBody, embeddedFiles, err
117 }
118
119 switch contentType {
120 case contentTypeTextPlain:
121 ppContent, err := ioutil.ReadAll(part)
122 if err != nil {
123 return textBody, htmlBody, embeddedFiles, err
124 }
125
126 textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
127 case contentTypeTextHtml:
128 ppContent, err := ioutil.ReadAll(part)
129 if err != nil {
130 return textBody, htmlBody, embeddedFiles, err
131 }
132
133 htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n")
134 case contentTypeMultipartAlternative:
135 tb, hb, ef, err := parseMultipartAlternative(part, params["boundary"])
136 if err != nil {
137 return textBody, htmlBody, embeddedFiles, err
138 }
139
140 htmlBody += hb
141 textBody += tb
142 embeddedFiles = append(embeddedFiles, ef...)
143 default:
144 if isEmbeddedFile(part) {
145 ef, err := decodeEmbeddedFile(part)
146 if err != nil {
147 return textBody, htmlBody, embeddedFiles, err
148 }
149
150 embeddedFiles = append(embeddedFiles, ef)
151 } else {
152 return textBody, htmlBody, embeddedFiles, fmt.Errorf("Can't process multipart/related inner mime type: %s", contentType)
153 }
154 }
155 }
156
157 return textBody, htmlBody, embeddedFiles, err
158}
159
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200160func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) {
161 pmr := multipart.NewReader(msg, boundary)
162 for {
163 part, err := pmr.NextPart()
164
165 if err == io.EOF {
166 break
167 } else if err != nil {
168 return textBody, htmlBody, embeddedFiles, err
169 }
170
171 contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
Dusan Kasanc661cc02017-04-18 10:51:51 +0200172 if err != nil {
173 return textBody, htmlBody, embeddedFiles, err
174 }
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200175
176 switch contentType {
Dusan Kasan45ca2642017-04-18 10:39:35 +0200177 case contentTypeTextPlain:
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200178 ppContent, err := ioutil.ReadAll(part)
179 if err != nil {
180 return textBody, htmlBody, embeddedFiles, err
181 }
182
183 textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
Dusan Kasan45ca2642017-04-18 10:39:35 +0200184 case contentTypeTextHtml:
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200185 ppContent, err := ioutil.ReadAll(part)
186 if err != nil {
187 return textBody, htmlBody, embeddedFiles, err
188 }
189
190 htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n")
Dusan Kasan45ca2642017-04-18 10:39:35 +0200191 case contentTypeMultipartRelated:
Kevin Chen9b9506a2018-05-03 22:17:38 -0400192 tb, hb, ef, err := parseMultipartRelated(part, params["boundary"])
Dusan Kasan1a966482017-04-18 10:45:25 +0200193 if err != nil {
194 return textBody, htmlBody, embeddedFiles, err
195 }
196
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200197 htmlBody += hb
198 textBody += tb
199 embeddedFiles = append(embeddedFiles, ef...)
200 default:
201 if isEmbeddedFile(part) {
202 ef, err := decodeEmbeddedFile(part)
203 if err != nil {
204 return textBody, htmlBody, embeddedFiles, err
205 }
206
207 embeddedFiles = append(embeddedFiles, ef)
208 } else {
Dusan Kasan45ca2642017-04-18 10:39:35 +0200209 return textBody, htmlBody, embeddedFiles, fmt.Errorf("Can't process multipart/alternative inner mime type: %s", contentType)
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200210 }
211 }
212 }
213
214 return textBody, htmlBody, embeddedFiles, err
215}
216
217func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody string, attachments []Attachment, embeddedFiles []EmbeddedFile, err error) {
218 mr := multipart.NewReader(msg, boundary)
219 for {
220 part, err := mr.NextPart()
221 if err == io.EOF {
222 break
223 } else if err != nil {
224 return textBody, htmlBody, attachments, embeddedFiles, err
225 }
226
227 contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
228 if err != nil {
229 return textBody, htmlBody, attachments, embeddedFiles, err
230 }
231
Dusan Kasan45ca2642017-04-18 10:39:35 +0200232 if contentType == contentTypeMultipartAlternative {
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200233 textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"])
234 if err != nil {
235 return textBody, htmlBody, attachments, embeddedFiles, err
236 }
Kevin Chen5dc5bc82018-05-03 22:07:35 -0400237 } else if contentType == contentTypeMultipartRelated {
238 textBody, htmlBody, embeddedFiles, err = parseMultipartRelated(part, params["boundary"])
239 if err != nil {
240 return textBody, htmlBody, attachments, embeddedFiles, err
241 }
Maya Rashisha3803bd2019-06-08 17:53:21 +0300242 } else if contentType == contentTypeTextPlain {
243 ppContent, err := ioutil.ReadAll(part)
244 if err != nil {
245 return textBody, htmlBody, attachments, embeddedFiles, err
246 }
247
248 textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200249 } else if isAttachment(part) {
250 at, err := decodeAttachment(part)
251 if err != nil {
252 return textBody, htmlBody, attachments, embeddedFiles, err
253 }
254
255 attachments = append(attachments, at)
256 } else {
Dusan Kasan45ca2642017-04-18 10:39:35 +0200257 return textBody, htmlBody, attachments, embeddedFiles, fmt.Errorf("Unknown multipart/mixed nested mime type: %s", contentType)
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200258 }
259 }
260
261 return textBody, htmlBody, attachments, embeddedFiles, err
Dusan Kasan17e497e2017-04-10 22:44:22 +0200262}
263
// decodeMimeSentence decodes every RFC 2047 encoded-word ("=?charset?Q?..?=")
// found in s and returns the resulting text.
//
// s is split on single spaces and each token is decoded on its own. Tokens
// that are not valid encoded-words (including ones in a charset the decoder
// does not support) are kept verbatim. Spaces are preserved exactly, with one
// exception: spaces that separate two adjacent encoded-words are dropped, as
// RFC 2047 requires, so a long encoded phrase split over several words is
// joined back together. A string without encoded-words is returned unchanged.
func decodeMimeSentence(s string) string {
	dec := new(mime.WordDecoder)

	var pieces []string
	prevEncoded := false // whether the last non-empty token was an encoded-word
	pendingSpaces := 0   // separators seen since the last non-empty token

	for i, token := range strings.Split(s, " ") {
		if i > 0 {
			pendingSpaces++
		}
		if token == "" {
			// Run of consecutive spaces; decide what to do with them once
			// the next real token (or the end of the string) is reached.
			continue
		}

		text, err := dec.Decode(token)
		encoded := err == nil
		if !encoded {
			text = token
		}

		// Only whitespace between two encoded-words is insignificant.
		if !(encoded && prevEncoded) {
			pieces = append(pieces, strings.Repeat(" ", pendingSpaces))
		}
		pieces = append(pieces, text)

		prevEncoded = encoded
		pendingSpaces = 0
	}

	// Trailing spaces are part of the original text.
	pieces = append(pieces, strings.Repeat(" ", pendingSpaces))

	return strings.Join(pieces, "")
}
284
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200285func decodeHeaderMime(header mail.Header) (mail.Header, error) {
Dusan Kasan17e497e2017-04-10 22:44:22 +0200286 parsedHeader := map[string][]string{}
287
288 for headerName, headerData := range header {
289
290 parsedHeaderData := []string{}
291 for _, headerValue := range headerData {
Dusan Kasanf4376a62017-05-23 21:03:55 +0200292 parsedHeaderData = append(parsedHeaderData, decodeMimeSentence(headerValue))
Dusan Kasan17e497e2017-04-10 22:44:22 +0200293 }
294
295 parsedHeader[headerName] = parsedHeaderData
296 }
297
298 return mail.Header(parsedHeader), nil
299}
300
// decodePartData reads the whole content of part, undoing its
// Content-Transfer-Encoding, and returns the decoded bytes as a reader.
//
// Supported encodings (matched case-insensitively):
//   - "base64": the content is base64-decoded;
//   - "7bit", "8bit", "binary" or no encoding header at all: these are
//     identity encodings, so the content is returned unchanged.
//
// Any other encoding yields an error. The content is buffered in memory so
// that the returned reader does not depend on part, which the multipart
// reader stops serving once it advances to the next part.
func decodePartData(part *multipart.Part) (io.Reader, error) {
	encoding := part.Header.Get("Content-Transfer-Encoding")

	var src io.Reader
	switch strings.ToLower(encoding) {
	case "base64":
		src = base64.NewDecoder(base64.StdEncoding, part)
	case "", "7bit", "8bit", "binary":
		src = part
	default:
		return nil, fmt.Errorf("Unknown encoding: %s", encoding)
	}

	data, err := ioutil.ReadAll(src)
	if err != nil {
		return nil, err
	}

	return bytes.NewReader(data), nil
}
316
// isEmbeddedFile reports whether part has a non-empty
// Content-Transfer-Encoding header, which is what this package uses to
// recognise an embedded file.
func isEmbeddedFile(part *multipart.Part) bool {
	encoding := part.Header.Get("Content-Transfer-Encoding")

	return len(encoding) > 0
}
320
321func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {
Dusan Kasanf4376a62017-05-23 21:03:55 +0200322 cid := decodeMimeSentence(part.Header.Get("Content-Id"))
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200323 decoded, err := decodePartData(part)
324 if err != nil {
325 return
326 }
327
328 ef.CID = strings.Trim(cid, "<>")
329 ef.Data = decoded
330 ef.ContentType = part.Header.Get("Content-Type")
331
332 return
333}
334
// isAttachment reports whether part carries a file name, as returned by
// part.FileName.
func isAttachment(part *multipart.Part) bool {
	name := part.FileName()

	return len(name) > 0
}
338
339func decodeAttachment(part *multipart.Part) (at Attachment, err error) {
Dusan Kasanf4376a62017-05-23 21:03:55 +0200340 filename := decodeMimeSentence(part.FileName())
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200341 decoded, err := decodePartData(part)
342 if err != nil {
343 return
344 }
345
346 at.Filename = filename
347 at.Data = decoded
348 at.ContentType = strings.Split(part.Header.Get("Content-Type"), ";")[0]
349
350 return
351}
352
// headerParser parses individual header values and remembers the first error
// it runs into in err. Once err is set, every further parse call is a no-op
// that returns the zero value, so a caller can parse a whole series of fields
// and check err a single time at the end.
//
// All methods use a pointer receiver: they must update err on the caller's
// parser, not on a copy.
type headerParser struct {
	header *mail.Header
	err    error
}

// parseAddress parses s as a single RFC 5322 address. It returns nil when s
// is blank, when a previous call already failed, or when s cannot be parsed
// (in which case the failure is recorded in hp.err).
func (hp *headerParser) parseAddress(s string) (ma *mail.Address) {
	if hp.err != nil {
		return nil
	}

	if strings.Trim(s, " \n") == "" {
		return nil
	}

	ma, hp.err = mail.ParseAddress(s)

	return ma
}

// parseAddressList parses s as a comma-separated list of RFC 5322 addresses.
// It returns nil when s is blank, when a previous call already failed, or
// when s cannot be parsed (in which case the failure is recorded in hp.err).
func (hp *headerParser) parseAddressList(s string) (ma []*mail.Address) {
	if hp.err != nil {
		return nil
	}

	if strings.Trim(s, " \n") == "" {
		return nil
	}

	ma, hp.err = mail.ParseAddressList(s)

	return ma
}

// parseTime parses s as a message date. It returns the zero time when s is
// empty or a previous call already failed.
//
// Two fixed layouts are tried first (RFC 1123 with numeric zone, with and
// without a zero-padded day); if neither matches, mail.ParseDate gets a
// chance so that other RFC 5322 date forms are accepted as well. hp.err is
// left nil as soon as one attempt succeeds and holds the last failure
// otherwise.
func (hp *headerParser) parseTime(s string) (t time.Time) {
	if hp.err != nil || s == "" {
		return
	}

	for _, layout := range []string{time.RFC1123Z, "Mon, 2 Jan 2006 15:04:05 -0700"} {
		if t, hp.err = time.Parse(layout, s); hp.err == nil {
			return t
		}
	}

	t, hp.err = mail.ParseDate(s)

	return t
}

// parseMessageId strips surrounding "<", ">" and spaces from s. It returns
// "" when a previous call already failed.
func (hp *headerParser) parseMessageId(s string) string {
	if hp.err != nil {
		return ""
	}

	return strings.Trim(s, "<> ")
}

// parseMessageIdList splits s on spaces and returns the cleaned-up message id
// of every non-blank element. It returns nil when s contains no ids or when a
// previous call already failed.
func (hp *headerParser) parseMessageIdList(s string) (result []string) {
	if hp.err != nil {
		return
	}

	for _, p := range strings.Split(s, " ") {
		if strings.Trim(p, " \n") != "" {
			result = append(result, hp.parseMessageId(p))
		}
	}

	return
}
421
// Attachment is a file attached to a message.
type Attachment struct {
	// Filename is the attachment's file name.
	Filename string
	// ContentType is the attachment's media type.
	ContentType string
	// Data provides the attachment's content.
	Data io.Reader
}
428
// EmbeddedFile is a file embedded in a message body and referenced by its
// content id.
type EmbeddedFile struct {
	// CID is the content id of the embedded file.
	CID string
	// ContentType is the embedded file's content type.
	ContentType string
	// Data provides the embedded file's content.
	Data io.Reader
}
435
Dusan Kasan1a966482017-04-18 10:45:25 +0200436// Email with fields for all the headers defined in RFC5322 with it's attachments and
Dusan Kasan17e497e2017-04-10 22:44:22 +0200437type Email struct {
438 Header mail.Header
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200439
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200440 Subject string
441 Sender *mail.Address
442 From []*mail.Address
443 ReplyTo []*mail.Address
444 To []*mail.Address
445 Cc []*mail.Address
446 Bcc []*mail.Address
447 Date time.Time
448 MessageID string
449 InReplyTo []string
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200450 References []string
451
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200452 ResentFrom []*mail.Address
453 ResentSender *mail.Address
454 ResentTo []*mail.Address
455 ResentDate time.Time
456 ResentCc []*mail.Address
457 ResentBcc []*mail.Address
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200458 ResentMessageID string
459
Dusan Kasan17e497e2017-04-10 22:44:22 +0200460 HTMLBody string
461 TextBody string
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200462
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200463 Attachments []Attachment
Dusan Kasan17e497e2017-04-10 22:44:22 +0200464 EmbeddedFiles []EmbeddedFile
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200465}