blob: a52dd45240b2e6dd2446ea3b5c6ffc7fd7536e4b [file] [log] [blame]
Dusan Kasan17e497e2017-04-10 22:44:22 +02001package parsemail
2
3import (
4 "net/mail"
5 "io"
6 "strings"
7 "mime/multipart"
8 "mime"
9 "fmt"
10 "errors"
11 "io/ioutil"
12 "time"
13 "encoding/base64"
14 "bytes"
15)
16
// MIME media types that the parser recognises, either as the top-level
// Content-Type of a message or as the Content-Type of a nested body part.
const (
	content_type_multipart_mixed       = "multipart/mixed"
	content_type_multipart_alternative = "multipart/alternative"
	content_type_multipart_related     = "multipart/related"
	content_type_text_html             = "text/html"
	content_type_text_plain            = "text/plain"
)
Dusan Kasan17e497e2017-04-10 22:44:22 +020022
Dusan Kasanb49ceb62017-04-13 00:00:36 +020023func Parse(r io.Reader) (email Email, err error) {
Dusan Kasan17e497e2017-04-10 22:44:22 +020024 msg, err := mail.ReadMessage(r);
25 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020026 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020027 }
28
Dusan Kasanb49ceb62017-04-13 00:00:36 +020029 email, err = createEmailFromHeader(msg.Header)
Dusan Kasan17e497e2017-04-10 22:44:22 +020030 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020031 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020032 }
33
Dusan Kasanb49ceb62017-04-13 00:00:36 +020034 contentType, params, err := parseContentType(msg.Header.Get("Content-Type"))
Dusan Kasan17e497e2017-04-10 22:44:22 +020035 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020036 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020037 }
38
Dusan Kasanb49ceb62017-04-13 00:00:36 +020039 switch contentType {
40 case content_type_multipart_mixed:
Dusan Kasan17e497e2017-04-10 22:44:22 +020041 email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"])
Dusan Kasanb49ceb62017-04-13 00:00:36 +020042 case content_type_multipart_alternative:
Dusan Kasan17e497e2017-04-10 22:44:22 +020043 email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartAlternative(msg.Body, params["boundary"])
Dusan Kasanb49ceb62017-04-13 00:00:36 +020044 case content_type_text_plain:
Dusan Kasan17e497e2017-04-10 22:44:22 +020045 message, _ := ioutil.ReadAll(msg.Body)
46 email.TextBody = strings.TrimSuffix(string(message[:]), "\n")
Dusan Kasanb49ceb62017-04-13 00:00:36 +020047 case content_type_text_html:
Dusan Kasan17e497e2017-04-10 22:44:22 +020048 message, _ := ioutil.ReadAll(msg.Body)
49 email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n")
Dusan Kasanb49ceb62017-04-13 00:00:36 +020050 case "":
51 err = errors.New("No top level mime type specified")
52 default:
53 err = errors.New(fmt.Sprintf("Unknown top level mime type: %s", contentType))
Dusan Kasan17e497e2017-04-10 22:44:22 +020054 }
55
Dusan Kasanb49ceb62017-04-13 00:00:36 +020056 return
57}
58
59func createEmailFromHeader(header mail.Header) (email Email, err error) {
60 email.Subject = header.Get("Subject")
61
62 email.From, err = parseAddressList(header.Get("From"))
63 if err != nil {
64 return
65 }
66
67 email.Sender, err = parseAddress(header.Get("Sender"))
68 if err != nil {
69 return
70 }
71
72 email.ReplyTo, err = parseAddressList(header.Get("Reply-To"))
73 if err != nil {
74 return
75 }
76
77 email.To, err = parseAddressList(header.Get("To"))
78 if err != nil {
79 return
80 }
81
82 email.Cc, err = parseAddressList(header.Get("Cc"))
83 if err != nil {
84 return
85 }
86
87 email.Bcc, err = parseAddressList(header.Get("Bcc"))
88 if err != nil {
89 return
90 }
91
92 email.Date, err = parseTime(header.Get("Date"))
93 if err != nil {
94 return
95 }
96
97 email.ResentFrom, err = parseAddressList(header.Get("Resent-From"))
98 if err != nil {
99 return
100 }
101
102 email.ResentSender, err = parseAddress(header.Get("Resent-Sender"))
103 if err != nil {
104 return
105 }
106
107 email.ResentTo, err = parseAddressList(header.Get("Resent-To"))
108 if err != nil {
109 return
110 }
111
112 email.ResentCc, err = parseAddressList(header.Get("Resent-Cc"))
113 if err != nil {
114 return
115 }
116
117 email.ResentBcc, err = parseAddressList(header.Get("Resent-Bcc"))
118 if err != nil {
119 return
120 }
121
122 if header.Get("Resent-Date") == "" {
123 email.ResentDate = time.Time{}
124 } else {
125 email.ResentDate, err = parseTime(header.Get("Resent-Date"))
126 if err != nil {
127 return
128 }
129 }
130
131 email.ResentMessageID = parseMessageId(header.Get("Resent-Message-ID"))
132 email.MessageID = parseMessageId(header.Get("Message-ID"))
133 email.InReplyTo = parseMessageIdList(header.Get("In-Reply-To"))
134 email.References = parseMessageIdList(header.Get("References"))
135
136 //decode whole header for easier access to extra fields
137 //todo: should we decode? aren't only standard fields mime encoded?
138 email.Header, err = decodeHeaderMime(header)
139 if err != nil {
140 return
141 }
142
143 return
144}
145
146func parseContentType(contentTypeHeader string) (contentType string, params map[string]string, err error) {
147 if contentTypeHeader == "" {
148 contentType = content_type_text_plain
149 return
150 }
151
152 return mime.ParseMediaType(contentTypeHeader)
153}
154
// parseAddress parses a single address header value. A value made up only of
// spaces and newlines (including the empty string) yields a nil address and a
// nil error; everything else is delegated to mail.ParseAddress.
func parseAddress(s string) (*mail.Address, error) {
	if strings.Trim(s, " \n") == "" {
		return nil, nil
	}

	return mail.ParseAddress(s)
}
162
// parseAddressList parses an address-list header value. A value made up only
// of spaces and newlines (including the empty string) yields an empty,
// non-nil slice and a nil error; everything else is delegated to
// mail.ParseAddressList.
func parseAddressList(s string) ([]*mail.Address, error) {
	if strings.Trim(s, " \n") == "" {
		return []*mail.Address{}, nil
	}

	return mail.ParseAddressList(s)
}
170
// parseTime parses the value of a Date-style header.
//
// The two layouts this package has always accepted are tried first:
// time.RFC1123Z and the same layout with a non-padded day. If neither
// matches, mail.ParseDate is used as a fallback so that other RFC 5322 date
// forms (for example a date without the leading day of week) are accepted
// as well.
//
// If nothing matches, the error from the second layout attempt is returned,
// which is the error callers received before the fallback existed.
func parseTime(s string) (time.Time, error) {
	t, err := time.Parse(time.RFC1123Z, s)
	if err == nil {
		return t, nil
	}

	t, err = time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", s)
	if err == nil {
		return t, nil
	}

	if parsed, parseErr := mail.ParseDate(s); parseErr == nil {
		return parsed, nil
	}

	return t, err
}
179
// parseMessageId strips any leading and trailing '<', '>' and space
// characters from a message identifier. Characters inside the identifier are
// left untouched.
func parseMessageId(s string) string {
	const wrapping = "<> "

	id := strings.Trim(s, wrapping)
	return id
}
183
184func parseMessageIdList(s string) (result []string) {
185 for _, p := range(strings.Split(s, " ")) {
186 if strings.Trim(p, " \n") != "" {
187 result = append(result, parseMessageId(p))
188 }
189 }
190
191 return
192}
193
194func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) {
195 pmr := multipart.NewReader(msg, boundary)
196 for {
197 part, err := pmr.NextPart()
198
199 if err == io.EOF {
200 break
201 } else if err != nil {
202 return textBody, htmlBody, embeddedFiles, err
203 }
204
205 contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
206
207 switch contentType {
208 case content_type_text_plain:
209 ppContent, err := ioutil.ReadAll(part)
210 if err != nil {
211 return textBody, htmlBody, embeddedFiles, err
212 }
213
214 textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
215 case content_type_text_html:
216 ppContent, err := ioutil.ReadAll(part)
217 if err != nil {
218 return textBody, htmlBody, embeddedFiles, err
219 }
220
221 htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n")
222 case content_type_multipart_related:
223 var tb, hb string
224 var ef []EmbeddedFile
225 tb, hb, ef, err = parseMultipartAlternative(part, params["boundary"])
226 htmlBody += hb
227 textBody += tb
228 embeddedFiles = append(embeddedFiles, ef...)
229 default:
230 if isEmbeddedFile(part) {
231 ef, err := decodeEmbeddedFile(part)
232 if err != nil {
233 return textBody, htmlBody, embeddedFiles, err
234 }
235
236 embeddedFiles = append(embeddedFiles, ef)
237 } else {
238 return textBody, htmlBody, embeddedFiles, errors.New(fmt.Sprintf("Can't process multipart/alternative inner mime type: %s", contentType))
239 }
240 }
241 }
242
243 return textBody, htmlBody, embeddedFiles, err
244}
245
246func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody string, attachments []Attachment, embeddedFiles []EmbeddedFile, err error) {
247 mr := multipart.NewReader(msg, boundary)
248 for {
249 part, err := mr.NextPart()
250 if err == io.EOF {
251 break
252 } else if err != nil {
253 return textBody, htmlBody, attachments, embeddedFiles, err
254 }
255
256 contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
257 if err != nil {
258 return textBody, htmlBody, attachments, embeddedFiles, err
259 }
260
261 if contentType == content_type_multipart_alternative {
262 textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"])
263 if err != nil {
264 return textBody, htmlBody, attachments, embeddedFiles, err
265 }
266 } else if isAttachment(part) {
267 at, err := decodeAttachment(part)
268 if err != nil {
269 return textBody, htmlBody, attachments, embeddedFiles, err
270 }
271
272 attachments = append(attachments, at)
273 } else {
274 return textBody, htmlBody, attachments, embeddedFiles, errors.New(fmt.Sprintf("Unknown multipart/mixed nested mime type: %s", contentType))
275 }
276 }
277
278 return textBody, htmlBody, attachments, embeddedFiles, err
Dusan Kasan17e497e2017-04-10 22:44:22 +0200279}
280
// decodeMimeSentence decodes the RFC 2047 encoded-words found in s and
// returns the resulting text.
//
// The input is split on single spaces and every non-empty token is passed to
// mime.WordDecoder.Decode. Tokens that fail to decode (plain words, malformed
// encoded-words, unsupported charsets) are kept verbatim.
//
// Spacing follows RFC 2047: the spaces between two adjacent encoded-words
// are dropped, while all other spaces, including leading, trailing and
// repeated ones, are preserved exactly.
//
// The returned error is always nil; it is part of the signature so callers
// do not need to change if decoding ever becomes fallible.
func decodeMimeSentence(s string) (string, error) {
	dec := new(mime.WordDecoder)

	var out bytes.Buffer
	gap := 0            // spaces seen since the last non-empty token
	prevEncoded := false // whether the last non-empty token was an encoded-word

	for i, word := range strings.Split(s, " ") {
		if i > 0 {
			gap++
		}
		if word == "" {
			continue
		}

		decoded, err := dec.Decode(word)
		encoded := err == nil
		if !encoded {
			decoded = word
		}

		// Only the whitespace separating two encoded-words is discarded.
		if !(encoded && prevEncoded) {
			out.WriteString(strings.Repeat(" ", gap))
		}
		out.WriteString(decoded)

		gap = 0
		prevEncoded = encoded
	}

	// Preserve trailing spaces.
	out.WriteString(strings.Repeat(" ", gap))

	return out.String(), nil
}
301
Dusan Kasan17e497e2017-04-10 22:44:22 +0200302func decodeHeaderMime(header mail.Header) (mail.Header, error) {
303 parsedHeader := map[string][]string{}
304
305 for headerName, headerData := range header {
306
307 parsedHeaderData := []string{}
308 for _, headerValue := range headerData {
309 decodedHeaderValue, err := decodeMimeSentence(headerValue)
310 if err != nil {
311 return mail.Header{}, err
312 }
313 parsedHeaderData = append(parsedHeaderData, decodedHeaderValue)
314 }
315
316 parsedHeader[headerName] = parsedHeaderData
317 }
318
319 return mail.Header(parsedHeader), nil
320}
321
// decodePartData reads the whole content of part, decodes it according to
// the part's Content-Transfer-Encoding header and returns a reader over the
// decoded bytes.
//
// Only base64 is supported. The encoding name is matched case-insensitively
// and ignoring surrounding whitespace, so "BASE64" and "Base64" are accepted
// as well. Any other value, including a missing header, results in an
// "Unknown encoding" error; malformed base64 data results in the decoder's
// error.
func decodePartData(part *multipart.Part) (io.Reader, error) {
	encoding := part.Header.Get("Content-Transfer-Encoding")
	if !strings.EqualFold(strings.TrimSpace(encoding), "base64") {
		return nil, fmt.Errorf("Unknown encoding: %s", encoding)
	}

	decoded, err := ioutil.ReadAll(base64.NewDecoder(base64.StdEncoding, part))
	if err != nil {
		return nil, err
	}

	return bytes.NewReader(decoded), nil
}
337
// isEmbeddedFile reports whether part carries a non-empty
// Content-Transfer-Encoding header, which is the only criterion used to
// treat a part as an embedded file.
func isEmbeddedFile(part *multipart.Part) bool {
	encoding := part.Header.Get("Content-Transfer-Encoding")
	return len(encoding) > 0
}
341
342func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {
343 cid, err := decodeMimeSentence(part.Header.Get("Content-Id"));
344 if err != nil {
345 return
346 }
347
348 decoded, err := decodePartData(part)
349 if err != nil {
350 return
351 }
352
353 ef.CID = strings.Trim(cid, "<>")
354 ef.Data = decoded
355 ef.ContentType = part.Header.Get("Content-Type")
356
357 return
358}
359
// isAttachment reports whether part declares a file name, as returned by
// multipart.Part.FileName.
func isAttachment(part *multipart.Part) bool {
	name := part.FileName()
	return name != ""
}
363
364func decodeAttachment(part *multipart.Part) (at Attachment, err error) {
365 filename, err := decodeMimeSentence(part.FileName());
366 if err != nil {
367 return
368 }
369
370 decoded, err := decodePartData(part)
371 if err != nil {
372 return
373 }
374
375 at.Filename = filename
376 at.Data = decoded
377 at.ContentType = strings.Split(part.Header.Get("Content-Type"), ";")[0]
378
379 return
380}
381
// Attachment is a file attached to a message, taken from a multipart/mixed
// part that declares a file name.
type Attachment struct {
	// Filename is the MIME-decoded file name of the part.
	Filename string
	// ContentType is the part's Content-Type without its parameters.
	ContentType string
	// Data reads the decoded content of the attachment.
	Data io.Reader
}
387
// EmbeddedFile is a file embedded in a message body, taken from a part that
// carries a Content-Transfer-Encoding header.
type EmbeddedFile struct {
	// CID is the part's Content-Id without the surrounding angle brackets.
	CID string
	// ContentType is the part's raw Content-Type header value.
	ContentType string
	// Data reads the decoded content of the file.
	Data io.Reader
}
393
// MessageID is a string type for message identifiers.
// NOTE(review): no field in this file uses it; the Email ID fields are plain
// strings. Confirm whether it is used elsewhere before relying on it.
type MessageID string
395
Dusan Kasan17e497e2017-04-10 22:44:22 +0200396type Email struct {
397 Header mail.Header
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200398
399 Subject string
400 Sender *mail.Address
401 From []*mail.Address
402 ReplyTo []*mail.Address
403 To []*mail.Address
404 Cc []*mail.Address
405 Bcc []*mail.Address
406 Date time.Time
407 MessageID string
408 InReplyTo []string
409 References []string
410
411 ResentFrom []*mail.Address
412 ResentSender *mail.Address
413 ResentTo []*mail.Address
414 ResentDate time.Time
415 ResentCc []*mail.Address
416 ResentBcc []*mail.Address
417 ResentMessageID string
418
419 Received string
420
Dusan Kasan17e497e2017-04-10 22:44:22 +0200421 HTMLBody string
422 TextBody string
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200423
Dusan Kasan17e497e2017-04-10 22:44:22 +0200424 Attachments []Attachment
425 EmbeddedFiles []EmbeddedFile
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200426}