blob: 0580481ad27cd4b4fcb10c2e1f7f9bc49f2d9ec2 [file] [log] [blame]
Dusan Kasan17e497e2017-04-10 22:44:22 +02001package parsemail
2
3import (
Dusan Kasan17e497e2017-04-10 22:44:22 +02004 "bytes"
Dusan Kasan4595dfe2017-04-13 00:38:24 +02005 "encoding/base64"
Dusan Kasan4595dfe2017-04-13 00:38:24 +02006 "fmt"
7 "io"
8 "io/ioutil"
9 "mime"
10 "mime/multipart"
11 "net/mail"
12 "strings"
13 "time"
Dusan Kasan17e497e2017-04-10 22:44:22 +020014)
15
// Media types that the parser dispatches on while walking a message body.
const (
	contentTypeMultipartMixed       = "multipart/mixed"
	contentTypeMultipartAlternative = "multipart/alternative"
	contentTypeMultipartRelated     = "multipart/related"
	contentTypeTextHtml             = "text/html"
	contentTypeTextPlain            = "text/plain"
)
Dusan Kasan17e497e2017-04-10 22:44:22 +020021
Dusan Kasan45ca2642017-04-18 10:39:35 +020022// Parse an email message read from io.Reader into parsemail.Email struct
Dusan Kasanb49ceb62017-04-13 00:00:36 +020023func Parse(r io.Reader) (email Email, err error) {
Dusan Kasan4595dfe2017-04-13 00:38:24 +020024 msg, err := mail.ReadMessage(r)
Dusan Kasan17e497e2017-04-10 22:44:22 +020025 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020026 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020027 }
28
Dusan Kasanb49ceb62017-04-13 00:00:36 +020029 email, err = createEmailFromHeader(msg.Header)
Dusan Kasan17e497e2017-04-10 22:44:22 +020030 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020031 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020032 }
33
Dusan Kasanb49ceb62017-04-13 00:00:36 +020034 contentType, params, err := parseContentType(msg.Header.Get("Content-Type"))
Dusan Kasan17e497e2017-04-10 22:44:22 +020035 if err != nil {
Dusan Kasanb49ceb62017-04-13 00:00:36 +020036 return
Dusan Kasan17e497e2017-04-10 22:44:22 +020037 }
38
Dusan Kasanb49ceb62017-04-13 00:00:36 +020039 switch contentType {
Dusan Kasan45ca2642017-04-18 10:39:35 +020040 case contentTypeMultipartMixed:
Dusan Kasan17e497e2017-04-10 22:44:22 +020041 email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"])
Dusan Kasan45ca2642017-04-18 10:39:35 +020042 case contentTypeMultipartAlternative:
Dusan Kasan17e497e2017-04-10 22:44:22 +020043 email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartAlternative(msg.Body, params["boundary"])
Dusan Kasan45ca2642017-04-18 10:39:35 +020044 case contentTypeTextPlain:
Dusan Kasan17e497e2017-04-10 22:44:22 +020045 message, _ := ioutil.ReadAll(msg.Body)
46 email.TextBody = strings.TrimSuffix(string(message[:]), "\n")
Dusan Kasan45ca2642017-04-18 10:39:35 +020047 case contentTypeTextHtml:
Dusan Kasan17e497e2017-04-10 22:44:22 +020048 message, _ := ioutil.ReadAll(msg.Body)
49 email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n")
Dusan Kasanb49ceb62017-04-13 00:00:36 +020050 default:
Dusan Kasan45ca2642017-04-18 10:39:35 +020051 err = fmt.Errorf("Unknown top level mime type: %s", contentType)
Dusan Kasan17e497e2017-04-10 22:44:22 +020052 }
53
Dusan Kasanb49ceb62017-04-13 00:00:36 +020054 return
55}
56
57func createEmailFromHeader(header mail.Header) (email Email, err error) {
58 email.Subject = header.Get("Subject")
59
60 email.From, err = parseAddressList(header.Get("From"))
61 if err != nil {
62 return
63 }
64
65 email.Sender, err = parseAddress(header.Get("Sender"))
66 if err != nil {
67 return
68 }
69
70 email.ReplyTo, err = parseAddressList(header.Get("Reply-To"))
71 if err != nil {
72 return
73 }
74
75 email.To, err = parseAddressList(header.Get("To"))
76 if err != nil {
77 return
78 }
79
80 email.Cc, err = parseAddressList(header.Get("Cc"))
81 if err != nil {
82 return
83 }
84
85 email.Bcc, err = parseAddressList(header.Get("Bcc"))
86 if err != nil {
87 return
88 }
89
90 email.Date, err = parseTime(header.Get("Date"))
91 if err != nil {
92 return
93 }
94
95 email.ResentFrom, err = parseAddressList(header.Get("Resent-From"))
96 if err != nil {
97 return
98 }
99
100 email.ResentSender, err = parseAddress(header.Get("Resent-Sender"))
101 if err != nil {
102 return
103 }
104
105 email.ResentTo, err = parseAddressList(header.Get("Resent-To"))
106 if err != nil {
107 return
108 }
109
110 email.ResentCc, err = parseAddressList(header.Get("Resent-Cc"))
111 if err != nil {
112 return
113 }
114
115 email.ResentBcc, err = parseAddressList(header.Get("Resent-Bcc"))
116 if err != nil {
117 return
118 }
119
120 if header.Get("Resent-Date") == "" {
121 email.ResentDate = time.Time{}
122 } else {
123 email.ResentDate, err = parseTime(header.Get("Resent-Date"))
124 if err != nil {
125 return
126 }
127 }
128
129 email.ResentMessageID = parseMessageId(header.Get("Resent-Message-ID"))
130 email.MessageID = parseMessageId(header.Get("Message-ID"))
131 email.InReplyTo = parseMessageIdList(header.Get("In-Reply-To"))
132 email.References = parseMessageIdList(header.Get("References"))
133
134 //decode whole header for easier access to extra fields
135 //todo: should we decode? aren't only standard fields mime encoded?
136 email.Header, err = decodeHeaderMime(header)
137 if err != nil {
138 return
139 }
140
141 return
142}
143
144func parseContentType(contentTypeHeader string) (contentType string, params map[string]string, err error) {
145 if contentTypeHeader == "" {
Dusan Kasan45ca2642017-04-18 10:39:35 +0200146 contentType = contentTypeTextPlain
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200147 return
148 }
149
150 return mime.ParseMediaType(contentTypeHeader)
151}
152
// parseAddress parses a single-address header value with mail.ParseAddress.
// A value made up solely of spaces and newlines (including the empty string)
// is treated as "header not present" and yields a nil address and nil error.
func parseAddress(s string) (*mail.Address, error) {
	const blanks = " \n"

	if strings.Trim(s, blanks) == "" {
		return nil, nil
	}

	return mail.ParseAddress(s)
}
160
// parseAddressList parses a comma-separated address header value with
// mail.ParseAddressList. A value made up solely of spaces and newlines
// (including the empty string) yields an empty, non-nil slice and nil error.
func parseAddressList(s string) ([]*mail.Address, error) {
	const blanks = " \n"

	if strings.Trim(s, blanks) == "" {
		return []*mail.Address{}, nil
	}

	return mail.ParseAddressList(s)
}
168
// parseTime parses the value of an RFC 5322 date header such as Date or
// Resent-Date.
//
// The common "Mon, 2 Jan 2006 15:04:05 -0700" form is tried first; the "2" day
// verb accepts both one- and two-digit days, so it also covers
// time.RFC1123Z. Anything else is handed to mail.ParseDate, which understands
// the remaining RFC 5322 variants (no day-of-week, no seconds, obsolete zone
// names, a trailing comment such as "(UTC)"). An error is returned when the
// value matches none of them, which includes the empty string.
func parseTime(s string) (time.Time, error) {
	const commonLayout = "Mon, 2 Jan 2006 15:04:05 -0700"

	if t, err := time.Parse(commonLayout, s); err == nil {
		return t, nil
	}

	return mail.ParseDate(s)
}
177
// parseMessageId strips any leading and trailing angle brackets and spaces
// from a message-id header value, leaving the bare identifier.
func parseMessageId(s string) string {
	const wrapping = "<> "

	id := strings.Trim(s, wrapping)

	return id
}
181
// parseMessageIdList splits a header holding several message ids (In-Reply-To,
// References) into the bare identifiers, with the surrounding angle brackets
// removed.
//
// Ids may be separated by any run of white space (spaces, tabs, CR/LF from
// folded header lines), not just a single space. The result is nil when the
// value contains no ids.
func parseMessageIdList(s string) (result []string) {
	for _, field := range strings.Fields(s) {
		// Fields never contain white space, so only the brackets are left to
		// strip; this matches what parseMessageId does for a single id.
		result = append(result, strings.Trim(field, "<>"))
	}

	return result
}
191
192func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) {
193 pmr := multipart.NewReader(msg, boundary)
194 for {
195 part, err := pmr.NextPart()
196
197 if err == io.EOF {
198 break
199 } else if err != nil {
200 return textBody, htmlBody, embeddedFiles, err
201 }
202
203 contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
Dusan Kasanc661cc02017-04-18 10:51:51 +0200204 if err != nil {
205 return textBody, htmlBody, embeddedFiles, err
206 }
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200207
208 switch contentType {
Dusan Kasan45ca2642017-04-18 10:39:35 +0200209 case contentTypeTextPlain:
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200210 ppContent, err := ioutil.ReadAll(part)
211 if err != nil {
212 return textBody, htmlBody, embeddedFiles, err
213 }
214
215 textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
Dusan Kasan45ca2642017-04-18 10:39:35 +0200216 case contentTypeTextHtml:
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200217 ppContent, err := ioutil.ReadAll(part)
218 if err != nil {
219 return textBody, htmlBody, embeddedFiles, err
220 }
221
222 htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n")
Dusan Kasan45ca2642017-04-18 10:39:35 +0200223 case contentTypeMultipartRelated:
Dusan Kasan1a966482017-04-18 10:45:25 +0200224 tb, hb, ef, err := parseMultipartAlternative(part, params["boundary"])
225 if err != nil {
226 return textBody, htmlBody, embeddedFiles, err
227 }
228
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200229 htmlBody += hb
230 textBody += tb
231 embeddedFiles = append(embeddedFiles, ef...)
232 default:
233 if isEmbeddedFile(part) {
234 ef, err := decodeEmbeddedFile(part)
235 if err != nil {
236 return textBody, htmlBody, embeddedFiles, err
237 }
238
239 embeddedFiles = append(embeddedFiles, ef)
240 } else {
Dusan Kasan45ca2642017-04-18 10:39:35 +0200241 return textBody, htmlBody, embeddedFiles, fmt.Errorf("Can't process multipart/alternative inner mime type: %s", contentType)
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200242 }
243 }
244 }
245
246 return textBody, htmlBody, embeddedFiles, err
247}
248
249func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody string, attachments []Attachment, embeddedFiles []EmbeddedFile, err error) {
250 mr := multipart.NewReader(msg, boundary)
251 for {
252 part, err := mr.NextPart()
253 if err == io.EOF {
254 break
255 } else if err != nil {
256 return textBody, htmlBody, attachments, embeddedFiles, err
257 }
258
259 contentType, params, err := mime.ParseMediaType(part.Header.Get("Content-Type"))
260 if err != nil {
261 return textBody, htmlBody, attachments, embeddedFiles, err
262 }
263
Dusan Kasan45ca2642017-04-18 10:39:35 +0200264 if contentType == contentTypeMultipartAlternative {
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200265 textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(part, params["boundary"])
266 if err != nil {
267 return textBody, htmlBody, attachments, embeddedFiles, err
268 }
269 } else if isAttachment(part) {
270 at, err := decodeAttachment(part)
271 if err != nil {
272 return textBody, htmlBody, attachments, embeddedFiles, err
273 }
274
275 attachments = append(attachments, at)
276 } else {
Dusan Kasan45ca2642017-04-18 10:39:35 +0200277 return textBody, htmlBody, attachments, embeddedFiles, fmt.Errorf("Unknown multipart/mixed nested mime type: %s", contentType)
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200278 }
279 }
280
281 return textBody, htmlBody, attachments, embeddedFiles, err
Dusan Kasan17e497e2017-04-10 22:44:22 +0200282}
283
// decodeMimeSentence decodes every RFC 2047 encoded-word ("=?charset?q?...?=")
// found in s and returns the resulting text.
//
// White space between two adjacent encoded-words is dropped, as RFC 2047
// requires; all other text, including the white space that separates plain
// words from encoded-words, is kept as is. Decoding is best effort: when s
// cannot be decoded (for example because it uses a charset the mime package
// does not handle) s is returned unchanged. The returned error is therefore
// always nil; it is kept in the signature for the existing callers.
func decodeMimeSentence(s string) (string, error) {
	dec := new(mime.WordDecoder)

	decoded, err := dec.DecodeHeader(s)
	if err != nil {
		// Deliberate fallback: an undecodable header is still more useful to
		// the caller in its raw form than a parse failure.
		return s, nil
	}

	return decoded, nil
}
304
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200305func decodeHeaderMime(header mail.Header) (mail.Header, error) {
Dusan Kasan17e497e2017-04-10 22:44:22 +0200306 parsedHeader := map[string][]string{}
307
308 for headerName, headerData := range header {
309
310 parsedHeaderData := []string{}
311 for _, headerValue := range headerData {
312 decodedHeaderValue, err := decodeMimeSentence(headerValue)
313 if err != nil {
314 return mail.Header{}, err
315 }
316 parsedHeaderData = append(parsedHeaderData, decodedHeaderValue)
317 }
318
319 parsedHeader[headerName] = parsedHeaderData
320 }
321
322 return mail.Header(parsedHeader), nil
323}
324
// decodePartData reads the whole body of part, undoes its
// Content-Transfer-Encoding and returns the decoded bytes as an in-memory
// reader (the part itself stops being readable once the multipart reader
// advances).
//
// The encoding name is matched case-insensitively, as RFC 2045 defines it:
//   - "base64" bodies are base64-decoded;
//   - "7bit", "8bit", "binary" and a missing header are identity encodings and
//     are returned unchanged. A missing header is also what a quoted-printable
//     part looks like here, because multipart.Reader.NextPart decodes
//     quoted-printable itself and removes the header;
//   - any other encoding is an error.
func decodePartData(part *multipart.Part) (io.Reader, error) {
	encoding := part.Header.Get("Content-Transfer-Encoding")

	var body io.Reader
	switch strings.ToLower(strings.TrimSpace(encoding)) {
	case "base64":
		body = base64.NewDecoder(base64.StdEncoding, part)
	case "", "7bit", "8bit", "binary":
		body = part
	default:
		return nil, fmt.Errorf("Unknown encoding: %s", encoding)
	}

	data, err := ioutil.ReadAll(body)
	if err != nil {
		return nil, err
	}

	return bytes.NewReader(data), nil
}
340
// isEmbeddedFile reports whether part declares a Content-Transfer-Encoding,
// which is the only criterion used here to recognise an embedded file.
func isEmbeddedFile(part *multipart.Part) bool {
	encoding := part.Header.Get("Content-Transfer-Encoding")

	return len(encoding) > 0
}
344
345func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200346 cid, err := decodeMimeSentence(part.Header.Get("Content-Id"))
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200347 if err != nil {
348 return
349 }
350
351 decoded, err := decodePartData(part)
352 if err != nil {
353 return
354 }
355
356 ef.CID = strings.Trim(cid, "<>")
357 ef.Data = decoded
358 ef.ContentType = part.Header.Get("Content-Type")
359
360 return
361}
362
// isAttachment reports whether part carries a file name, which is the only
// criterion used here to recognise an attachment.
func isAttachment(part *multipart.Part) bool {
	name := part.FileName()

	return len(name) > 0
}
366
367func decodeAttachment(part *multipart.Part) (at Attachment, err error) {
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200368 filename, err := decodeMimeSentence(part.FileName())
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200369 if err != nil {
370 return
371 }
372
373 decoded, err := decodePartData(part)
374 if err != nil {
375 return
376 }
377
378 at.Filename = filename
379 at.Data = decoded
380 at.ContentType = strings.Split(part.Header.Get("Content-Type"), ";")[0]
381
382 return
383}
384
// Attachment is a file attached to a message, as produced by decodeAttachment.
type Attachment struct {
	// Filename is the MIME-decoded file name of the part.
	Filename string
	// ContentType is the part's Content-Type header up to the first ';'.
	ContentType string
	// Data yields the decoded content of the attachment.
	Data io.Reader
}
391
// EmbeddedFile is a file embedded in a message body, as produced by
// decodeEmbeddedFile.
type EmbeddedFile struct {
	// CID is the part's Content-Id without the surrounding angle brackets.
	CID string
	// ContentType is the part's Content-Type header as received.
	ContentType string
	// Data yields the decoded content of the file.
	Data io.Reader
}
398
Dusan Kasan1a966482017-04-18 10:45:25 +0200399// Email with fields for all the headers defined in RFC5322 with it's attachments and
Dusan Kasan17e497e2017-04-10 22:44:22 +0200400type Email struct {
401 Header mail.Header
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200402
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200403 Subject string
404 Sender *mail.Address
405 From []*mail.Address
406 ReplyTo []*mail.Address
407 To []*mail.Address
408 Cc []*mail.Address
409 Bcc []*mail.Address
410 Date time.Time
411 MessageID string
412 InReplyTo []string
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200413 References []string
414
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200415 ResentFrom []*mail.Address
416 ResentSender *mail.Address
417 ResentTo []*mail.Address
418 ResentDate time.Time
419 ResentCc []*mail.Address
420 ResentBcc []*mail.Address
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200421 ResentMessageID string
422
Dusan Kasan17e497e2017-04-10 22:44:22 +0200423 HTMLBody string
424 TextBody string
Dusan Kasanb49ceb62017-04-13 00:00:36 +0200425
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200426 Attachments []Attachment
Dusan Kasan17e497e2017-04-10 22:44:22 +0200427 EmbeddedFiles []EmbeddedFile
Dusan Kasan4595dfe2017-04-13 00:38:24 +0200428}