blob: f1a512efda975069f8d9d21cbcaa1270b50b4471 [file] [log] [blame]
Dusan Kasan17e497e2017-04-10 22:44:22 +02001package parsemail
2
3import (
4 "net/mail"
5 "io"
6 "strings"
7 "mime/multipart"
8 "mime"
9 "fmt"
10 "errors"
11 "io/ioutil"
12 "time"
13 "encoding/base64"
14 "bytes"
15)
16
17func Parse(r io.Reader) (Email, error) {
18 email := Email{}
19
20 msg, err := mail.ReadMessage(r);
21 if err != nil {
22 return email, err
23 }
24
25 var body []byte
26 _,err = msg.Body.Read(body);
27 if err != nil {
28 return email, err
29 }
30
31 email.Header, err = decodeHeaderMime(msg.Header)
32 if err != nil {
33 return email, err
34 }
35
36 mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type"))
37 if err != nil {
38 return email, err
39 }
40
41 if mediaType == "" {
42 return email, errors.New("No top level mime type specified")
43 } else if strings.HasPrefix(mediaType, "multipart/mixed") {
44 email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"])
45 if err != nil {
46 return email, err
47 }
48 } else if strings.HasPrefix(mediaType, "multipart/alternative") {
49 email.TextBody, email.HTMLBody, email.EmbeddedFiles, err = parseMultipartAlternative(msg.Body, params["boundary"])
50 if err != nil {
51 return email, err
52 }
53 } else if strings.HasPrefix(mediaType, "text/plain") {
54 message, _ := ioutil.ReadAll(msg.Body)
55 email.TextBody = strings.TrimSuffix(string(message[:]), "\n")
56 } else if strings.HasPrefix(mediaType, "text/html") {
57 message, _ := ioutil.ReadAll(msg.Body)
58 email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n")
59 } else {
60 return email, errors.New(fmt.Sprintf("Unknown top level mime type: %s", mediaType))
61 }
62
63 return email, nil
64}
65
// decodeMimeSentence decodes every RFC 2047 encoded-word found in s and
// returns the resulting text.
//
// s is split on single spaces and each token is decoded on its own. Tokens
// that are not valid encoded-words (including ones whose charset the decoder
// cannot handle) are kept verbatim. The separating space is dropped only
// between two adjacent encoded-words, as RFC 2047 requires; a space between
// plain text and an encoded-word is preserved.
//
// The returned error is always nil; it is kept for interface compatibility.
func decodeMimeSentence(s string) (string, error) {
	dec := new(mime.WordDecoder)
	words := strings.Split(s, " ")
	pieces := make([]string, 0, 2*len(words))
	prevEncoded := false

	for i, word := range words {
		decoded, err := dec.Decode(word)
		encoded := err == nil
		if !encoded {
			// Not an encoded-word (or not decodable): keep the raw token.
			decoded = word
		}

		// Restore the space removed by Split unless it sits between two
		// encoded-words, where it is only a transport artifact.
		if i > 0 && !(prevEncoded && encoded) {
			pieces = append(pieces, " ")
		}

		pieces = append(pieces, decoded)
		prevEncoded = encoded
	}

	return strings.Join(pieces, ""), nil
}
86
87func parseMultipartAlternative(msg io.Reader, boundary string) (textBody, htmlBody string, embeddedFiles []EmbeddedFile, err error) {
88 pmr := multipart.NewReader(msg, boundary)
89 for {
90
91 pp, err := pmr.NextPart()
92
93 if err == io.EOF {
94 break
95 }
96 if err != nil {
97 return textBody, htmlBody, embeddedFiles, err
98 }
99
100 ppMediaType, ppParams, err := mime.ParseMediaType(pp.Header.Get("Content-Type"))
101
102 if ppMediaType == "text/plain" {
103 ppContent, err := ioutil.ReadAll(pp)
104 if err != nil {
105 return textBody, htmlBody, embeddedFiles, err
106 }
107
108 textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
109 } else if ppMediaType == "text/html" {
110 ppContent, err := ioutil.ReadAll(pp)
111 if err != nil {
112 return textBody, htmlBody, embeddedFiles, err
113 }
114
115 htmlBody += strings.TrimSuffix(string(ppContent[:]), "\n")
116 } else if ppMediaType == "multipart/related" {
117 var tb, hb string
118 var ef []EmbeddedFile
119 tb, hb, ef, err = parseMultipartAlternative(pp, ppParams["boundary"])
120 htmlBody += hb
121 textBody += tb
122 embeddedFiles = append(embeddedFiles, ef...)
123 } else if pp.Header.Get("Content-Transfer-Encoding") != "" {
124 reference, err := decodeMimeSentence(pp.Header.Get("Content-Id"));
125 if err != nil {
126 return textBody, htmlBody, embeddedFiles, err
127 }
128 reference = strings.Trim(reference, "<>")
129
130 decoded, err := decodePartData(pp)
131 if err != nil {
132 return textBody, htmlBody, embeddedFiles, err
133 }
134
135 embeddedFiles = append(embeddedFiles, EmbeddedFile{reference, decoded})
136 } else {
137 return textBody, htmlBody, embeddedFiles, errors.New(fmt.Sprintf("Can't process multipart/alternative inner mime type: %s", ppMediaType))
138 }
139 }
140
141 return textBody, htmlBody, embeddedFiles, err
142}
143
144func parseMultipartMixed(msg io.Reader, boundary string) (textBody, htmlBody string, attachments []Attachment, embeddedFiles []EmbeddedFile, err error) {
145 mr := multipart.NewReader(msg, boundary)
146 for {
147 p, err := mr.NextPart()
148 if err == io.EOF {
149 break
150 }
151 if err != nil {
152 return textBody, htmlBody, attachments, embeddedFiles, err
153 }
154
155 pMediaType, pParams, err := mime.ParseMediaType(p.Header.Get("Content-Type"))
156 if err != nil {
157 return textBody, htmlBody, attachments, embeddedFiles, err
158 }
159
160 if strings.HasPrefix(pMediaType, "multipart/alternative") {
161 textBody, htmlBody, embeddedFiles, err = parseMultipartAlternative(p, pParams["boundary"])
162 if err != nil {
163 return textBody, htmlBody, attachments, embeddedFiles, err
164 }
165 } else if p.FileName() != "" {
166
167 filename, err := decodeMimeSentence(p.FileName());
168 if err != nil {
169 return textBody, htmlBody, attachments, embeddedFiles, err
170 }
171
172 decoded, err := decodePartData(p)
173 if err != nil {
174 return textBody, htmlBody, attachments, embeddedFiles, err
175 }
176
177 attachments = append(attachments, Attachment{filename, decoded})
178 } else {
179 return textBody, htmlBody, attachments, embeddedFiles, errors.New(fmt.Sprintf("Unknown multipart/mixed nested mime type: %s", pMediaType))
180 }
181 }
182
183 return textBody, htmlBody, attachments, embeddedFiles, err
184}
185
186func decodeHeaderMime(header mail.Header) (mail.Header, error) {
187 parsedHeader := map[string][]string{}
188
189 for headerName, headerData := range header {
190
191 parsedHeaderData := []string{}
192 for _, headerValue := range headerData {
193 decodedHeaderValue, err := decodeMimeSentence(headerValue)
194 if err != nil {
195 return mail.Header{}, err
196 }
197 parsedHeaderData = append(parsedHeaderData, decodedHeaderValue)
198 }
199
200 parsedHeader[headerName] = parsedHeaderData
201 }
202
203 return mail.Header(parsedHeader), nil
204}
205
// decodePartData reads the whole body of part, undoes its
// Content-Transfer-Encoding and returns the decoded bytes as an in-memory
// reader.
//
// The encoding name is matched case-insensitively, as RFC 2045 specifies.
// Supported values are "base64" and the identity encodings "7bit", "8bit"
// and "binary"; a missing header is treated as an identity encoding as well.
// Any other value yields an "Unknown encoding" error.
func decodePartData(part *multipart.Part) (io.Reader, error) {
	encoding := part.Header.Get("Content-Transfer-Encoding")

	var src io.Reader
	switch strings.ToLower(strings.TrimSpace(encoding)) {
	case "base64":
		src = base64.NewDecoder(base64.StdEncoding, part)
	case "", "7bit", "8bit", "binary":
		// Identity encodings: the part body is already the payload.
		src = part
	default:
		return nil, fmt.Errorf("Unknown encoding: %s", encoding)
	}

	data, err := ioutil.ReadAll(src)
	if err != nil {
		return nil, err
	}

	return bytes.NewReader(data), nil
}
221
// Attachment is a file attached to a message: a multipart/mixed part that
// carries a file name.
type Attachment struct {
	// Filename is the MIME-decoded file name of the part.
	Filename string
	// Data yields the decoded content of the part.
	Data io.Reader
}

// EmbeddedFile is a file embedded in a message body and addressed by its
// Content-Id.
type EmbeddedFile struct {
	// CID is the part's Content-Id with the surrounding "<" and ">" removed.
	CID string
	// Data yields the decoded content of the part.
	Data io.Reader
}

// Email is a parsed e-mail message.
type Email struct {
	// Header holds the message headers with MIME encoded-words decoded.
	Header mail.Header
	// HTMLBody is the text/html content of the message, if any.
	HTMLBody string
	// TextBody is the text/plain content of the message, if any.
	TextBody string
	// Attachments lists the files attached to the message.
	Attachments []Attachment
	// EmbeddedFiles lists the files embedded in the message body.
	EmbeddedFiles []EmbeddedFile
}

// Subject returns the value of the Subject header.
func (e *Email) Subject() string {
	return e.Header.Get("Subject")
}

// Sender returns the value of the Sender header.
func (e *Email) Sender() string {
	return e.Header.Get("Sender")
}

// From returns the entries of the From header, one string per address.
func (e *Email) From() []string {
	return e.addressList("From")
}

// To returns the entries of the To header, one string per address.
func (e *Email) To() []string {
	return e.addressList("To")
}

// Cc returns the entries of the Cc header, one string per address.
func (e *Email) Cc() []string {
	return e.addressList("Cc")
}

// Bcc returns the entries of the Bcc header, one string per address.
func (e *Email) Bcc() []string {
	return e.addressList("Bcc")
}

// ReplyTo returns the entries of the Reply-To header, one string per address.
func (e *Email) ReplyTo() []string {
	return e.addressList("Reply-To")
}

// Date parses the Date header.
//
// The RFC1123Z layout and its single-digit-day variant are tried first. If
// both fail, mail.ParseDate is used so that other RFC 5322 forms (for
// example a date without a day of week) are accepted too. When nothing
// matches, the error of the layout-based parse is returned.
func (e *Email) Date() (time.Time, error) {
	raw := e.Header.Get("Date")

	if t, err := time.Parse(time.RFC1123Z, raw); err == nil {
		return t, nil
	}

	t, err := time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", raw)
	if err == nil {
		return t, nil
	}

	if parsed, parseErr := mail.ParseDate(raw); parseErr == nil {
		return parsed, nil
	}

	return t, err
}

// MessageID returns the Message-ID header without the surrounding "<" and ">".
func (e *Email) MessageID() string {
	return strings.Trim(e.Header.Get("Message-ID"), "<>")
}

// InReplyTo returns the message IDs listed in the In-Reply-To header, each
// without the surrounding "<" and ">".
func (e *Email) InReplyTo() []string {
	return e.messageIDList("In-Reply-To")
}

// References returns the message IDs listed in the References header, each
// without the surrounding "<" and ">".
func (e *Email) References() []string {
	return e.messageIDList("References")
}

// addressList splits the named header on the commas that separate addresses.
// Commas inside a double-quoted display name (`"Doe, John" <j@example.com>`)
// do not split. Each entry is trimmed of surrounding spaces and empty entries
// are skipped. The result is never nil.
func (e *Email) addressList(name string) []string {
	raw := e.Header.Get(name)
	result := []string{}

	appendEntry := func(entry string) {
		if trimmed := strings.Trim(entry, " "); trimmed != "" {
			result = append(result, trimmed)
		}
	}

	inQuotes, escaped := false, false
	start := 0
	// Byte-wise scan is safe: the delimiters are ASCII and never occur
	// inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(raw); i++ {
		c := raw[i]
		switch {
		case escaped:
			// Character following a backslash inside quotes is literal.
			escaped = false
		case inQuotes && c == '\\':
			escaped = true
		case c == '"':
			inQuotes = !inQuotes
		case c == ',' && !inQuotes:
			appendEntry(raw[start:i])
			start = i + 1
		}
	}
	appendEntry(raw[start:])

	return result
}

// messageIDList splits the named header on spaces and strips "<", ">" and
// spaces from both ends of every non-empty token. The result is never nil.
func (e *Email) messageIDList(name string) []string {
	result := []string{}

	for _, token := range strings.Split(e.Header.Get(name), " ") {
		if token != "" {
			result = append(result, strings.Trim(token, "<> "))
		}
	}

	return result
}