Merge branch 'seven-bit-attachment' of https://github.com/EdwinLove/parsemail into EdwinLove-seven-bit-attachment
diff --git a/parsemail.go b/parsemail.go
index a62426a..a8f84e8 100644
--- a/parsemail.go
+++ b/parsemail.go
@@ -31,7 +31,8 @@
return
}
- contentType, params, err := parseContentType(msg.Header.Get("Content-Type"))
+ email.ContentType = msg.Header.Get("Content-Type")
+ contentType, params, err := parseContentType(email.ContentType)
if err != nil {
return
}
@@ -48,7 +49,7 @@
message, _ := ioutil.ReadAll(msg.Body)
email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n")
default:
- err = fmt.Errorf("Unknown top level mime type: %s", contentType)
+ email.Content, err = decodeContent(msg.Body, msg.Header.Get("Content-Transfer-Encoding"))
}
return
@@ -239,6 +240,13 @@
if err != nil {
return textBody, htmlBody, attachments, embeddedFiles, err
}
+ } else if contentType == contentTypeTextPlain {
+ ppContent, err := ioutil.ReadAll(part)
+ if err != nil {
+ return textBody, htmlBody, attachments, embeddedFiles, err
+ }
+
+ textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
} else if isAttachment(part) {
at, err := decodeAttachment(part)
if err != nil {
@@ -291,36 +299,13 @@
return mail.Header(parsedHeader), nil
}
-func decodePartData(part *multipart.Part) (io.Reader, error) {
- encoding := part.Header.Get("Content-Transfer-Encoding")
-
- if strings.EqualFold(encoding, "base64") {
- dr := base64.NewDecoder(base64.StdEncoding, part)
- dd, err := ioutil.ReadAll(dr)
- if err != nil {
- return nil, err
- }
-
- return bytes.NewReader(dd), nil
- } else if strings.EqualFold(encoding, "7bit") {
- dd, err := ioutil.ReadAll(part)
- if err != nil {
- return nil, err
- }
-
- return bytes.NewReader(dd), nil
- }
-
- return nil, fmt.Errorf("Unknown encoding: %s", encoding)
-}
-
func isEmbeddedFile(part *multipart.Part) bool {
return part.Header.Get("Content-Transfer-Encoding") != ""
}
func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {
cid := decodeMimeSentence(part.Header.Get("Content-Id"))
- decoded, err := decodePartData(part)
+ decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding"))
if err != nil {
return
}
@@ -338,7 +323,7 @@
func decodeAttachment(part *multipart.Part) (at Attachment, err error) {
filename := decodeMimeSentence(part.FileName())
- decoded, err := decodePartData(part)
+ decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding"))
if err != nil {
return
}
@@ -350,6 +335,30 @@
return
}
+// decodeContent returns a reader over content after undoing the given
+// Content-Transfer-Encoding, which is matched case-insensitively (RFC 2045).
+func decodeContent(content io.Reader, encoding string) (io.Reader, error) {
+	switch strings.ToLower(encoding) {
+	case "base64":
+		decoded := base64.NewDecoder(base64.StdEncoding, content)
+		b, err := ioutil.ReadAll(decoded)
+		if err != nil {
+			return nil, err
+		}
+		return bytes.NewReader(b), nil
+	case "7bit":
+		dd, err := ioutil.ReadAll(content)
+		if err != nil {
+			return nil, err
+		}
+		return bytes.NewReader(dd), nil
+	case "":
+		return content, nil
+	default:
+		return nil, fmt.Errorf("unknown encoding: %s", encoding)
+	}
+}
+
type headerParser struct {
header *mail.Header
err error
@@ -387,12 +396,19 @@
return
}
- t, hp.err = time.Parse(time.RFC1123Z, s)
- if hp.err == nil {
- return t
+ formats := []string{
+ time.RFC1123Z,
+ "Mon, 2 Jan 2006 15:04:05 -0700",
+ time.RFC1123Z + " (MST)",
+ "Mon, 2 Jan 2006 15:04:05 -0700 (MST)",
}
- t, hp.err = time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", s)
+ for _, format := range formats {
+ t, hp.err = time.Parse(format, s)
+ if hp.err == nil {
+ return
+ }
+ }
return
}
@@ -457,9 +473,12 @@
ResentBcc []*mail.Address
ResentMessageID string
+ ContentType string
+ Content io.Reader
+
HTMLBody string
TextBody string
Attachments []Attachment
EmbeddedFiles []EmbeddedFile
-}
+}
\ No newline at end of file
diff --git a/parsemail_test.go b/parsemail_test.go
index 3e66364..5cc106a 100644
--- a/parsemail_test.go
+++ b/parsemail_test.go
@@ -2,8 +2,8 @@
import (
"encoding/base64"
- "io/ioutil"
"fmt"
+ "io/ioutil"
"net/mail"
"strings"
"testing"
@@ -14,6 +14,8 @@
var testData = map[int]struct {
mailData string
+ contentType string
+ content string
subject string
date time.Time
from []mail.Address
@@ -182,8 +184,10 @@
So, "Hello".`,
},
6: {
- mailData: data1,
- subject: "Peter Paholík",
+ mailData: data1,
+ contentType: `multipart/mixed; boundary=f403045f1dcc043a44054c8e6bbf`,
+ content: "",
+ subject: "Peter Paholík",
from: []mail.Address{
{
Name: "Peter Paholík",
@@ -201,15 +205,17 @@
htmlBody: "<div dir=\"ltr\"><br></div>",
attachments: []attachmentData{
{
- filename: "Peter Paholík 1 4 2017 2017-04-07.pdf",
- contentType: "application/pdf",
- base64data: "JVBERi0xLjQNCiW1tbW1DQoxIDAgb2JqDQo8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFIvTGFuZyhlbi1VUykgL1N0cnVjdFRyZWVSb290IDY3IDAgUi9NYXJrSW5mbzw8L01hcmtlZCB0cnVlPj4vT3V0cHV0SW50ZW50c1s8PC9UeXBlL091dHB1dEludGVudC9TL0dUU19QREZBMS9PdXRwdXRDb25kZXYgMzk1MzYyDQo+Pg0Kc3RhcnR4cmVmDQo0MTk4ODUNCiUlRU9GDQo=",
+ filename: "Peter Paholík 1 4 2017 2017-04-07.json",
+ contentType: "application/json",
+ data: "[1, 2, 3]",
},
},
},
7: {
- mailData: data2,
- subject: "Re: Test Subject 2",
+ mailData: data2,
+ contentType: `multipart/alternative; boundary="------------C70C0458A558E585ACB75FB4"`,
+ content: "",
+ subject: "Re: Test Subject 2",
from: []mail.Address{
{
Name: "Sender Man",
@@ -247,8 +253,88 @@
},
},
8: {
- mailData: data3,
- subject: "Peter Foobar",
+ mailData: imageContentExample,
+ subject: "Saying Hello",
+ from: []mail.Address{
+ {
+ Name: "John Doe",
+ Address: "jdoe@machine.example",
+ },
+ },
+ to: []mail.Address{
+ {
+ Name: "Mary Smith",
+ Address: "mary@example.net",
+ },
+ },
+ sender: mail.Address{
+ Name: "Michael Jones",
+ Address: "mjones@machine.example",
+ },
+ messageID: "1234@local.machine.example",
+ date: parseDate("Fri, 21 Nov 1997 09:55:06 -0600"),
+ contentType: `image/jpeg; x-unix-mode=0644; name="image.gif"`,
+ content: `GIF89a;`,
+ },
+ 9: {
+ contentType: `multipart/mixed; boundary="0000000000007e2bb40587e36196"`,
+ mailData: textPlainInMultipart,
+ subject: "Re: kern/54143 (virtualbox)",
+ from: []mail.Address{
+ {
+ Name: "Rares",
+ Address: "rares@example.com",
+ },
+ },
+ to: []mail.Address{
+ {
+ Name: "",
+ Address: "bugs@example.com",
+ },
+ },
+ date: parseDate("Fri, 02 May 2019 11:25:35 +0300"),
+ textBody: `plain text part`,
+ },
+ 10: {
+ mailData: rfc5322exampleA12WithTimezone,
+ from: []mail.Address{
+ {
+ Name: "Joe Q. Public",
+ Address: "john.q.public@example.com",
+ },
+ },
+ to: []mail.Address{
+ {
+ Name: "Mary Smith",
+ Address: "mary@x.test",
+ },
+ {
+ Name: "",
+ Address: "jdoe@example.org",
+ },
+ {
+ Name: "Who?",
+ Address: "one@y.test",
+ },
+ },
+ cc: []mail.Address{
+ {
+ Name: "",
+ Address: "boss@nil.test",
+ },
+ {
+ Name: "Giant; \"Big\" Box",
+ Address: "sysservices@example.net",
+ },
+ },
+ messageID: "5678.21-Nov-1997@example.com",
+ date: parseDate("Tue, 01 Jul 2003 10:52:37 +0200"),
+ textBody: `Hi everyone.`,
+ },
+ 11: {
+ contentType: "multipart/mixed; boundary=f403045f1dcc043a44054c8e6bbf",
+ mailData: attachment7bit,
+ subject: "Peter Foobar",
from: []mail.Address{
{
Name: "Peter Foobar",
@@ -266,13 +352,12 @@
htmlBody: "<div dir=\"ltr\"><br></div>",
attachments: []attachmentData{
{
- filename: "unencoded.csv",
- contentType: "application/csv",
- unencodedData: fmt.Sprintf("\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n", "Some", "Data", "In", "Csv", "Format", "Foo", "Bar", "Baz", "Bum", "Poo"),
+ filename: "unencoded.csv",
+ contentType: "application/csv",
+ data: fmt.Sprintf("\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n"+`"%s", "%s", "%s", "%s", "%s"`+"\n", "Some", "Data", "In", "Csv", "Format", "Foo", "Bar", "Baz", "Bum", "Poo"),
},
},
},
-
}
for index, td := range testData {
@@ -281,6 +366,19 @@
t.Error(err)
}
+ if td.contentType != e.ContentType {
+ t.Errorf("[Test Case %v] Wrong content type. Expected: %s, Got: %s", index, td.contentType, e.ContentType)
+ }
+
+ if td.content != "" {
+ b, err := ioutil.ReadAll(e.Content)
+ if err != nil {
+ t.Error(err)
+ } else if td.content != string(b) {
+ t.Errorf("[Test Case %v] Wrong content. Expected: %s, Got: %s", index, td.content, string(b))
+ }
+ }
+
if td.subject != e.Subject {
t.Errorf("[Test Case %v] Wrong subject. Expected: %s, Got: %s", index, td.subject, e.Subject)
}
@@ -391,16 +489,8 @@
if err != nil {
t.Error(err)
}
- actual := "actual"
- expected := "expected"
- if ad.base64data != "" {
- actual = base64.StdEncoding.EncodeToString(b)
- expected = ad.base64data
- } else if ad.unencodedData != "" {
- actual = string(b)
- expected = ad.unencodedData
- }
- if ra.Filename == ad.filename && actual == expected && ra.ContentType == ad.contentType {
+
+ if ra.Filename == ad.filename && string(b) == ad.data && ra.ContentType == ad.contentType {
found = true
attachs = append(attachs[:i], attachs[i+1:]...)
}
@@ -462,8 +552,7 @@
type attachmentData struct {
filename string
contentType string
- base64data string
- unencodedData string
+ data string
}
type embeddedFileData struct {
@@ -554,19 +643,16 @@
--f403045f1dcc043a3f054c8e6bbd--
--f403045f1dcc043a44054c8e6bbf
-Content-Type: application/pdf;
+Content-Type: application/json;
name="=?UTF-8?Q?Peter_Paholi=CC=81k_1?=
- =?UTF-8?Q?_4_2017_2017=2D04=2D07=2Epdf?="
+ =?UTF-8?Q?_4_2017_2017=2D04=2D07=2Ejson?="
Content-Disposition: attachment;
filename="=?UTF-8?Q?Peter_Paholi=CC=81k_1?=
- =?UTF-8?Q?_4_2017_2017=2D04=2D07=2Epdf?="
+ =?UTF-8?Q?_4_2017_2017=2D04=2D07=2Ejson?="
Content-Transfer-Encoding: base64
X-Attachment-Id: f_j17i0f0d0
-JVBERi0xLjQNCiW1tbW1DQoxIDAgb2JqDQo8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFIvTGFu
-Zyhlbi1VUykgL1N0cnVjdFRyZWVSb290IDY3IDAgUi9NYXJrSW5mbzw8L01hcmtlZCB0cnVlPj4v
-T3V0cHV0SW50ZW50c1s8PC9UeXBlL091dHB1dEludGVudC9TL0dUU19QREZBMS9PdXRwdXRDb25k
-ZXYgMzk1MzYyDQo+Pg0Kc3RhcnR4cmVmDQo0MTk4ODUNCiUlRU9GDQo=
+WzEsIDIsIDNd
--f403045f1dcc043a44054c8e6bbf--
`
@@ -619,39 +705,17 @@
--------------C70C0458A558E585ACB75FB4--
`
-var data3 = `From: =?UTF-8?Q?Peter_Foobar?= <peter.foobar@gmail.com>
-Date: Tue, 2 Apr 2019 11:12:26 +0000
-Message-ID: <CACtgX4kNXE7T5XKSKeH_zEcfUUmf2vXVASxYjaaK9cCn-3zb_g@mail.gmail.com>
-Subject: =?UTF-8?Q?Peter_Foobar?=
-To: dusan@kasan.sk
-Content-Type: multipart/mixed; boundary=f403045f1dcc043a44054c8e6bbf
+var textPlainInMultipart = `From: Rares <rares@example.com>
+Date: Thu, 2 May 2019 11:25:35 +0300
+Subject: Re: kern/54143 (virtualbox)
+To: bugs@example.com
+Content-Type: multipart/mixed; boundary="0000000000007e2bb40587e36196"
---f403045f1dcc043a44054c8e6bbf
-Content-Type: multipart/alternative; boundary=f403045f1dcc043a3f054c8e6bbd
+--0000000000007e2bb40587e36196
+Content-Type: text/plain; charset="UTF-8"
---f403045f1dcc043a3f054c8e6bbd
-Content-Type: text/plain; charset=UTF-8
-
-
-
---f403045f1dcc043a3f054c8e6bbd
-Content-Type: text/html; charset=UTF-8
-
-<div dir="ltr"><br></div>
-
---f403045f1dcc043a3f054c8e6bbd--
---f403045f1dcc043a44054c8e6bbf
-Content-Type: application/csv;
- name="unencoded.csv"
-Content-Transfer-Encoding: 7bit
-Content-Disposition: attachment;
- filename="unencoded.csv"
-
-
-"Some", "Data", "In", "Csv", "Format"
-"Foo", "Bar", "Baz", "Bum", "Poo"
-
---f403045f1dcc043a44054c8e6bbf--
+plain text part
+--0000000000007e2bb40587e36196--
`
var rfc5322exampleA11 = `From: John Doe <jdoe@machine.example>
@@ -674,6 +738,15 @@
Hi everyone.
`
+var rfc5322exampleA12WithTimezone = `From: "Joe Q. Public" <john.q.public@example.com>
+To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>
+Cc: <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>
+Date: Tue, 1 Jul 2003 10:52:37 +0200 (GMT)
+Message-ID: <5678.21-Nov-1997@example.com>
+
+Hi everyone.
+`
+
//todo: not yet implemented in net/mail
//once there is support for this, add it
var rfc5322exampleA13 = `From: Pete <pete@silly.example>
@@ -737,3 +810,51 @@
This is a message just to say hello.
So, "Hello".`
+
+var imageContentExample = `From: John Doe <jdoe@machine.example>
+Sender: Michael Jones <mjones@machine.example>
+To: Mary Smith <mary@example.net>
+Subject: Saying Hello
+Date: Fri, 21 Nov 1997 09:55:06 -0600
+Message-ID: <1234@local.machine.example>
+Content-Type: image/jpeg;
+ x-unix-mode=0644;
+ name="image.gif"
+Content-Transfer-Encoding: base64
+
+R0lGODlhAQE7`
+
+var attachment7bit = `From: =?UTF-8?Q?Peter_Foobar?= <peter.foobar@gmail.com>
+Date: Tue, 2 Apr 2019 11:12:26 +0000
+Message-ID: <CACtgX4kNXE7T5XKSKeH_zEcfUUmf2vXVASxYjaaK9cCn-3zb_g@mail.gmail.com>
+Subject: =?UTF-8?Q?Peter_Foobar?=
+To: dusan@kasan.sk
+Content-Type: multipart/mixed; boundary=f403045f1dcc043a44054c8e6bbf
+
+--f403045f1dcc043a44054c8e6bbf
+Content-Type: multipart/alternative; boundary=f403045f1dcc043a3f054c8e6bbd
+
+--f403045f1dcc043a3f054c8e6bbd
+Content-Type: text/plain; charset=UTF-8
+
+
+
+--f403045f1dcc043a3f054c8e6bbd
+Content-Type: text/html; charset=UTF-8
+
+<div dir="ltr"><br></div>
+
+--f403045f1dcc043a3f054c8e6bbd--
+--f403045f1dcc043a44054c8e6bbf
+Content-Type: application/csv;
+ name="unencoded.csv"
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment;
+ filename="unencoded.csv"
+
+
+"Some", "Data", "In", "Csv", "Format"
+"Foo", "Bar", "Baz", "Bum", "Poo"
+
+--f403045f1dcc043a44054c8e6bbf--
+`