Merge branch 'fix-time' of https://github.com/EdwinLove/parsemail into EdwinLove-fix-time
diff --git a/parsemail.go b/parsemail.go
index d526dd7..3011f96 100644
--- a/parsemail.go
+++ b/parsemail.go
@@ -9,7 +9,6 @@
"mime"
"mime/multipart"
"net/mail"
- "regexp"
"strings"
"time"
)
@@ -32,11 +31,13 @@
return
}
- contentType, params, err := parseContentType(msg.Header.Get("Content-Type"))
+ email.ContentType = msg.Header.Get("Content-Type")
+ contentType, params, err := parseContentType(email.ContentType)
if err != nil {
return
}
+
switch contentType {
case contentTypeMultipartMixed:
email.TextBody, email.HTMLBody, email.Attachments, email.EmbeddedFiles, err = parseMultipartMixed(msg.Body, params["boundary"])
@@ -49,7 +50,7 @@
message, _ := ioutil.ReadAll(msg.Body)
email.HTMLBody = strings.TrimSuffix(string(message[:]), "\n")
default:
- err = fmt.Errorf("Unknown top level mime type: %s", contentType)
+ email.Content, err = decodeContent(msg.Body, msg.Header.Get("Content-Transfer-Encoding"))
}
return
@@ -240,6 +241,13 @@
if err != nil {
return textBody, htmlBody, attachments, embeddedFiles, err
}
+ } else if contentType == contentTypeTextPlain {
+ ppContent, err := ioutil.ReadAll(part)
+ if err != nil {
+ return textBody, htmlBody, attachments, embeddedFiles, err
+ }
+
+ textBody += strings.TrimSuffix(string(ppContent[:]), "\n")
} else if isAttachment(part) {
at, err := decodeAttachment(part)
if err != nil {
@@ -292,29 +300,13 @@
return mail.Header(parsedHeader), nil
}
-func decodePartData(part *multipart.Part) (io.Reader, error) {
- encoding := part.Header.Get("Content-Transfer-Encoding")
-
- if strings.EqualFold(encoding, "base64") {
- dr := base64.NewDecoder(base64.StdEncoding, part)
- dd, err := ioutil.ReadAll(dr)
- if err != nil {
- return nil, err
- }
-
- return bytes.NewReader(dd), nil
- }
-
- return nil, fmt.Errorf("Unknown encoding: %s", encoding)
-}
-
func isEmbeddedFile(part *multipart.Part) bool {
return part.Header.Get("Content-Transfer-Encoding") != ""
}
func decodeEmbeddedFile(part *multipart.Part) (ef EmbeddedFile, err error) {
cid := decodeMimeSentence(part.Header.Get("Content-Id"))
- decoded, err := decodePartData(part)
+ decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding"))
if err != nil {
return
}
@@ -332,7 +324,7 @@
func decodeAttachment(part *multipart.Part) (at Attachment, err error) {
filename := decodeMimeSentence(part.FileName())
- decoded, err := decodePartData(part)
+ decoded, err := decodeContent(part, part.Header.Get("Content-Transfer-Encoding"))
if err != nil {
return
}
@@ -344,6 +336,23 @@
return
}
+// decodeContent returns a reader over content with the given
+// Content-Transfer-Encoding removed.
+//
+// The encoding name is compared case-insensitively and with surrounding
+// whitespace ignored, since RFC 2045 defines the header value as
+// case-insensitive. A "base64" body is decoded eagerly into memory. The
+// identity encodings ("7bit", "8bit", "binary") and an empty encoding
+// return content unchanged. Any other encoding yields a nil reader and an
+// error.
+func decodeContent(content io.Reader, encoding string) (io.Reader, error) {
+	switch strings.ToLower(strings.TrimSpace(encoding)) {
+	case "base64":
+		// Decode up front so a malformed body is reported here rather
+		// than on the caller's first Read.
+		b, err := ioutil.ReadAll(base64.NewDecoder(base64.StdEncoding, content))
+		if err != nil {
+			return nil, err
+		}
+
+		return bytes.NewReader(b), nil
+	case "", "7bit", "8bit", "binary":
+		// Identity encodings: the bytes are already the content.
+		return content, nil
+	default:
+		return nil, fmt.Errorf("unknown encoding: %s", encoding)
+	}
+}
+
type headerParser struct {
header *mail.Header
err error
@@ -376,21 +385,24 @@
return
}
-var timezoneRegex = regexp.MustCompile(` \([A-Za-z0-9]+\)$`)
-
+// parseTime parses s by trying each layout in formats in order and returns
+// the first successful result. It returns the zero time.Time when hp.err is
+// already set, when s is empty, or when no layout matches.
+//
+// NOTE(review): the receiver is a value, so the hp.err assigned below is a
+// copy local to this call - confirm whether callers are meant to see it.
func (hp headerParser) parseTime(s string) (t time.Time) {
if hp.err != nil || s == "" {
return
}
- t, hp.err = time.Parse(time.RFC1123Z, s)
- if hp.err == nil {
- return t
+ // Layouts tried in order: RFC1123Z and a variant with a non-padded day,
+ // each with and without a trailing "(MST)" zone abbreviation.
+ formats := []string{
+ time.RFC1123Z,
+ "Mon, 2 Jan 2006 15:04:05 -0700",
+ time.RFC1123Z + " (MST)",
+ "Mon, 2 Jan 2006 15:04:05 -0700 (MST)",
}
- s = timezoneRegex.ReplaceAllString(s, "")
-
- t, hp.err = time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", s)
+ for _, format := range formats {
+ t, hp.err = time.Parse(format, s)
+ if hp.err == nil {
+ return
+ }
+ }
return
}
@@ -455,9 +467,12 @@
ResentBcc []*mail.Address
ResentMessageID string
+ ContentType string
+ Content io.Reader
+
HTMLBody string
TextBody string
Attachments []Attachment
EmbeddedFiles []EmbeddedFile
-}
+}
\ No newline at end of file
diff --git a/parsemail_test.go b/parsemail_test.go
index 9d6e4d3..17ae024 100644
--- a/parsemail_test.go
+++ b/parsemail_test.go
@@ -13,6 +13,8 @@
var testData = map[int]struct {
mailData string
+ contentType string
+ content string
subject string
date time.Time
from []mail.Address
@@ -182,6 +184,8 @@
},
6: {
mailData: data1,
+ contentType: `multipart/mixed; boundary=f403045f1dcc043a44054c8e6bbf`,
+ content: "",
subject: "Peter Paholík",
from: []mail.Address{
{
@@ -208,6 +212,8 @@
},
7: {
mailData: data2,
+ contentType: `multipart/alternative; boundary="------------C70C0458A558E585ACB75FB4"`,
+ content: "",
subject: "Re: Test Subject 2",
from: []mail.Address{
{
@@ -245,6 +251,85 @@
},
},
},
+ 8: {
+ mailData: imageContentExample,
+ subject: "Saying Hello",
+ from: []mail.Address{
+ {
+ Name: "John Doe",
+ Address: "jdoe@machine.example",
+ },
+ },
+ to: []mail.Address{
+ {
+ Name: "Mary Smith",
+ Address: "mary@example.net",
+ },
+ },
+ sender: mail.Address{
+ Name: "Michael Jones",
+ Address: "mjones@machine.example",
+ },
+ messageID: "1234@local.machine.example",
+ date: parseDate("Fri, 21 Nov 1997 09:55:06 -0600"),
+ contentType: `image/jpeg; x-unix-mode=0644; name="image.gif"`,
+ content: `GIF89a;`,
+ },
+ 9: {
+ contentType: `multipart/mixed; boundary="0000000000007e2bb40587e36196"`,
+ mailData: textPlainInMultipart,
+ subject: "Re: kern/54143 (virtualbox)",
+ from: []mail.Address{
+ {
+ Name: "Rares",
+ Address: "rares@example.com",
+ },
+ },
+ to: []mail.Address{
+ {
+ Name: "",
+ Address: "bugs@example.com",
+ },
+ },
+ date: parseDate("Fri, 02 May 2019 11:25:35 +0300"),
+ textBody: `plain text part`,
+ },
+ 10: {
+ mailData: rfc5322exampleA12WithTimezone,
+ from: []mail.Address{
+ {
+ Name: "Joe Q. Public",
+ Address: "john.q.public@example.com",
+ },
+ },
+ to: []mail.Address{
+ {
+ Name: "Mary Smith",
+ Address: "mary@x.test",
+ },
+ {
+ Name: "",
+ Address: "jdoe@example.org",
+ },
+ {
+ Name: "Who?",
+ Address: "one@y.test",
+ },
+ },
+ cc: []mail.Address{
+ {
+ Name: "",
+ Address: "boss@nil.test",
+ },
+ {
+ Name: "Giant; \"Big\" Box",
+ Address: "sysservices@example.net",
+ },
+ },
+ messageID: "5678.21-Nov-1997@example.com",
+ date: parseDate("Tue, 01 Jul 2003 10:52:37 +0200"),
+ textBody: `Hi everyone.`,
+ },
}
for index, td := range testData {
@@ -253,6 +338,19 @@
t.Error(err)
}
+ if td.contentType != e.ContentType {
+ t.Errorf("[Test Case %v] Wrong content type. Expected: %s, Got: %s", index, td.contentType, e.ContentType)
+ }
+
+ if td.content != "" {
+ b, err := ioutil.ReadAll(e.Content)
+ if err != nil {
+ t.Error(err)
+ } else if td.content != string(b) {
+ t.Errorf("[Test Case %v] Wrong content. Expected: %s, Got: %s", index, td.content, string(b))
+ }
+ }
+
if td.subject != e.Subject {
t.Errorf("[Test Case %v] Wrong subject. Expected: %s, Got: %s", index, td.subject, e.Subject)
}
@@ -583,6 +681,19 @@
--------------C70C0458A558E585ACB75FB4--
`
+// textPlainInMultipart is a multipart/mixed message whose only part is
+// text/plain and carries no Content-Transfer-Encoding header. Test case 9
+// uses it as mailData.
+var textPlainInMultipart = `From: Rares <rares@example.com>
+Date: Thu, 2 May 2019 11:25:35 +0300
+Subject: Re: kern/54143 (virtualbox)
+To: bugs@example.com
+Content-Type: multipart/mixed; boundary="0000000000007e2bb40587e36196"
+
+--0000000000007e2bb40587e36196
+Content-Type: text/plain; charset="UTF-8"
+
+plain text part
+--0000000000007e2bb40587e36196--
+`
+
var rfc5322exampleA11 = `From: John Doe <jdoe@machine.example>
Sender: Michael Jones <mjones@machine.example>
To: Mary Smith <mary@example.net>
@@ -597,12 +708,22 @@
var rfc5322exampleA12 = `From: "Joe Q. Public" <john.q.public@example.com>
To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>
Cc: <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>
+Date: Tue, 1 Jul 2003 10:52:37 +0200
+Message-ID: <5678.21-Nov-1997@example.com>
+
+Hi everyone.
+`
+
+// rfc5322exampleA12WithTimezone is the rfc5322exampleA12 message with a
+// parenthesised zone abbreviation, "(GMT)", appended to the Date header.
+// Test case 10 uses it as mailData.
+var rfc5322exampleA12WithTimezone = `From: "Joe Q. Public" <john.q.public@example.com>
+To: Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>
+Cc: <boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>
Date: Tue, 1 Jul 2003 10:52:37 +0200 (GMT)
Message-ID: <5678.21-Nov-1997@example.com>
Hi everyone.
`
+
//todo: not yet implemented in net/mail
//once there is support for this, add it
var rfc5322exampleA13 = `From: Pete <pete@silly.example>
@@ -666,3 +787,16 @@
This is a message just to say hello.
So, "Hello".`
+
+// imageContentExample is a single-part message with an image/jpeg
+// Content-Type folded over three header lines and a base64
+// Content-Transfer-Encoding. Test case 8 uses it as mailData.
+var imageContentExample = `From: John Doe <jdoe@machine.example>
+Sender: Michael Jones <mjones@machine.example>
+To: Mary Smith <mary@example.net>
+Subject: Saying Hello
+Date: Fri, 21 Nov 1997 09:55:06 -0600
+Message-ID: <1234@local.machine.example>
+Content-Type: image/jpeg;
+ x-unix-mode=0644;
+ name="image.gif"
+Content-Transfer-Encoding: base64
+
+R0lGODlhAQE7`