Skip to content

Commit

Permalink
add Content-Disposition and Filename to the payload of incoming webhooks
Browse files Browse the repository at this point in the history
for each message part. The ContentDisposition value is the base value without
header key/value parameters. The Filename field is the likely filename of the
part. Different email clients encode filenames differently: there is a
standard MIME mechanism from RFC 2231, and there is the q/b-word encoding from
RFC 2047. Instead of letting users of the webhook API deal with those
differences, we provide just the parsed filename.

for issue #258 by morki, thanks for reporting!
  • Loading branch information
mjl- committed Dec 6, 2024
1 parent 8804d6b commit 4279383
Show file tree
Hide file tree
Showing 15 changed files with 170 additions and 52 deletions.
64 changes: 64 additions & 0 deletions message/part.go
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,70 @@ func (p *Part) IsDSN() bool {
(p.Parts[1].MediaSubType == "DELIVERY-STATUS" || p.Parts[1].MediaSubType == "GLOBAL-DELIVERY-STATUS")
}

// ErrParamEncoding is the sentinel error wrapped by errors about the encoding of
// MIME header parameters, such as an unparsable Content-Disposition header or a
// q/b-word-encoded parameter value. Callers can detect it with errors.Is.
var ErrParamEncoding = errors.New("bad header parameter encoding")

// DispositionFilename tries to parse the Content-Disposition header and its
// "filename" parameter. If the filename parameter is absent or empty, or the
// header can't be parsed, the "name" parameter from the Content-Type header is
// used for the filename. The returned filename is decoded according to RFC 2231
// or RFC 2047. This is a best-effort attempt to find a filename for a part. If
// no Content-Disposition header or filename was found, empty values without
// error are returned.
//
// If the header of the part cannot be read, empty values and an error that does
// not wrap ErrParamEncoding are returned.
//
// If the returned error is an ErrParamEncoding, it can be treated as a
// diagnostic: whatever disposition and filename could still be determined are
// returned along with it. When more than one problem is encountered, the first
// one is reported.
func (p *Part) DispositionFilename() (disposition string, filename string, err error) {
	h, err := p.Header()
	if err != nil {
		return "", "", fmt.Errorf("parsing header: %v", err)
	}

	var params map[string]string
	if cd := h.Get("Content-Disposition"); cd != "" {
		// mime.ParseMediaType still returns the media type (here: the disposition)
		// when only the parameters are malformed. Keep what we got and continue
		// with the Content-Type fallback instead of giving up on the whole part.
		var perr error
		disposition, params, perr = mime.ParseMediaType(cd)
		if perr != nil {
			err = fmt.Errorf("%w: parsing disposition header: %v", ErrParamEncoding, perr)
		}
	}

	// Indexing a nil params map yields "", which tryDecodeParam passes through.
	var derr error
	filename, derr = tryDecodeParam(params["filename"])
	if err == nil {
		err = derr
	}

	if filename == "" {
		// Fall back to the "name" parameter of the Content-Type header.
		var nerr error
		filename, nerr = tryDecodeParam(p.ContentTypeParams["name"])
		if err == nil {
			err = nerr
		}
	}
	return disposition, filename, err
}

// tryDecodeParam attempts to q/b-word-decode a parameter value, coming from the
// Content-Type "name" parameter or the Content-Disposition "filename" parameter.
//
// RFC 2231 specifies an encoding for non-ascii values in mime header parameters.
// But it appears common practice to instead just q/b-word encode the values.
// Thunderbird and gmail.com do this for the Content-Type "name" parameter.
// gmail.com also does that for the Content-Disposition "filename" parameter,
// where Thunderbird uses the RFC 2231-defined encoding. Go's mime.ParseMediaType
// parses the mechanism specified in RFC 2231 only. The value we get here would
// already be decoded properly for standards-compliant headers, like
// "filename*0*=UTF-8''%...; filename*1*=%...". We look for Q/B-word encoding
// markers ("=?"-prefix or "?="-suffix) and try to decode if present. This would
// only cause trouble for filenames having this prefix/suffix.
//
// Values without a marker (including the empty string) are returned unchanged
// with a nil error. When decoding is refused (pedantic mode) or fails, the
// original value is returned together with an error wrapping ErrParamEncoding.
func tryDecodeParam(name string) (string, error) {
	// An empty name has neither marker, so it is passed through here as well.
	looksEncoded := strings.HasPrefix(name, "=?") || strings.HasSuffix(name, "?=")
	if !looksEncoded {
		return name, nil
	}

	// todo: find where this is allowed. it seems quite common. perhaps we should remove the pedantic check?
	if Pedantic {
		return name, fmt.Errorf("%w: attachment contains rfc2047 q/b-word-encoded mime parameter instead of rfc2231-encoded", ErrParamEncoding)
	}

	decoded, decodeErr := wordDecoder.DecodeHeader(name)
	if decodeErr == nil {
		return decoded, nil
	}
	return name, fmt.Errorf("%w: q/b-word decoding mime parameter: %v", ErrParamEncoding, decodeErr)
}

// Reader returns a reader for the decoded body content.
func (p *Part) Reader() io.Reader {
return p.bodyReader(p.RawReader())
Expand Down
7 changes: 6 additions & 1 deletion queue/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -796,13 +796,18 @@ func Incoming(ctx context.Context, log mlog.Log, acc *store.Account, messageID s

log.Debug("composing webhook for incoming message")

structure, err := webhook.PartStructure(log, &part)
if err != nil {
return fmt.Errorf("parsing part structure: %v", err)
}

isIncoming = true
var rcptTo string
if m.RcptToDomain != "" {
rcptTo = m.RcptToLocalpart.String() + "@" + m.RcptToDomain
}
in := webhook.Incoming{
Structure: webhook.PartStructure(&part),
Structure: structure,
Meta: webhook.IncomingMeta{
MsgID: m.ID,
MailFrom: m.MailFrom,
Expand Down
5 changes: 4 additions & 1 deletion queue/hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ func TestHookIncoming(t *testing.T) {
tcheck(t, err, "decode incoming webhook")
in.Meta.Received = in.Meta.Received.Local() // For TZ UTC.

structure, err := webhook.PartStructure(pkglog, &part)
tcheck(t, err, "part structure")

expIncoming := webhook.Incoming{
From: []webhook.NameAddress{{Address: "[email protected]"}},
To: []webhook.NameAddress{{Address: "[email protected]"}},
Expand All @@ -92,7 +95,7 @@ func TestHookIncoming(t *testing.T) {
Subject: "test",
Text: "test email\n",

Structure: webhook.PartStructure(&part),
Structure: structure,
Meta: webhook.IncomingMeta{
MsgID: m.ID,
MailFrom: m.MailFrom,
Expand Down
4 changes: 3 additions & 1 deletion webaccount/account.js
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ var api;
"Outgoing": { "Name": "Outgoing", "Docs": "", "Fields": [{ "Name": "Version", "Docs": "", "Typewords": ["int32"] }, { "Name": "Event", "Docs": "", "Typewords": ["OutgoingEvent"] }, { "Name": "DSN", "Docs": "", "Typewords": ["bool"] }, { "Name": "Suppressing", "Docs": "", "Typewords": ["bool"] }, { "Name": "QueueMsgID", "Docs": "", "Typewords": ["int64"] }, { "Name": "FromID", "Docs": "", "Typewords": ["string"] }, { "Name": "MessageID", "Docs": "", "Typewords": ["string"] }, { "Name": "Subject", "Docs": "", "Typewords": ["string"] }, { "Name": "WebhookQueued", "Docs": "", "Typewords": ["timestamp"] }, { "Name": "SMTPCode", "Docs": "", "Typewords": ["int32"] }, { "Name": "SMTPEnhancedCode", "Docs": "", "Typewords": ["string"] }, { "Name": "Error", "Docs": "", "Typewords": ["string"] }, { "Name": "Extra", "Docs": "", "Typewords": ["{}", "string"] }] },
"Incoming": { "Name": "Incoming", "Docs": "", "Fields": [{ "Name": "Version", "Docs": "", "Typewords": ["int32"] }, { "Name": "From", "Docs": "", "Typewords": ["[]", "NameAddress"] }, { "Name": "To", "Docs": "", "Typewords": ["[]", "NameAddress"] }, { "Name": "CC", "Docs": "", "Typewords": ["[]", "NameAddress"] }, { "Name": "BCC", "Docs": "", "Typewords": ["[]", "NameAddress"] }, { "Name": "ReplyTo", "Docs": "", "Typewords": ["[]", "NameAddress"] }, { "Name": "Subject", "Docs": "", "Typewords": ["string"] }, { "Name": "MessageID", "Docs": "", "Typewords": ["string"] }, { "Name": "InReplyTo", "Docs": "", "Typewords": ["string"] }, { "Name": "References", "Docs": "", "Typewords": ["[]", "string"] }, { "Name": "Date", "Docs": "", "Typewords": ["nullable", "timestamp"] }, { "Name": "Text", "Docs": "", "Typewords": ["string"] }, { "Name": "HTML", "Docs": "", "Typewords": ["string"] }, { "Name": "Structure", "Docs": "", "Typewords": ["Structure"] }, { "Name": "Meta", "Docs": "", "Typewords": ["IncomingMeta"] }] },
"NameAddress": { "Name": "NameAddress", "Docs": "", "Fields": [{ "Name": "Name", "Docs": "", "Typewords": ["string"] }, { "Name": "Address", "Docs": "", "Typewords": ["string"] }] },
"Structure": { "Name": "Structure", "Docs": "", "Fields": [{ "Name": "ContentType", "Docs": "", "Typewords": ["string"] }, { "Name": "ContentTypeParams", "Docs": "", "Typewords": ["{}", "string"] }, { "Name": "ContentID", "Docs": "", "Typewords": ["string"] }, { "Name": "DecodedSize", "Docs": "", "Typewords": ["int64"] }, { "Name": "Parts", "Docs": "", "Typewords": ["[]", "Structure"] }] },
"Structure": { "Name": "Structure", "Docs": "", "Fields": [{ "Name": "ContentType", "Docs": "", "Typewords": ["string"] }, { "Name": "ContentTypeParams", "Docs": "", "Typewords": ["{}", "string"] }, { "Name": "ContentID", "Docs": "", "Typewords": ["string"] }, { "Name": "ContentDisposition", "Docs": "", "Typewords": ["string"] }, { "Name": "Filename", "Docs": "", "Typewords": ["string"] }, { "Name": "DecodedSize", "Docs": "", "Typewords": ["int64"] }, { "Name": "Parts", "Docs": "", "Typewords": ["[]", "Structure"] }] },
"IncomingMeta": { "Name": "IncomingMeta", "Docs": "", "Fields": [{ "Name": "MsgID", "Docs": "", "Typewords": ["int64"] }, { "Name": "MailFrom", "Docs": "", "Typewords": ["string"] }, { "Name": "MailFromValidated", "Docs": "", "Typewords": ["bool"] }, { "Name": "MsgFromValidated", "Docs": "", "Typewords": ["bool"] }, { "Name": "RcptTo", "Docs": "", "Typewords": ["string"] }, { "Name": "DKIMVerifiedDomains", "Docs": "", "Typewords": ["[]", "string"] }, { "Name": "RemoteIP", "Docs": "", "Typewords": ["string"] }, { "Name": "Received", "Docs": "", "Typewords": ["timestamp"] }, { "Name": "MailboxName", "Docs": "", "Typewords": ["string"] }, { "Name": "Automated", "Docs": "", "Typewords": ["bool"] }] },
"TLSPublicKey": { "Name": "TLSPublicKey", "Docs": "", "Fields": [{ "Name": "Fingerprint", "Docs": "", "Typewords": ["string"] }, { "Name": "Created", "Docs": "", "Typewords": ["timestamp"] }, { "Name": "Type", "Docs": "", "Typewords": ["string"] }, { "Name": "Name", "Docs": "", "Typewords": ["string"] }, { "Name": "NoIMAPPreauth", "Docs": "", "Typewords": ["bool"] }, { "Name": "CertDER", "Docs": "", "Typewords": ["nullable", "string"] }, { "Name": "Account", "Docs": "", "Typewords": ["string"] }, { "Name": "LoginAddress", "Docs": "", "Typewords": ["string"] }] },
"CSRFToken": { "Name": "CSRFToken", "Docs": "", "Values": null },
Expand Down Expand Up @@ -1390,6 +1390,8 @@ const index = async () => {
ContentType: 'text/plain',
ContentTypeParams: { charset: 'utf-8' },
ContentID: '',
ContentDisposition: '',
Filename: '',
DecodedSize: 8,
Parts: [],
},
Expand Down
2 changes: 2 additions & 0 deletions webaccount/account.ts
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,8 @@ const index = async () => {
ContentType: 'text/plain',
ContentTypeParams: {charset: 'utf-8'},
ContentID: '',
ContentDisposition: '',
Filename: '',
DecodedSize: 8,
Parts: [],
},
Expand Down
14 changes: 14 additions & 0 deletions webaccount/api.json
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,20 @@
"string"
]
},
{
"Name": "ContentDisposition",
"Docs": "Lower-case value, e.g. \"attachment\", \"inline\" or empty when absent. Without the key/value header parameters.",
"Typewords": [
"string"
]
},
{
"Name": "Filename",
"Docs": "Filename for this part, based on \"filename\" parameter from Content-Disposition, or \"name\" from Content-Type after decoding.",
"Typewords": [
"string"
]
},
{
"Name": "DecodedSize",
"Docs": "Size of content after decoding content-transfer-encoding. For text and HTML parts, this can be larger than the data returned since this size includes \\r\\n line endings.",
Expand Down
4 changes: 3 additions & 1 deletion webaccount/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ export interface Structure {
ContentType: string // Lower case, e.g. text/plain.
ContentTypeParams?: { [key: string]: string } // Lower case keys, original case values, e.g. {"charset": "UTF-8"}.
ContentID: string // Can be empty. Otherwise, should be a value wrapped in <>'s. For use in HTML, referenced as URI `cid:...`.
ContentDisposition: string // Lower-case value, e.g. "attachment", "inline" or empty when absent. Without the key/value header parameters.
Filename: string // Filename for this part, based on "filename" parameter from Content-Disposition, or "name" from Content-Type after decoding.
DecodedSize: number // Size of content after decoding content-transfer-encoding. For text and HTML parts, this can be larger than the data returned since this size includes \r\n line endings.
Parts?: Structure[] | null // Subparts of a multipart message, possibly recursive.
}
Expand Down Expand Up @@ -274,7 +276,7 @@ export const types: TypenameMap = {
"Outgoing": {"Name":"Outgoing","Docs":"","Fields":[{"Name":"Version","Docs":"","Typewords":["int32"]},{"Name":"Event","Docs":"","Typewords":["OutgoingEvent"]},{"Name":"DSN","Docs":"","Typewords":["bool"]},{"Name":"Suppressing","Docs":"","Typewords":["bool"]},{"Name":"QueueMsgID","Docs":"","Typewords":["int64"]},{"Name":"FromID","Docs":"","Typewords":["string"]},{"Name":"MessageID","Docs":"","Typewords":["string"]},{"Name":"Subject","Docs":"","Typewords":["string"]},{"Name":"WebhookQueued","Docs":"","Typewords":["timestamp"]},{"Name":"SMTPCode","Docs":"","Typewords":["int32"]},{"Name":"SMTPEnhancedCode","Docs":"","Typewords":["string"]},{"Name":"Error","Docs":"","Typewords":["string"]},{"Name":"Extra","Docs":"","Typewords":["{}","string"]}]},
"Incoming": {"Name":"Incoming","Docs":"","Fields":[{"Name":"Version","Docs":"","Typewords":["int32"]},{"Name":"From","Docs":"","Typewords":["[]","NameAddress"]},{"Name":"To","Docs":"","Typewords":["[]","NameAddress"]},{"Name":"CC","Docs":"","Typewords":["[]","NameAddress"]},{"Name":"BCC","Docs":"","Typewords":["[]","NameAddress"]},{"Name":"ReplyTo","Docs":"","Typewords":["[]","NameAddress"]},{"Name":"Subject","Docs":"","Typewords":["string"]},{"Name":"MessageID","Docs":"","Typewords":["string"]},{"Name":"InReplyTo","Docs":"","Typewords":["string"]},{"Name":"References","Docs":"","Typewords":["[]","string"]},{"Name":"Date","Docs":"","Typewords":["nullable","timestamp"]},{"Name":"Text","Docs":"","Typewords":["string"]},{"Name":"HTML","Docs":"","Typewords":["string"]},{"Name":"Structure","Docs":"","Typewords":["Structure"]},{"Name":"Meta","Docs":"","Typewords":["IncomingMeta"]}]},
"NameAddress": {"Name":"NameAddress","Docs":"","Fields":[{"Name":"Name","Docs":"","Typewords":["string"]},{"Name":"Address","Docs":"","Typewords":["string"]}]},
"Structure": {"Name":"Structure","Docs":"","Fields":[{"Name":"ContentType","Docs":"","Typewords":["string"]},{"Name":"ContentTypeParams","Docs":"","Typewords":["{}","string"]},{"Name":"ContentID","Docs":"","Typewords":["string"]},{"Name":"DecodedSize","Docs":"","Typewords":["int64"]},{"Name":"Parts","Docs":"","Typewords":["[]","Structure"]}]},
"Structure": {"Name":"Structure","Docs":"","Fields":[{"Name":"ContentType","Docs":"","Typewords":["string"]},{"Name":"ContentTypeParams","Docs":"","Typewords":["{}","string"]},{"Name":"ContentID","Docs":"","Typewords":["string"]},{"Name":"ContentDisposition","Docs":"","Typewords":["string"]},{"Name":"Filename","Docs":"","Typewords":["string"]},{"Name":"DecodedSize","Docs":"","Typewords":["int64"]},{"Name":"Parts","Docs":"","Typewords":["[]","Structure"]}]},
"IncomingMeta": {"Name":"IncomingMeta","Docs":"","Fields":[{"Name":"MsgID","Docs":"","Typewords":["int64"]},{"Name":"MailFrom","Docs":"","Typewords":["string"]},{"Name":"MailFromValidated","Docs":"","Typewords":["bool"]},{"Name":"MsgFromValidated","Docs":"","Typewords":["bool"]},{"Name":"RcptTo","Docs":"","Typewords":["string"]},{"Name":"DKIMVerifiedDomains","Docs":"","Typewords":["[]","string"]},{"Name":"RemoteIP","Docs":"","Typewords":["string"]},{"Name":"Received","Docs":"","Typewords":["timestamp"]},{"Name":"MailboxName","Docs":"","Typewords":["string"]},{"Name":"Automated","Docs":"","Typewords":["bool"]}]},
"TLSPublicKey": {"Name":"TLSPublicKey","Docs":"","Fields":[{"Name":"Fingerprint","Docs":"","Typewords":["string"]},{"Name":"Created","Docs":"","Typewords":["timestamp"]},{"Name":"Type","Docs":"","Typewords":["string"]},{"Name":"Name","Docs":"","Typewords":["string"]},{"Name":"NoIMAPPreauth","Docs":"","Typewords":["bool"]},{"Name":"CertDER","Docs":"","Typewords":["nullable","string"]},{"Name":"Account","Docs":"","Typewords":["string"]},{"Name":"LoginAddress","Docs":"","Typewords":["string"]}]},
"CSRFToken": {"Name":"CSRFToken","Docs":"","Values":null},
Expand Down
2 changes: 2 additions & 0 deletions webapi/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,8 @@ Example JSON body for webhooks for incoming delivery of basic message:
"charset": "utf-8"
},
"ContentID": "",
"ContentDisposition": "",
"Filename": "",
"DecodedSize": 17,
"Parts": []
},
Expand Down
5 changes: 4 additions & 1 deletion webapisrv/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1263,9 +1263,12 @@ func (s server) MessageGet(ctx context.Context, req webapi.MessageGetRequest) (r
MailboxName: mb.Name,
}

structure, err := webhook.PartStructure(log, &p)
xcheckf(err, "parsing structure")

result := webapi.MessageGetResult{
Message: msg,
Structure: webhook.PartStructure(&p),
Structure: structure,
Meta: meta,
}
return result, nil
Expand Down
4 changes: 3 additions & 1 deletion webapisrv/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ func TestServer(t *testing.T) {
tcheckf(t, err, "reading raw message")
part, err := message.EnsurePart(log.Logger, true, bytes.NewReader(b.Bytes()), int64(b.Len()))
tcheckf(t, err, "parsing raw message")
tcompare(t, webhook.PartStructure(&part), msgRes.Structure)
structure, err := webhook.PartStructure(log, &part)
tcheckf(t, err, "part structure")
tcompare(t, structure, msgRes.Structure)

_, err = client.MessageRawGet(ctxbg, webapi.MessageRawGetRequest{MsgID: 1 + 999})
terrcode(t, err, "messageNotFound")
Expand Down
42 changes: 29 additions & 13 deletions webhook/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
package webhook

import (
"errors"
"strings"
"time"

"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"
)

// OutgoingEvent is an activity for an outgoing delivery. Either generated by the
Expand Down Expand Up @@ -135,29 +137,43 @@ type NameAddress struct {
}

type Structure struct {
ContentType string // Lower case, e.g. text/plain.
ContentTypeParams map[string]string // Lower case keys, original case values, e.g. {"charset": "UTF-8"}.
ContentID string // Can be empty. Otherwise, should be a value wrapped in <>'s. For use in HTML, referenced as URI `cid:...`.
DecodedSize int64 // Size of content after decoding content-transfer-encoding. For text and HTML parts, this can be larger than the data returned since this size includes \r\n line endings.
Parts []Structure // Subparts of a multipart message, possibly recursive.
ContentType string // Lower case, e.g. text/plain.
ContentTypeParams map[string]string // Lower case keys, original case values, e.g. {"charset": "UTF-8"}.
ContentID string // Can be empty. Otherwise, should be a value wrapped in <>'s. For use in HTML, referenced as URI `cid:...`.
ContentDisposition string // Lower-case value, e.g. "attachment", "inline" or empty when absent. Without the key/value header parameters.
Filename string // Filename for this part, based on "filename" parameter from Content-Disposition, or "name" from Content-Type after decoding.
DecodedSize int64 // Size of content after decoding content-transfer-encoding. For text and HTML parts, this can be larger than the data returned since this size includes \r\n line endings.
Parts []Structure // Subparts of a multipart message, possibly recursive.
}

// PartStructure returns a Structure for a parsed message part.
func PartStructure(p *message.Part) Structure {
func PartStructure(log mlog.Log, p *message.Part) (Structure, error) {
parts := make([]Structure, len(p.Parts))
for i := range p.Parts {
parts[i] = PartStructure(&p.Parts[i])
var err error
parts[i], err = PartStructure(log, &p.Parts[i])
if err != nil && !errors.Is(err, message.ErrParamEncoding) {
return Structure{}, err
}
}
disp, filename, err := p.DispositionFilename()
if err != nil && errors.Is(err, message.ErrParamEncoding) {
log.Debugx("parsing disposition/filename", err)
} else if err != nil {
return Structure{}, err
}
s := Structure{
ContentType: strings.ToLower(p.MediaType + "/" + p.MediaSubType),
ContentTypeParams: p.ContentTypeParams,
ContentID: p.ContentID,
DecodedSize: p.DecodedSize,
Parts: parts,
ContentType: strings.ToLower(p.MediaType + "/" + p.MediaSubType),
ContentTypeParams: p.ContentTypeParams,
ContentID: p.ContentID,
ContentDisposition: strings.ToLower(disp),
Filename: filename,
DecodedSize: p.DecodedSize,
Parts: parts,
}
// Replace nil map with empty map, for easier to use JSON.
if s.ContentTypeParams == nil {
s.ContentTypeParams = map[string]string{}
}
return s
return s, nil
}
7 changes: 6 additions & 1 deletion webmail/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,12 @@ func (w Webmail) MessageSubmit(ctx context.Context, m SubmitMessage) {
ap = ap.Parts[xp]
}

filename := tryDecodeParam(log, ap.ContentTypeParams["name"])
_, filename, err := ap.DispositionFilename()
if err != nil && errors.Is(err, message.ErrParamEncoding) {
log.Debugx("parsing disposition/filename", err)
} else {
xcheckf(ctx, err, "reading disposition")
}
if filename == "" {
filename = "unnamed.bin"
}
Expand Down
Loading

0 comments on commit 4279383

Please sign in to comment.