Skip to content

Commit 01ca2dd

Browse files
committed
fix(gmail): encode body parts as base64 and decode numeric HTML entities
1 parent 02b88f9 commit 01ca2dd

2 files changed

Lines changed: 55 additions & 10 deletions

File tree

apps/sim/tools/gmail/utils.test.ts

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,20 @@ function decodeSimpleMessage(encoded: string): string {
1515
return Buffer.from(encoded, 'base64url').toString('utf-8')
1616
}
1717

18+
/**
 * Extract and base64-decode the body of a specific MIME part identified by its
 * Content-Type prefix (e.g. `text/plain`, `text/html`). Returns the decoded
 * UTF-8 string.
 *
 * The prefix is matched literally (regex metacharacters are escaped), and both
 * LF and CRLF line endings are accepted.
 *
 * @param mime - The raw MIME message text to search.
 * @param contentTypePrefix - Literal start of the part's Content-Type value.
 * @returns The decoded body of the first matching base64-encoded part.
 * @throws Error when no base64-encoded part with that Content-Type prefix exists.
 */
function decodePart(mime: string, contentTypePrefix: string): string {
  // Escape regex metacharacters so e.g. `.` or `+` in the prefix cannot act as wildcards.
  const literalPrefix = contentTypePrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const eol = '\\r?\\n'
  const partRegex = new RegExp(
    `Content-Type: ${literalPrefix}[^\\r\\n]*${eol}` +
      `Content-Transfer-Encoding: base64${eol}${eol}` +
      // Lazy capture: stop at the first blank line followed by a boundary marker.
      `([\\s\\S]*?)${eol}${eol}--`
  )
  const match = mime.match(partRegex)
  if (!match) throw new Error(`No ${contentTypePrefix} part found`)
  const [, encodedBody = ''] = match
  // Base64 bodies are wrapped across lines; drop all whitespace before decoding.
  return Buffer.from(encodedBody.replace(/\s+/g, ''), 'base64').toString('utf-8')
}
31+
1832
describe('encodeRfc2047', () => {
1933
it('returns ASCII text unchanged', () => {
2034
expect(encodeRfc2047('Simple ASCII Subject')).toBe('Simple ASCII Subject')
@@ -81,6 +95,12 @@ describe('htmlToPlainText', () => {
8195
'&lt; is the literal < entity'
8296
)
8397
})
98+
99+
it('decodes decimal and hexadecimal numeric entities', () => {
100+
expect(htmlToPlainText('<p>&#8220;hi&#8221; &#160;and&#x2019;s</p>')).toBe(
101+
'\u201chi\u201d \u00a0and\u2019s'
102+
)
103+
})
84104
})
85105

86106
describe('buildSimpleEmailMessage', () => {
@@ -96,8 +116,21 @@ describe('buildSimpleEmailMessage', () => {
96116
const htmlIdx = decoded.indexOf('text/html')
97117
expect(plainIdx).toBeGreaterThan(-1)
98118
expect(htmlIdx).toBeGreaterThan(plainIdx)
99-
expect(decoded).toContain('Hi Janice,')
100-
expect(decoded).toContain('<p>Hi Janice,</p>')
119+
expect(decodePart(decoded, 'text/plain')).toBe('Hi Janice,\n\nQuick question.')
120+
expect(decodePart(decoded, 'text/html')).toContain('<p>Hi Janice,</p>')
121+
})
122+
123+
it('encodes bodies as base64 so UTF-8 (emoji, accents) round-trips cleanly', () => {
124+
const body = 'Café 🎉 — résumé'
125+
const encoded = buildSimpleEmailMessage({
126+
to: 'a@example.com',
127+
subject: 'Hi',
128+
body,
129+
})
130+
const decoded = decodeSimpleMessage(encoded)
131+
expect(decoded).toContain('Content-Transfer-Encoding: base64')
132+
expect(decodePart(decoded, 'text/plain')).toBe(body)
133+
expect(decodePart(decoded, 'text/html')).toContain('Café 🎉 — résumé')
101134
})
102135

103136
it('uses the supplied HTML body and derives a plain-text fallback when contentType is html', () => {
@@ -108,8 +141,8 @@ describe('buildSimpleEmailMessage', () => {
108141
contentType: 'html',
109142
})
110143
const decoded = decodeSimpleMessage(encoded)
111-
expect(decoded).toContain('<p>Hello <b>there</b></p>')
112-
expect(decoded).toContain('Hello there')
144+
expect(decodePart(decoded, 'text/html')).toBe('<p>Hello <b>there</b></p>')
145+
expect(decodePart(decoded, 'text/plain')).toBe('Hello there')
113146
})
114147

115148
it('includes threading headers when replying', () => {
@@ -142,7 +175,8 @@ describe('buildMimeMessage', () => {
142175
expect(message).toMatch(/Content-Type: multipart\/mixed; boundary="([^"]+)"/)
143176
expect(message).toMatch(/Content-Type: multipart\/alternative; boundary="([^"]+)"/)
144177
expect(message).toContain('Content-Disposition: attachment; filename="note.txt"')
145-
expect(message).toContain('<p>Hello</p>')
178+
expect(decodePart(message, 'text/plain')).toBe('Hello')
179+
expect(decodePart(message, 'text/html')).toContain('<p>Hello</p>')
146180
})
147181

148182
it('emits multipart/alternative without multipart/mixed when no attachments', () => {

apps/sim/tools/gmail/utils.ts

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,8 @@ export function htmlToPlainText(html: string): string {
359359
.replace(/&lt;/g, '<')
360360
.replace(/&gt;/g, '>')
361361
.replace(/&quot;/g, '"')
362-
.replace(/&#39;/g, "'")
362+
.replace(/&#x([0-9a-f]+);/gi, (_, hex) => String.fromCodePoint(Number.parseInt(hex, 16)))
363+
.replace(/&#(\d+);/g, (_, dec) => String.fromCodePoint(Number.parseInt(dec, 10)))
363364
.replace(/&amp;/g, '&')
364365
.replace(/\n{3,}/g, '\n\n')
365366
.trim()
@@ -381,6 +382,16 @@ export function buildBodyAlternatives(
381382
return { plain: body, html: plainTextToHtml(body) }
382383
}
383384

385+
/**
 * Base64-encode a text body and split it into lines of at most 76 characters,
 * the line-length limit RFC 2045 places on base64 content. Base64 allows any
 * UTF-8 text (emoji, accented characters, etc.) to be carried safely, whereas
 * `7bit` is only valid for strict 7-bit ASCII.
 *
 * @param content - The text body to encode; it is serialized as UTF-8.
 * @returns The wrapped base64 lines, or a single empty line for empty content.
 */
function encodeBodyBase64(content: string): string[] {
  const encoded = Buffer.from(content, 'utf-8').toString('base64')
  // Callers always get at least one line, even when there is nothing to encode.
  if (encoded.length === 0) {
    return ['']
  }

  const wrappedLines: string[] = []
  for (let offset = 0; offset < encoded.length; offset += 76) {
    wrappedLines.push(encoded.slice(offset, offset + 76))
  }
  return wrappedLines
}
394+
384395
/**
385396
* Render the inner part of a `multipart/alternative` section (text/plain
386397
* followed by text/html, per RFC 2046 — clients pick the last format they
@@ -390,15 +401,15 @@ function renderAlternativeParts(plain: string, html: string, boundary: string):
390401
return [
391402
`--${boundary}`,
392403
'Content-Type: text/plain; charset="UTF-8"',
393-
'Content-Transfer-Encoding: 7bit',
404+
'Content-Transfer-Encoding: base64',
394405
'',
395-
plain,
406+
...encodeBodyBase64(plain),
396407
'',
397408
`--${boundary}`,
398409
'Content-Type: text/html; charset="UTF-8"',
399-
'Content-Transfer-Encoding: 7bit',
410+
'Content-Transfer-Encoding: base64',
400411
'',
401-
html,
412+
...encodeBodyBase64(html),
402413
'',
403414
`--${boundary}--`,
404415
]

0 commit comments

Comments
 (0)