-
Notifications
You must be signed in to change notification settings - Fork 7
/
mhtToHtml.js
124 lines (109 loc) · 3.25 KB
/
mhtToHtml.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
'use strict';
var fs = require('fs'),
readline = require('readline');
// Path of the file to parse, taken from the first command-line argument
var file = process.argv[2];

// Parsing result: images are stored by content id; the subject and body
// properties are added while the file is read
var email = {images: {}};

// State of the block currently being read
var currentBlockDecoding = '',      // payload lines accumulated for the block
    currentBlockContentType = '',   // part of Content-Type before the "/"
    currentBlockContentExt = '',    // part of Content-Type after the "/"
    currentBlockContentId = '',     // value found in the Content-ID header
    currentBlockDecodingIndex = 0,  // only ever reset to 0 in the visible code
    blockstart = false;             // set to true once a boundary line is seen
/**
 * Find and set the content type of the current block.
 *
 * When `line` is a `Content-Type: <type>/<subtype>` header, the lower-cased
 * type is stored in `currentBlockContentType` and the lower-cased subtype in
 * `currentBlockContentExt`. Any other line leaves both variables untouched.
 *
 * @param {string} line - One line of the file being parsed
 */
function extractContentType(line) {
  // Header names and media types are case-insensitive, and a subtype may hold
  // more than letters (e.g. "svg+xml", "x-icon"), so capture everything up to
  // the first whitespace or ";" parameter separator instead of only [a-z]+.
  var contentTypeMatch = /^Content-Type:\s*([^\/\s;]+)\/([^\s;]+)/i.exec(line);
  if (contentTypeMatch) {
    currentBlockContentType = contentTypeMatch[1].toLowerCase();
    currentBlockContentExt = contentTypeMatch[2].toLowerCase();
  }
}
/**
 * Record the content id of the current block.
 *
 * When `line` starts with `Content-ID: <`, the text between that "<" and the
 * first ">" is stored in `currentBlockContentId`. Any other line is ignored.
 *
 * @param {string} line - One line of the file being parsed
 */
function extractContentId(line) {
  var found = /^Content-ID: <([^>]*)/.exec(line);
  if (found === null) {
    return;
  }
  currentBlockContentId = found[1];
}
/**
 * We have a full base64 block to decode.
 *
 * - If the content type is image, the still-encoded payload is saved in
 *   `email.images` under the block's content id.
 * - If the content type is text, the payload is decoded into `email.body`
 *   (a later text block overwrites an earlier one).
 *
 * All per-block state is then cleared, so a following block that lacks a
 * Content-Type or Content-ID header cannot inherit the values of this one.
 */
function finishBaseDecoding() {
  if (currentBlockContentType === 'image') {
    // An image without a content id cannot be referenced from the body, and
    // storing it under '' would make replaceCidByBase64 match every
    // src="cid:..." attribute.
    if (currentBlockContentId !== '') {
      email.images[currentBlockContentId] = {
        ext: currentBlockContentExt,
        base: currentBlockDecoding
      };
    }
  } else if (currentBlockContentType === 'text') {
    email.body = atob(currentBlockDecoding);
  }

  blockstart = false;
  currentBlockDecodingIndex = 0;
  currentBlockDecoding = '';

  // Forget the headers of the finished block; otherwise a stale type/id would
  // let the next block overwrite the image that was just stored.
  currentBlockContentType = '';
  currentBlockContentExt = '';
  currentBlockContentId = '';
}
/**
 * Base64 decode function.
 *
 * @param {string} string - Base64-encoded data
 * @returns {string} The decoded bytes as a binary string (one character per byte)
 */
function atob(string) {
  // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor
  return Buffer.from(string, 'base64').toString('binary');
}
/**
 * Replace each img cid by a valid base64 src.
 *
 * For every entry of `email.images`, each `src="cid:<id>...` attribute value
 * in `email.body` (up to, but not including, the closing quote) is replaced
 * by a `data:image/<ext>;base64,<data>` URI. Does nothing when no body has
 * been decoded.
 */
function replaceCidByBase64() {
  // No text block was decoded: there is no body to rewrite
  if (typeof email.body !== 'string') {
    return;
  }

  Object.keys(email.images).forEach(function(id) {
    var image = email.images[id];
    // A content id may contain regex metacharacters (".", "+", "$", ...);
    // escape them so the id is matched literally.
    var escapedId = id.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    var search = new RegExp('src="cid:' + escapedId + '[^"]*', 'g');
    var dataUri = 'src="data:image/' + image.ext + ';base64,' + image.base;
    // A function replacer inserts the URI verbatim; "$" sequences in it are
    // not interpreted as replacement patterns.
    email.body = email.body.replace(search, function() {
      return dataUri;
    });
  });
}
// Line-by-line reader over the file we want to parse
var rd = readline.createInterface({
  input: fs.createReadStream(file),
  output: process.stdout,
  terminal: false
});

// Feed every non-empty line to the parser
rd.on('line', function(line) {
  if (line === '') {
    return;
  }

  // Look for a "Subject: " line until a non-empty subject has been stored
  if (!email.subject) {
    var subjectMatch = line.match(/^Subject: (.*)/);
    if (subjectMatch) {
      email.subject = subjectMatch[1];
    }
  }

  // A boundary line closes the previous block and opens a new one
  if (/^----boundary/.test(line)) {
    finishBaseDecoding();
    blockstart = true;
    return;
  }

  // Apart from the subject check above, lines seen before the first
  // boundary are ignored
  if (!blockstart) {
    return;
  }

  var headerMatch = line.match(/^Content-([^:]*)/);
  if (!headerMatch) {
    // Not a Content-* header: the line is payload of the current block
    currentBlockDecoding += line;
    return;
  }

  // Only the Type and ID headers are used; other Content-* lines are dropped
  var headerName = headerMatch[1];
  if (headerName === 'Type') {
    extractContentType(line);
  } else if (headerName === 'ID') {
    extractContentId(line);
  }
});

// Once the whole file has been read, inline the images and output the result
rd.on('close', function() {
  replaceCidByBase64();
  // Change here the way you want to get your html result
  console.log(email.body);
});