-
Notifications
You must be signed in to change notification settings - Fork 5
/
TikaParseString.js
33 lines (23 loc) · 1.01 KB
/
TikaParseString.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Extracts body text from a document's Base64-encoded raw content using
 * Apache Tika.
 *
 * Reads the first value of the "_raw_content_" field, Base64-decodes it,
 * feeds the decoded bytes to Tika's AutoDetectParser and returns the text
 * collected by a BodyContentHandler.
 *
 * Written for a JVM-hosted script engine: `java` and `org` are the engine's
 * Java package globals, so `var` is kept for ES5-only engines.
 *
 * @param {Object} doc - Document exposing getFirstFieldValue(fieldName).
 * @returns {string} Text collected by the content handler, or an empty
 *     string when the document has no "_raw_content_" value.
 */
function tikaParseString(doc) {
    var ByteArrayInputStream = java.io.ByteArrayInputStream;
    var Metadata = org.apache.tika.metadata.Metadata;
    var AutoDetectParser = org.apache.tika.parser.AutoDetectParser;
    var BodyContentHandler = org.apache.tika.sax.BodyContentHandler;

    var encoded = doc.getFirstFieldValue("_raw_content_");
    if (encoded == null) {
        // Nothing to parse; avoid handing null to the Base64 decoder.
        return "";
    }

    // The raw content is Base64 text. decode() already yields a byte array,
    // so it goes straight into the stream -- a byte array has no getBytes().
    var rawBytes = java.util.Base64.getDecoder().decode(encoded);
    var stream = new ByteArrayInputStream(rawBytes);

    var parser = new AutoDetectParser();
    // NOTE(review): the no-arg BodyContentHandler is documented to cap output
    // (100,000 characters by default) -- confirm that limit suits large inputs.
    var handler = new BodyContentHandler();
    var metadata = new Metadata();

    parser.parse(stream, handler, metadata);
    return handler.toString();
}