Skip to content

Commit

Permalink
Consume Tika stderr output to avoid crash. (#1366)
Browse files Browse the repository at this point in the history
- Resolves VUFIND-1330.
  • Loading branch information
demiankatz authored May 2, 2019
1 parent 8081645 commit ba744de
Showing 1 changed file with 30 additions and 7 deletions.
37 changes: 30 additions & 7 deletions import/index_java/src/org/vufind/index/FullTextTools.java
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ public String harvestWithAperture(String url, String aperturePath) {
//System.out.println("Loading fulltext from " + url + ". Please wait ...");
try {
Process p = Runtime.getRuntime().exec(cmd);

// Debugging output
/*
BufferedReader stdInput = new BufferedReader(new
Expand All @@ -216,7 +216,7 @@ public String harvestWithAperture(String url, String aperturePath) {
System.out.println(s);
}
*/

// Wait for Aperture to finish
p.waitFor();
} catch (Throwable e) {
Expand Down Expand Up @@ -248,6 +248,28 @@ public String harvestWithAperture(String url, String aperturePath) {
return plainText;
}

class ErrorStreamHandler extends Thread {
InputStream stdErr;

ErrorStreamHandler(InputStream stdErr) {
this.stdErr = stdErr;
}

public void run()
{
try {
InputStreamReader isr = new InputStreamReader(stdErr, "UTF8");
BufferedReader br = new BufferedReader(isr);
String line = null;
while ((line = br.readLine()) != null) {
logger.debug(line);
}
} catch (Exception e) {
e.printStackTrace();
}
}
}

/**
* Harvest the contents of a document file (PDF, Word, etc.) using Tika.
* This method will only work if Tika is properly configured in the fulltext.ini
Expand All @@ -258,16 +280,17 @@ public String harvestWithAperture(String url, String aperturePath) {
* @return the full-text
*/
public String harvestWithTika(String url, String scraperPath) {

// Construct the command
String cmd = "java -jar " + scraperPath + " -t -eUTF8 " + url;

StringBuilder stringBuilder= new StringBuilder();

// Call our scraper
//System.out.println("Loading fulltext from " + url + ". Please wait ...");
try {
Process p = Runtime.getRuntime().exec(cmd);
ProcessBuilder pb = new ProcessBuilder(
"java", "-jar", scraperPath, "-t", "-eutf8", url
);
Process p = pb.start();
ErrorStreamHandler esh = new ErrorStreamHandler(p.getErrorStream());
esh.start();
BufferedReader stdInput = new BufferedReader(new
InputStreamReader(p.getInputStream(), "UTF8"));

Expand Down

0 comments on commit ba744de

Please sign in to comment.