diff --git a/.gitignore b/.gitignore index 62eb210..3154aea 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,8 @@ target/ luceneIndex_wikipedia/ testIndex/ bla.txt +.idea/ +leechcrawler.iml +testIndex_4PostProcessing/ -# Except this file -#!.gitignore diff --git a/pom.xml b/pom.xml index f4ea186..4ed9abe 100644 --- a/pom.xml +++ b/pom.xml @@ -1,345 +1,252 @@ - 4.0.0 - de.dfki.km - leechcrawler - jar - 1.11.1 - leechcrawler - http://leechcrawler.github.com/leech/ - - - - Christian Reuschling - reuschling@dfki.uni-kl.de - DFKI, KnowledgeManagement - http://www.dfki.de/web/forschung/km - - - - - 1.7 - 1.7 - - UTF-8 - UTF-8 - UTF-8 - - - - - - - - - - - - - - artifactory-libs-releases-local - http://www.dfki.uni-kl.de/artifactory/libs-releases-local - - - artifactory-libs-snapshots-local - http://www.dfki.uni-kl.de/artifactory/libs-snapshots-local - false - - - - - - - - junit - junit - 4.8.2 - test - - - - org.apache.tika - tika-core - 1.14 - - - - org.apache.tika - tika-parsers - 1.14 - - - - org.apache.lucene - lucene-core - 6.4.2 - - - - org.apache.lucene - lucene-analyzers-common - 6.4.2 - - - - - info.bliki.wiki - bliki-core - 3.0.19 - - - - javax.mail - mail - 1.4.5 - - - - - org.apache.commons - commons-lang3 - 3.3.2 - - - - - org.mapdb - mapdb - - 2.0-beta11 - - - - com.cedarsoftware - json-io - 2.9.3 - - - - de.dfki.km - inquisition - 20151124 - - - - - org.apache.solr - solr-solrj - 5.2.1 - - - - - - - - - - - - - - - org.codehaus.mojo - appassembler-maven-plugin - 1.2.2 - - ${project.build.directory}/assembleDir - -Xmx1G - bin - lib - flat - true - true - - - - .sh - - - - de.dfki.km.leech.util.LuceneIndexCreator - createLuceneIndex - - - de.dfki.km.leech.util.SolrIndexCreator - createSolrIndex - - - de.dfki.km.leech.Leech - leech - - - - - - - - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - ${maven.compiler.source} - ${maven.compiler.target} - ${maven.compiler.encoding} - - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.5 - - ${maven.compiler.encoding} - true - - - - org.apache.maven.plugins - maven-resources-plugin - 2.5 - - ${maven.compiler.encoding} - - - - org.apache.maven.plugins - maven-source-plugin - 2.1.2 - - - org.apache.maven.plugins - maven-deploy-plugin - 2.5 - - - org.apache.maven.plugins - maven-install-plugin - 2.3.1 - - - org.apache.maven.plugins - maven-release-plugin - 2.2 - - - - maven-assembly-plugin - 2.3 - - - src/main/assembly/distributable.xml - - - - - - - - - - - - - - javadoc - - true - - javadoc - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - javadoc-jar - package - - jar - - - - - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.5 - - - javadoc - - aggregate-jar - - - - - - - - - - - - package-source - - true - - package-source - - - - - - - maven-source-plugin - 2.1.2 - - - package-source-jar - package - - jar - - - - - - - - - - package-project - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.3 - - - - - - - + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + 4.0.0 + de.dfki.sds + leechcrawler + jar + 1.25.0 + leechcrawler + http://leechcrawler.github.com/leech/ + + + + Christian Reuschling + reuschling@dfki.uni-kl.de + DFKI, SDS department + https://www.dfki.de/en/web/research/research-departments/smart-data-knowledge-services/ + + + + + 1.7 + 1.7 + + UTF-8 + UTF-8 + UTF-8 + + -Xdoclint:none + + + + + + artifactory-libs-releases-local + http://www.dfki.uni-kl.de/artifactory/libs-releases-local + + + 
artifactory-libs-snapshots-local + http://www.dfki.uni-kl.de/artifactory/libs-snapshots-local + + + + + + + artifactory-libs-releases + http://www.dfki.uni-kl.de/artifactory/libs-releases + + + artifactory-libs-snapshots + http://www.dfki.uni-kl.de/artifactory/libs-snapshots + + + + + + + junit + junit + 4.8.2 + test + + + + + + org.apache.tika + tika-parsers + 1.25 + + + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.4.0 + + + + + + org.apache.lucene + lucene-core + 6.4.2 + + + + org.apache.lucene + lucene-analyzers-common + 6.4.2 + + + + org.apache.lucene + lucene-queryparser + 6.4.2 + + + + + info.bliki.wiki + bliki-core + + 3.1.0 + + + + com.sun.mail + javax.mail + 1.6.2 + + + + + org.apache.commons + commons-lang3 + 3.11 + + + + + org.mapdb + mapdb + 3.0.8 + + + + com.cedarsoftware + json-io + 2.9.3 + + + + de.dfki.sds + inquisitor + 23_1-SNAPSHOT + + + + + org.apache.solr + solr-solrj + 5.2.1 + + + + + + + + + + + + org.codehaus.mojo + appassembler-maven-plugin + 2.1.0 + + ${project.build.directory}/assembleDir + -Xmx5G --add-opens java.base/java.lang=ALL-UNNAMED + /bin + lib + flat + true + true + + + + .sh + + + + de.dfki.km.leech.util.LuceneIndexCreator + createLuceneIndex + + + de.dfki.km.leech.util.SolrIndexCreator + createSolrIndex + + + de.dfki.km.leech.Leech + leechcrawler + + + + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.2.1 + + + attach-sources + + jar + + + + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + ${maven.compiler.source} + ${maven.compiler.target} + ${maven.compiler.encoding} + + + -parameters + + + + + + maven-assembly-plugin + 2.3 + + + src/main/assembly/distributable.xml + + + + + + + + + + + + disable-java8-doclint + + [1.8,) + + + -Xdoclint:none + + + + diff --git a/src/main/java/de/dfki/km/leech/Leech.java b/src/main/java/de/dfki/km/leech/Leech.java index fc1a898..0f77826 100644 --- a/src/main/java/de/dfki/km/leech/Leech.java +++ b/src/main/java/de/dfki/km/leech/Leech.java @@ -40,7 +40,7 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.config.DirectoryCrawlerContext; import de.dfki.km.leech.config.LeechConfig; diff --git a/src/main/java/de/dfki/km/leech/config/CrawlerContext.java b/src/main/java/de/dfki/km/leech/config/CrawlerContext.java index fffcda2..1db187f 100644 --- a/src/main/java/de/dfki/km/leech/config/CrawlerContext.java +++ b/src/main/java/de/dfki/km/leech/config/CrawlerContext.java @@ -89,7 +89,7 @@ public class CrawlerContext protected Map m_userHeaders = null; /** - * Creates a new ParseContext Object with an entry with this {@link #CrawlerContext} configuration. This method is only for convenience. + * Creates a new ParseContext Object with an entry with this {@link CrawlerContext} configuration. This method is only for convenience. * * @return the created ParseContext Object. 
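 * <p>
 * A minimal usage sketch (the method name {@code createParseContext} is assumed here, since the signature itself is not visible in this hunk; the commented call shows one possible way to pass the context on):
 * <pre>
 * CrawlerContext crawlerContext = new CrawlerContext();
 * ParseContext parseContext = crawlerContext.createParseContext();
 * // e.g. hand it over to a crawl (Leech.parse signature assumed):
 * // new Leech().parse("/path/to/crawl", contentHandler, parseContext);
 * </pre>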
*/ diff --git a/src/main/java/de/dfki/km/leech/io/ImapURLStreamProvider.java b/src/main/java/de/dfki/km/leech/io/ImapURLStreamProvider.java index 5ee6b69..92ef3d1 100644 --- a/src/main/java/de/dfki/km/leech/io/ImapURLStreamProvider.java +++ b/src/main/java/de/dfki/km/leech/io/ImapURLStreamProvider.java @@ -18,6 +18,17 @@ +import com.sun.mail.imap.IMAPFolder; +import com.sun.mail.imap.IMAPMessage; +import de.dfki.km.leech.detect.DatasourceMediaTypes; +import de.dfki.km.leech.parser.ImapCrawlerParser; +import de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory; +import de.dfki.km.leech.util.UrlUtil; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.ParseContext; + +import javax.mail.*; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -27,24 +38,6 @@ import java.util.logging.Level; import java.util.logging.Logger; -import javax.mail.Folder; -import javax.mail.Message; -import javax.mail.MessagingException; -import javax.mail.Store; -import javax.mail.URLName; - -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; - -import com.sun.mail.imap.IMAPFolder; -import com.sun.mail.imap.IMAPMessage; - -import de.dfki.km.leech.detect.DatasourceMediaTypes; -import de.dfki.km.leech.parser.ImapCrawlerParser; -import de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory; -import de.dfki.km.leech.util.UrlUtil; - public class ImapURLStreamProvider extends URLStreamProvider diff --git a/src/main/java/de/dfki/km/leech/lucene/LeechDefaultFieldConfig.java b/src/main/java/de/dfki/km/leech/lucene/LeechDefaultFieldConfig.java index b07e5c5..bd319e0 100644 --- a/src/main/java/de/dfki/km/leech/lucene/LeechDefaultFieldConfig.java +++ b/src/main/java/de/dfki/km/leech/lucene/LeechDefaultFieldConfig.java @@ -2,8 +2,13 @@ -import de.dfki.inquisition.lucene.DynamicFieldType; -import de.dfki.inquisition.lucene.FieldConfig; +// import de.dfki.inquisitor.lucene.DynamicFieldType; +// import de.dfki.inquisitor.lucene.FieldConfig; + + + +import de.dfki.km.leech.lucene.basic.DynamicFieldType; +import de.dfki.km.leech.lucene.basic.FieldConfig; diff --git a/src/main/java/de/dfki/km/leech/lucene/LeechSimpleAnalyzer.java b/src/main/java/de/dfki/km/leech/lucene/LeechSimpleAnalyzer.java new file mode 100644 index 0000000..0180ffd --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/LeechSimpleAnalyzer.java @@ -0,0 +1,40 @@ +package de.dfki.km.leech.lucene; + +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.LowerCaseTokenizer; +import org.apache.lucene.util.Version; + + + +/** + * An {@link Analyzer} that filters {@link LetterOrDigitLowerCaseTokenizer} with {@link LowerCaseFilter} + **/ +public class LeechSimpleAnalyzer extends Analyzer +{ + + static final protected LeechSimpleAnalyzer m_singelton = new LeechSimpleAnalyzer(); + + static public LeechSimpleAnalyzer getSingleton() + { + return m_singelton; + } + + + /** + * Creates a new {@link LeechSimpleAnalyzer} + */ + public LeechSimpleAnalyzer() + { + } + + + + @Override + protected TokenStreamComponents createComponents(String fieldName) + { + return new TokenStreamComponents(new LetterOrDigitLowerCaseTokenizer()); + } +} diff --git a/src/main/java/de/dfki/km/leech/lucene/LetterOrDigitLowerCaseTokenizer.java 
b/src/main/java/de/dfki/km/leech/lucene/LetterOrDigitLowerCaseTokenizer.java new file mode 100644 index 0000000..298ab6a --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/LetterOrDigitLowerCaseTokenizer.java @@ -0,0 +1,55 @@ +package de.dfki.km.leech.lucene; + + + +import org.apache.lucene.analysis.util.CharTokenizer; +import org.apache.lucene.util.AttributeFactory; + + + +/** + * Tokenizer that tokenizes between letter and digit entries. The chars will also be converted to lower case. + *
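+ * <p>
+ * For illustration, a sketch using the standard Lucene TokenStream API (the sample input is made up):
+ * <pre>
+ * Tokenizer tokenizer = new LetterOrDigitLowerCaseTokenizer();
+ * tokenizer.setReader(new StringReader("Leech-Crawler, Version 2"));
+ * CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
+ * tokenizer.reset();
+ * while (tokenizer.incrementToken())
+ *     System.out.println(term.toString()); // prints: leech, crawler, version, 2
+ * tokenizer.end();
+ * tokenizer.close();
+ * </pre>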
+ * <br>
+ * Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, where words maybe are not separated by + * spaces, etc. + * + * @author Christian Reuschling, Dipl.Ing.(BA) + */ +public class LetterOrDigitLowerCaseTokenizer extends CharTokenizer +{ + + public LetterOrDigitLowerCaseTokenizer(AttributeFactory factory) + { + super(factory); + } + + + + public LetterOrDigitLowerCaseTokenizer() + { + super(); + } + + + + + /** + * Collects only characters which satisfy {@link Character#isLetterOrDigit(int)}. + */ + @Override + protected boolean isTokenChar(int c) + { + return Character.isLetterOrDigit(c); + } + + + + /** + * Converts char to lower case {@link Character#toLowerCase(int)}. + */ + @Override + protected int normalize(int c) + { + return Character.toLowerCase(c); + } +} diff --git a/src/main/java/de/dfki/km/leech/lucene/ToLuceneContentHandler.java b/src/main/java/de/dfki/km/leech/lucene/ToLuceneContentHandler.java index 6b9207c..b4e5ada 100644 --- a/src/main/java/de/dfki/km/leech/lucene/ToLuceneContentHandler.java +++ b/src/main/java/de/dfki/km/leech/lucene/ToLuceneContentHandler.java @@ -1,16 +1,16 @@ /* * Leech - crawling capabilities for Apache Tika - * + * * Copyright (C) 2012 DFKI GmbH, Author: Christian Reuschling - * + * * This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, * either version 3 of the License, or (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * PARTICULAR PURPOSE. See the GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License along with this program. If not, see . 
- * + * * Contact us by mail: christian.reuschling@dfki.de */ @@ -18,60 +18,47 @@ +import de.dfki.inquisitor.collections.MultiValueHashMap; +import de.dfki.inquisitor.file.FileUtilz; +// import de.dfki.inquisitor.lucene.FieldConfig; +import de.dfki.km.leech.Leech; +import de.dfki.km.leech.lucene.basic.FieldConfig; +import de.dfki.km.leech.metadata.LeechMetadata; +import de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory; +import de.dfki.km.leech.sax.DataSinkContentHandler; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.SimpleFSDirectory; +import org.apache.tika.metadata.Metadata; + import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.rmi.server.UID; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.Map.Entry; -import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.LinkedBlockingQueue; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.LockObtainFailedException; -import org.apache.lucene.store.SimpleFSDirectory; -import org.apache.lucene.util.Version; -import org.apache.tika.metadata.Metadata; - -import de.dfki.inquisition.collections.MultiValueHashMap; -import de.dfki.inquisition.file.FileUtils; -import de.dfki.inquisition.lucene.FieldConfig; -import de.dfki.km.leech.Leech; -import de.dfki.km.leech.metadata.LeechMetadata; -import de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory; -import de.dfki.km.leech.sax.DataSinkContentHandler; - /** * This is a content handler that allows to store crawled data into a Lucene index. You are able to configure the field types and the analyzers that should be used. - * Further, blockindexing with {@link IndexWriter#addDocuments(java.util.Collection, Analyzer)} is supported, you can enable it with + * Further, blockindexing with {@link IndexWriter#addDocuments(Iterable)} is supported, you can enable it with * {@link ToLuceneContentHandler#setBlockIndexing(boolean)}. If it is enabled, {@link ToLuceneContentHandler} checks whether inside the metadata is a * {@link LeechMetadata#childId} or a {@link LeechMetadata#parentId} key. Documents with a {@link LeechMetadata#childId} entry will appear as parent documents, docs with * an {@link LeechMetadata#parentId} as childs. {@link ToLuceneContentHandler} collects the child documents if they appear at a processXXX method, and writes them as * block at the time a succeeding parent document appears. In the case a non-parent doc appears, all collected docs will be indexed normally, not as block. 
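 * <p>
 * A minimal wiring sketch (the ToLuceneContentHandler constructor used here, the no-argument LeechDefaultFieldConfig constructor and the Leech.parse/createParseContext signatures are assumptions, not shown in this diff):
 * <pre>
 * IndexWriterConfig config = new IndexWriterConfig(LeechSimpleAnalyzer.getSingleton());
 * config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
 * IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("./luceneIndex")), config);
 *
 * ToLuceneContentHandler handler = new ToLuceneContentHandler(new LeechDefaultFieldConfig(), writer);
 * handler.setBlockIndexing(true); // write parent/child documents as one Lucene block
 *
 * new Leech().parse("/path/to/crawl", handler, new CrawlerContext().createParseContext());
 * writer.close();
 * </pre>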
- * + * * @author Christian Reuschling, Dipl.Ing.(BA) - * */ public class ToLuceneContentHandler extends DataSinkContentHandler { @@ -90,7 +77,7 @@ public void run() { List llDocs = m_addDocsQueue.take(); - if(llDocs instanceof InterruptThreadList) + if (llDocs instanceof InterruptThreadList) { break; } @@ -99,48 +86,36 @@ public void run() { - if(llDocs.size() == 1) + if (llDocs.size() == 1) { getCurrentWriter().addDocument(llDocs.get(0)); } - else if(llDocs.size() > 1) + else if (llDocs.size() > 1) { getCurrentWriter().addDocuments(llDocs); } - - } - catch (Exception e) + } catch (Exception e) { - Logger.getLogger(ToLuceneContentHandler.DocConsumer.class.getName()).log( - Level.WARNING, - "Error during writing a document to the index (lucene exception while addDocument) - will ignore it. This is a hint to a lucene bug." - + llDocs); + Logger.getLogger(ToLuceneContentHandler.DocConsumer.class.getName()).log(Level.WARNING, + "Error during writing a document to the index (lucene exception while addDocument) - will ignore it. This is a hint to a lucene bug." + llDocs); } - } - } - catch (InterruptedException e) + } catch (InterruptedException e) { // NOP - } - catch (Exception e) + } catch (Exception e) { Logger.getLogger(ToLuceneContentHandler.DocConsumer.class.getName()).log(Level.SEVERE, "Error", e); - - } - finally + } finally { try { m_cyclicBarrier4DocConsumerThreads.await(); - } - catch (Exception e2) + } catch (Exception e2) { Logger.getLogger(ToLuceneContentHandler.DocConsumer.class.getName()).log(Level.SEVERE, "Error", e2); } } - - } } @@ -266,6 +241,21 @@ public ToLuceneContentHandler(Metadata metadata, int writeLimit, FieldConfig fie + protected void addStaticAttValuePairs(Document doc) throws Exception + { + for (Entry fieldName2Value : getStaticAttributeValuePairs().entryList()) + { + IndexableField field = m_fieldConfig.createField(fieldName2Value.getKey(), fieldName2Value.getValue()); + if (field != null) + doc.add(field); + else + Logger.getLogger(ToLuceneContentHandler.class.getName()) + .warning("Could not create lucene field for " + fieldName2Value.getKey() + ":" + fieldName2Value.getValue() + ". Will ignore it."); + } + } + + + /** * Will merge all temporar indices together into the initial indexWriter index. This is only necessary if SplitAndMerge is enabled. Otherwise you don't have to invoke * this method. @@ -283,12 +273,13 @@ public void crawlFinished() m_llConsumerThreads.clear(); - if(getSplitAndMergeIndex() <= 0) return; + if (getSplitAndMergeIndex() <= 0) + return; // hier mergen wir nun alle temporären indices in den originalen // der temporären müssen noch geschlossen werden - das machen wir jetzt. 
Der letzte steht noch nicht in der Liste - if(m_luceneWriter != m_initialLuceneWriter) + if (m_luceneWriter != m_initialLuceneWriter) { for (IndexWriter writer2close : m_llIndexWriter2Close) writer2close.close(); @@ -300,7 +291,8 @@ public void crawlFinished() for (String strTmpPath : m_hsTmpLuceneWriterPaths2Merge) llIndicesDirs2Merge.add(new SimpleFSDirectory(Paths.get(strTmpPath))); - if(llIndicesDirs2Merge.size() == 0) return; + if (llIndicesDirs2Merge.size() == 0) + return; Logger.getLogger(ToLuceneContentHandler.class.getName()).info("Will merge " + llIndicesDirs2Merge.size() + " temporary indices to the final one."); @@ -310,15 +302,175 @@ public void crawlFinished() m_initialLuceneWriter.commit(); for (String strTmpPath : m_hsTmpLuceneWriterPaths2Merge) - FileUtils.deleteDirectory(new File(strTmpPath)); + FileUtilz.deleteDirectory(new File(strTmpPath)); + } catch (Exception e) + { + Logger.getLogger(ToLuceneContentHandler.class.getName()).log(Level.SEVERE, "Error", e); + } + } + + + + /** + * Returns null in the case the documents should be ignored according the given constraints (given with {@link #setIgnoreAllDocsWithout(Map)}) + * + * @param metadata + * @param strFulltext + * + * @return null in the case the documents should be ignored according the given constraints (given with {@link #setIgnoreAllDocsWithout(Map)}) + * + * @throws Exception + */ + protected Document createAndFillLuceneDocument(Metadata metadata, String strFulltext) throws Exception + { + // // wir erstellen kein Document-Object neu, wenn es nicht unbedingt nötig ist - dazu merken wir uns die Referenzen auf die schon allokierten + // // Document Objekte + // // Document Object reuse + // Document doc = null; + // for (Document preAllocatedDoc : m_llAllocatedDocuments) + // { + // if(!m_llLastChildDocuments.contains(preAllocatedDoc)) + // { + // doc = preAllocatedDoc; + // LinkedList llFieldNames = new + // for (Fieldable field : doc.getFields()) + // doc.removeFields(field.name()); + // + // break; + // } + // } + // if(doc == null) + // { + // doc = new Document(); + // m_llAllocatedDocuments.add(doc); + // } + + Document doc = new Document(); + + // Das man kein Field aus einem reader machen kann ist der Grund, warum processNewMetaData den Fulltext als String und nicht als reader + // übergibt + + // eine eindeutige ID muß da sein + if (metadata.getValues(LeechMetadata.id).length == 0) + doc.add(m_fieldConfig.createField(LeechMetadata.id, new UID().toString())); + if (!getFields2Ignore().contains(LeechMetadata.body)) + doc.add(m_fieldConfig.createField(LeechMetadata.body, strFulltext)); + // die kopien + for (String strFieldCopy : getFieldCopyMap().get(LeechMetadata.body)) + if (!getFields2Ignore().contains(strFieldCopy)) + doc.add(m_fieldConfig.createField(strFieldCopy, strFulltext)); + + + // die restlichen metadaten + for (String strFieldName : metadata.names()) + { + if (!getFields2Ignore().contains(strFieldName)) + { + for (String strValue : metadata.getValues(strFieldName)) + { + IndexableField field = m_fieldConfig.createField(strFieldName, strValue); + if (field != null) + doc.add(field); + else + Logger.getLogger(ToLuceneContentHandler.class.getName()) + .warning("Could not create lucene field for " + strFieldName + ":" + strValue + ". 
Will ignore it."); + } + } + + // die kopien + for (String strFieldCopy : getFieldCopyMap().get(strFieldName)) + if (!getFields2Ignore().contains(strFieldCopy)) + { + for (String strValue : metadata.getValues(strFieldName)) + { + IndexableField field = m_fieldConfig.createField(strFieldCopy, strValue); + if (field != null) + doc.add(field); + else + Logger.getLogger(ToLuceneContentHandler.class.getName()) + .warning("Could not create lucene field for " + strFieldCopy + ":" + strValue + ". Will ignore it."); + } + } } - catch (Exception e) + + // die statischen Attribut-Value-Paare + addStaticAttValuePairs(doc); + + // und jetzt aggregieren wir noch + for (String strTargetAtt : getFieldAggregationMap().keySet()) { - Logger.getLogger(ToLuceneContentHandler.class.getName()).log(Level.SEVERE, "Error", e); + // wenn es das TargetAtt schon im doc gibt, dann aggregieren wir nix + if (doc.get(strTargetAtt) != null) + continue; + + Collection colSourceAtts = getFieldAggregationMap().get(strTargetAtt); + + for (String strSourceAtt : colSourceAtts) + { + String strNewValue = metadata.get(strSourceAtt); + if (strNewValue == null) + strNewValue = getStaticAttributeValuePairs().getFirst(strSourceAtt); + + if (strNewValue != null) + { + IndexableField field = m_fieldConfig.createField(strTargetAtt, strNewValue); + if (field != null) + doc.add(field); + else + Logger.getLogger(ToLuceneContentHandler.class.getName()) + .warning("Could not create lucene field for " + strTargetAtt + ":" + strNewValue + ". Will ignore it."); + + break; + } + } + } + + + + // wenn ein Doc nicht unseren constraints entspricht, dann ignorieren wir das hier, indem wir null zurück geben + if (m_hsFieldName2FieldValueConstraint == null || m_hsFieldName2FieldValueConstraint.size() == 0) + return doc; + + for (Entry fieldname2fieldValRegEx : m_hsFieldName2FieldValueConstraint.entrySet()) + { + IndexableField[] fieldables = doc.getFields(fieldname2fieldValRegEx.getKey()); + for (IndexableField fieldable : fieldables) + { + String strVal = fieldable.stringValue(); + if (strVal.matches(fieldname2fieldValRegEx.getValue())) + { + // wir haben einen Treffer + return doc; + } + } } + + return null; + } + + + + protected void ensureConsumerThreadsRunning() + { + if (m_llConsumerThreads.size() != 0) + return; + + int iCoreCount = Runtime.getRuntime().availableProcessors(); + int iThreadCount = (int) Math.round(iCoreCount / 2d); + iThreadCount = Math.max(iThreadCount, 1); + + m_cyclicBarrier4DocConsumerThreads = new CyclicBarrier(iThreadCount + 1); + for (int i = 0; i < iThreadCount; i++) + { + Thread consumerThread = new Thread(new DocConsumer(), "ToLuceneContentHandlerDocConsumer " + i); + m_llConsumerThreads.add(consumerThread); + consumerThread.setDaemon(true); + + consumerThread.start(); + } } @@ -330,11 +482,58 @@ public boolean getBlockIndexing() + synchronized protected IndexWriter getCurrentWriter() throws CorruptIndexException, LockObtainFailedException, IOException + { + + + if (getSplitAndMergeIndex() <= 0) + return m_initialLuceneWriter; + + if (m_luceneWriter.maxDoc() < getSplitAndMergeIndex()) + return m_luceneWriter; + + + Directory directory = m_initialLuceneWriter.getDirectory(); + + Path fOurTmpDir = null; + if (directory instanceof FSDirectory) + { + if (m_luceneWriter != m_initialLuceneWriter) + m_llIndexWriter2Close.add(m_luceneWriter); + + String strTmpPath = ((FSDirectory) directory).getDirectory().toAbsolutePath().toString(); + // if(strTmpPath.charAt(strTmpPath.length() - 1) == '/' || 
strTmpPath.charAt(strTmpPath.length() - 1) == '\\') + // strTmpPath = strTmpPath.substring(0, strTmpPath.length() - 1); + strTmpPath += "_" + (m_hsTmpLuceneWriterPaths2Merge.size() + 1); + fOurTmpDir = Paths.get(strTmpPath); + } + else + { + // wir brauchen was temporäres + File parentDir = new File(System.getProperty("java.io.tmpdir")); + fOurTmpDir = Paths.get(parentDir.getAbsolutePath() + "/leechTmp/" + UUID.randomUUID().toString().replaceAll("\\W", "_")); + } + + Logger.getLogger(ToLuceneContentHandler.class.getName()) + .info("Current index exceeds " + m_iSplitIndexDocumentCount + " documents. Will create another temporary one under " + fOurTmpDir); + + + @SuppressWarnings("deprecation") IndexWriterConfig config = new IndexWriterConfig(m_initialLuceneWriter.getConfig().getAnalyzer()); + config.setOpenMode(OpenMode.CREATE); + + m_luceneWriter = new IndexWriter(new SimpleFSDirectory(fOurTmpDir), config); + m_hsTmpLuceneWriterPaths2Merge.add(fOurTmpDir.toAbsolutePath().toString()); + + return m_luceneWriter; + } + + + /** * Gets the field aggregation map. This means that you want to generate a field entry, whereby its value should be copied from another, existing metadata entry. You * can specify a list of these source-attributes, the first who have an entry wins and appears as new attribute, so the source field name list is in fact a priorized * list. - * + * * @return the current field aggregation map */ public MultiValueHashMap getFieldAggregationMap() @@ -346,7 +545,7 @@ public MultiValueHashMap getFieldAggregationMap() /** * Gets the field config - * + * * @return the field config */ public FieldConfig getFieldConfig() @@ -360,7 +559,7 @@ public FieldConfig getFieldConfig() * Gets the field copy mappings. This means that the content of every metadata key that is specified as key inside hsSource2TargetFieldnames will be copied into * several other fields. The field names of these fields are specified as corresponding value inside hsSource2TargetFieldnames. In the case you want to rename * attribute names, specify a field mapping and ignore the source field name with {@link #setFieldNames2Ignore(HashSet)} - * + * * @return the current field mappings */ public MultiValueHashMap getFieldCopyMap() @@ -372,7 +571,7 @@ public MultiValueHashMap getFieldCopyMap() /** * Gets the set of field names / metadata key values that will NOT be stored into the lucene index. - * + * * @return the set of field names / metadata key values that will NOT be stored into the lucene index. */ public HashSet getFields2Ignore() @@ -384,7 +583,7 @@ public HashSet getFields2Ignore() /** * All docs without at least one of the given fieldname-value pairs will be ignored. You can specif regular expressions as field values - * + * * @return the fieldname-value pairs. At least one have to match that a document will be written into the index */ public Map getIgnoreAllDocsWithout() @@ -400,7 +599,7 @@ public Map getIgnoreAllDocsWithout() * writing, until this one also gets 'overfilled'. In the case your crawl is finished, {@link Leech} invokes {@link ToLuceneContentHandler#crawlFinished()}. This will * merge all temporary indices into the initial indexWriter object. This is for performance reasons because writing into a Lucene index tends to get slow after a * certain size. Splitting and merging afterwards is faster. 
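 * <p>
 * Configuration sketch for the setters documented in this class (continuing the handler instance from the sketch above; a MultiValueHashMap.add(key, value) method is assumed, and the field names are made up):
 * <pre>
 * // rename "title" to "heading": copy the value and drop the original field name
 * MultiValueHashMap<String, String> copyMap = new MultiValueHashMap<String, String>();
 * copyMap.add("title", "heading");
 * handler.setFieldCopyMap(copyMap);
 * handler.setFieldNames2Ignore(new HashSet<String>(Arrays.asList("title")));
 *
 * // index only documents whose "source" field matches this regular expression
 * handler.setIgnoreAllDocsWithout(Collections.singletonMap("source", ".*\\.pdf"));
 *
 * // start a new temporary index every 500 000 documents; merged again in crawlFinished()
 * handler.setSplitAndMergeIndex(500000);
 * </pre>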
- * + * * @return the document count a new index will be created */ public int getSplitAndMergeIndex() @@ -412,7 +611,7 @@ public int getSplitAndMergeIndex() /** * Sets some attribute value pairs that will be added to every crawled document. - * + * * @return the current static attribute value pairs */ public MultiValueHashMap getStaticAttributeValuePairs() @@ -422,6 +621,16 @@ public MultiValueHashMap getStaticAttributeValuePairs() + @Override + protected void init() + { + Logger.getLogger(ToLuceneContentHandler.class.getName()).info("Will write crawled data into " + m_luceneWriter.getDirectory().toString()); + + ensureConsumerThreadsRunning(); + } + + + @Override public void processErrorData(Metadata metadata) { @@ -439,20 +648,17 @@ public void processModifiedData(Metadata metadata, String strFulltext) // hier modifizieren wir ein schon vorhandenes Dokument Document luceneDocument = createAndFillLuceneDocument(metadata, strFulltext); - if(luceneDocument == null) return; + if (luceneDocument == null) + return; // TODO: was passiert hier mit block-indexierten Dokumenten? - m_initialLuceneWriter - .updateDocument(new Term(IncrementalCrawlingHistory.dataEntityId, metadata.get(IncrementalCrawlingHistory.dataEntityId)), luceneDocument); - - } - catch (Exception e) + m_initialLuceneWriter.updateDocument(new Term(IncrementalCrawlingHistory.dataEntityId, metadata.get(IncrementalCrawlingHistory.dataEntityId)), luceneDocument); + } catch (Exception e) { Logger.getLogger(ToLuceneContentHandler.class.getName()).log(Level.SEVERE, "Error during writing into the index", e); } - } @@ -463,7 +669,8 @@ public void processNewData(Metadata metadata, String strFulltext) try { - if(m_initialLuceneWriter == null) throw new IllegalStateException("Lucene writer was not specified"); + if (m_initialLuceneWriter == null) + throw new IllegalStateException("Lucene writer was not specified"); m_luceneWriter = getCurrentWriter(); @@ -471,7 +678,8 @@ public void processNewData(Metadata metadata, String strFulltext) Document doc = createAndFillLuceneDocument(metadata, strFulltext); - if(doc == null) return; + if (doc == null) + return; @@ -480,16 +688,16 @@ public void processNewData(Metadata metadata, String strFulltext) // - wenn wir auf ein Doc ohne parent-oder child-Id stossen, dann schreiben wir alle bisherigen Docs als Einzeldokumente raus - nicht im // Block - if(ToLuceneContentHandler.this.getBlockIndexing()) + if (ToLuceneContentHandler.this.getBlockIndexing()) { - if(metadata.get(LeechMetadata.parentId) != null) + if (metadata.get(LeechMetadata.parentId) != null) { // wir haben ein child-Doc (wir haben eine Referenz zu unserem parent). Das merken wir uns einfach m_llLastChildDocuments.add(doc); } - else if(metadata.get(LeechMetadata.childId) != null) + else if (metadata.get(LeechMetadata.childId) != null) { // wir haben ein parentDoc (ein parent hat min eine childId) - wir schreiben zusammen mit den bisher gesammelten im block. 
Das // parentDoc ist das letzte @@ -507,24 +715,15 @@ else if(metadata.get(LeechMetadata.childId) != null) m_addDocsQueue.put(Collections.singletonList(doc)); } - - } else { m_addDocsQueue.put(Collections.singletonList(doc)); } - - - - - - } - catch (Exception e) + } catch (Exception e) { Logger.getLogger(ToLuceneContentHandler.class.getName()).log(Level.SEVERE, "Error", e); } - } @@ -534,13 +733,15 @@ public void processNewDocument(Document doc) try { - if(m_initialLuceneWriter == null) throw new IllegalStateException("Lucene writer was not specified"); + if (m_initialLuceneWriter == null) + throw new IllegalStateException("Lucene writer was not specified"); m_luceneWriter = getCurrentWriter(); ensureConsumerThreadsRunning(); - if(doc == null) return; + if (doc == null) + return; @@ -549,16 +750,16 @@ public void processNewDocument(Document doc) // - wenn wir auf ein Doc ohne parent-oder child-Id stossen, dann schreiben wir alle bisherigen Docs als Einzeldokumente raus - nicht im // Block - if(ToLuceneContentHandler.this.getBlockIndexing()) + if (ToLuceneContentHandler.this.getBlockIndexing()) { - if(doc.get(LeechMetadata.parentId) != null) + if (doc.get(LeechMetadata.parentId) != null) { // wir haben ein child-Doc (wir haben eine Referenz zu unserem parent). Das merken wir uns einfach m_llLastChildDocuments.add(doc); } - else if(doc.get(LeechMetadata.childId) != null) + else if (doc.get(LeechMetadata.childId) != null) { // wir haben ein parentDoc (ein parent hat min eine childId) - wir schreiben zusammen mit den bisher gesammelten im block. Das // parentDoc ist das letzte @@ -576,24 +777,15 @@ else if(doc.get(LeechMetadata.childId) != null) m_addDocsQueue.put(Collections.singletonList(doc)); } - - } else { m_addDocsQueue.put(Collections.singletonList(doc)); } - - - - - - } - catch (Exception e) + } catch (Exception e) { Logger.getLogger(ToLuceneContentHandler.class.getName()).log(Level.SEVERE, "Error", e); } - } @@ -616,18 +808,14 @@ public void processRemovedData(Metadata metadata) // TODO: was passiert hier mit block-indexierten Dokumenten? m_initialLuceneWriter.deleteDocuments(new Term(IncrementalCrawlingHistory.dataEntityId, metadata.get(IncrementalCrawlingHistory.dataEntityId))); - - } - catch (Exception e) + } catch (Exception e) { Logger.getLogger(ToLuceneContentHandler.class.getName()).log(Level.SEVERE, "Error during writing into the index", e); } - } - @Override public void processUnmodifiedData(Metadata metadata) { @@ -637,12 +825,12 @@ public void processUnmodifiedData(Metadata metadata) /** - * Sets whether block indexing with {@link IndexWriter#addDocuments(java.util.Collection, Analyzer)} is enabled or not. If it is enabled, + * Sets whether block indexing with {@link IndexWriter#addDocuments(Iterable)} is enabled or not. If it is enabled, * {@link ToLuceneContentHandler} checks whether inside the metadata is a {@link LeechMetadata#childId} or a {@link LeechMetadata#parentId} key. Documents with a * {@link LeechMetadata#childId} entry will appear as parent documents, docs with an {@link LeechMetadata#parentId} as childs. {@link ToLuceneContentHandler} collects * the child documents if they appear at a processXXX method, and writes them as block at the time a succeeding parent document appears. In the case a non-parent doc * appears, all collected docs will be indexed normally, not as block. - * + * * @param blockIndexing true in the case blockindexing should be inabled, false otherwise. 
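 * <p>
 * Illustrative only (the LeechMetadata keys and the processNewDocument/createField/getFieldConfig calls are taken from this class; the concrete id values are made up):
 * <pre>
 * FieldConfig fieldConfig = handler.getFieldConfig();
 *
 * Document child = new Document();
 * child.add(fieldConfig.createField(LeechMetadata.parentId, "id-of-the-parent")); // buffered as child doc
 * handler.processNewDocument(child);
 *
 * Document parent = new Document();
 * parent.add(fieldConfig.createField(LeechMetadata.childId, "id-of-a-child"));    // flushes children + parent as one block
 * handler.processNewDocument(parent);
 * </pre>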
*/ public void setBlockIndexing(boolean blockIndexing) @@ -656,7 +844,7 @@ public void setBlockIndexing(boolean blockIndexing) * Sets the field aggregation map. This means that you want to generate a field entry, whereby its value should be copied from another, existing metadata entry. You * can specify a list of these source-attributes, the first who have an entry wins and appears as new attribute, so the source field name list is in fact a priorized * list. - * + * * @param hsTarget2SourcesFieldnames the field aggregation map */ public void setFieldAggregationMap(MultiValueHashMap hsTarget2SourcesFieldnames) @@ -666,16 +854,13 @@ public void setFieldAggregationMap(MultiValueHashMap hsTarget2So - - - /** * Sets the field copy mappings. This means that the content of every metadata key that is specified as key inside hsSource2TargetFieldnames will be copied into * several other fields. The field names of these fields are specified as corresponding value inside hsSource2TargetFieldnames. In the case you want to rename * attribute names, specify a field mapping and ignore the source field name with {@link #setFieldNames2Ignore(HashSet)} - * + * * @param hsSource2TargetFieldnames keys: source field names, given as metadata keys. values: target field names - the content will also appear under these fields - * inside a lucene document + * inside a lucene document */ public void setFieldCopyMap(MultiValueHashMap hsSource2TargetFieldnames) { @@ -687,7 +872,7 @@ public void setFieldCopyMap(MultiValueHashMap hsSource2TargetFie /** * Sets the set of field names / metadata key values that will NOT be stored into the lucene index. Nevertheless, you can consider these in * {@link #setFieldCopyMap(MultiValueHashMap)}. In this case you have 'moved' the attribute value into another attribute (or several ones). - * + * * @param hsAttNamesNot2Store the set of attribute/field names that will not stored into the lucene index */ public void setFieldNames2Ignore(HashSet hsAttNamesNot2Store) @@ -700,9 +885,9 @@ public void setFieldNames2Ignore(HashSet hsAttNamesNot2Store) /** * All docs without at least one of the given fieldname-value pairs will be ignored. You can specif regular expressions as field values. If this is set to null or to * an empty map, all documents will be accepted. - * + * * @param hsFieldName2FieldValue the fieldname-value pairs. At least one have to match that a document will be written into the index - * + * * @return this */ public ToLuceneContentHandler setIgnoreAllDocsWithout(Map hsFieldName2FieldValue) @@ -714,7 +899,6 @@ public ToLuceneContentHandler setIgnoreAllDocsWithout(Map hsFiel - /** * If split and merge is enabled, {@link ToLuceneContentHandler} will check at each {@link #processNewData(Metadata, String)} invocation whether the current * indexWriter has more than iSplitIndexDocumentCount documents. In the case it has more, {@link ToLuceneContentHandler} will create an entirely new index for @@ -722,10 +906,10 @@ public ToLuceneContentHandler setIgnoreAllDocsWithout(Map hsFiel * indices into the initial indexWriter object. This invocation will be done automatically by the {@link Leech} class. This is for performance reasons because writing * into a Lucene index tends to get slow after a certain size. Splitting and merging afterwards is faster. Update: this behaviour depends on the Lucene version used, * currently this seems to be not a problem. Thus, this functionality is disabled per default. 
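 * <p>
 * Sketch for the aggregation and static-value setters described above (attribute names are made up; a MultiValueHashMap.add(key, value) method is assumed):
 * <pre>
 * // "date" is aggregated from "created" or, as fallback, "lastModified" (prioritized source list)
 * MultiValueHashMap<String, String> aggregationMap = new MultiValueHashMap<String, String>();
 * aggregationMap.add("date", "created");
 * aggregationMap.add("date", "lastModified");
 * handler.setFieldAggregationMap(aggregationMap);
 *
 * // added to every crawled document
 * MultiValueHashMap<String, String> staticPairs = new MultiValueHashMap<String, String>();
 * staticPairs.add("crawlSource", "fileShare42");
 * handler.setStaticAttributeValuePairs(staticPairs);
 * </pre>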
- * + * * @param iSplitIndexDocumentCount the document count a new index will be created. A good size is 500 000 (from my stomach feeling, if it is necessary). -1 in the - * case you want to disable SplitAndMerge, which is the default. - * + * case you want to disable SplitAndMerge, which is the default. + * * @return this */ public ToLuceneContentHandler setSplitAndMergeIndex(int iSplitIndexDocumentCount) @@ -739,9 +923,9 @@ public ToLuceneContentHandler setSplitAndMergeIndex(int iSplitIndexDocumentCount /** * Sets some attribute value pairs that will be added to every crawled document. - * + * * @param hsStaticAttValuePairs a multi value map containing the additional attribute value pairs - * + * * @return this */ public ToLuceneContentHandler setStaticAttributeValuePairs(MultiValueHashMap hsStaticAttValuePairs) @@ -750,244 +934,4 @@ public ToLuceneContentHandler setStaticAttributeValuePairs(MultiValueHashMap fieldName2Value : getStaticAttributeValuePairs().entryList()) - { - IndexableField field = m_fieldConfig.createField(fieldName2Value.getKey(), fieldName2Value.getValue()); - if(field != null) - doc.add(field); - else - Logger.getLogger(ToLuceneContentHandler.class.getName()).warning( - "Could not create lucene field for " + fieldName2Value.getKey() + ":" + fieldName2Value.getValue() + ". Will ignore it."); - } - } - - - - - /** - * Returns null in the case the documents should be ignored according the given constraints (given with {@link #setIgnoreAllDocsWithout(Map)}) - * - * @param metadata - * @param strFulltext - * - * @return null in the case the documents should be ignored according the given constraints (given with {@link #setIgnoreAllDocsWithout(Map)}) - * - * @throws Exception - */ - protected Document createAndFillLuceneDocument(Metadata metadata, String strFulltext) throws Exception - { - // // wir erstellen kein Document-Object neu, wenn es nicht unbedingt nötig ist - dazu merken wir uns die Referenzen auf die schon allokierten - // // Document Objekte - // // Document Object reuse - // Document doc = null; - // for (Document preAllocatedDoc : m_llAllocatedDocuments) - // { - // if(!m_llLastChildDocuments.contains(preAllocatedDoc)) - // { - // doc = preAllocatedDoc; - // LinkedList llFieldNames = new - // for (Fieldable field : doc.getFields()) - // doc.removeFields(field.name()); - // - // break; - // } - // } - // if(doc == null) - // { - // doc = new Document(); - // m_llAllocatedDocuments.add(doc); - // } - - Document doc = new Document(); - - - - // Das man kein Field aus einem reader machen kann ist der Grund, warum processNewMetaData den Fulltext als String und nicht als reader - // übergibt - - // eine eindeutige ID muß da sein - if(metadata.getValues(LeechMetadata.id).length == 0) doc.add(m_fieldConfig.createField(LeechMetadata.id, new UID().toString())); - if(!getFields2Ignore().contains(LeechMetadata.body)) doc.add(m_fieldConfig.createField(LeechMetadata.body, strFulltext)); - // die kopien - for (String strFieldCopy : getFieldCopyMap().get(LeechMetadata.body)) - if(!getFields2Ignore().contains(strFieldCopy)) doc.add(m_fieldConfig.createField(strFieldCopy, strFulltext)); - - - // die restlichen metadaten - for (String strFieldName : metadata.names()) - { - if(!getFields2Ignore().contains(strFieldName)) - { - for (String strValue : metadata.getValues(strFieldName)) - { - IndexableField field = m_fieldConfig.createField(strFieldName, strValue); - if(field != null) - doc.add(field); - else - Logger.getLogger(ToLuceneContentHandler.class.getName()).warning( - 
"Could not create lucene field for " + strFieldName + ":" + strValue + ". Will ignore it."); - } - - } - - // die kopien - for (String strFieldCopy : getFieldCopyMap().get(strFieldName)) - if(!getFields2Ignore().contains(strFieldCopy)) - { - for (String strValue : metadata.getValues(strFieldName)) - { - IndexableField field = m_fieldConfig.createField(strFieldCopy, strValue); - if(field != null) - doc.add(field); - else - Logger.getLogger(ToLuceneContentHandler.class.getName()).warning( - "Could not create lucene field for " + strFieldCopy + ":" + strValue + ". Will ignore it."); - } - } - } - - // die statischen Attribut-Value-Paare - addStaticAttValuePairs(doc); - - // und jetzt aggregieren wir noch - for (String strTargetAtt : getFieldAggregationMap().keySet()) - { - // wenn es das TargetAtt schon im doc gibt, dann aggregieren wir nix - if(doc.get(strTargetAtt) != null) continue; - - Collection colSourceAtts = getFieldAggregationMap().get(strTargetAtt); - - for (String strSourceAtt : colSourceAtts) - { - String strNewValue = metadata.get(strSourceAtt); - if(strNewValue == null) strNewValue = getStaticAttributeValuePairs().getFirst(strSourceAtt); - - if(strNewValue != null) - { - IndexableField field = m_fieldConfig.createField(strTargetAtt, strNewValue); - if(field != null) - doc.add(field); - else - Logger.getLogger(ToLuceneContentHandler.class.getName()).warning( - "Could not create lucene field for " + strTargetAtt + ":" + strNewValue + ". Will ignore it."); - - break; - } - } - } - - - - // wenn ein Doc nicht unseren constraints entspricht, dann ignorieren wir das hier, indem wir null zurück geben - if(m_hsFieldName2FieldValueConstraint == null || m_hsFieldName2FieldValueConstraint.size() == 0) return doc; - - for (Entry fieldname2fieldValRegEx : m_hsFieldName2FieldValueConstraint.entrySet()) - { - IndexableField[] fieldables = doc.getFields(fieldname2fieldValRegEx.getKey()); - for (IndexableField fieldable : fieldables) - { - String strVal = fieldable.stringValue(); - if(strVal.matches(fieldname2fieldValRegEx.getValue())) - { - // wir haben einen Treffer - return doc; - } - } - } - - - return null; - } - - - - - - - - protected void ensureConsumerThreadsRunning() - { - if(m_llConsumerThreads.size() != 0) return; - - int iCoreCount = Runtime.getRuntime().availableProcessors(); - int iThreadCount = (int) Math.round(iCoreCount / 2d); - iThreadCount = Math.max(iThreadCount, 1); - - m_cyclicBarrier4DocConsumerThreads = new CyclicBarrier(iThreadCount + 1); - for (int i = 0; i < iThreadCount; i++) - { - Thread consumerThread = new Thread(new DocConsumer(), "ToLuceneContentHandlerDocConsumer " + i); - m_llConsumerThreads.add(consumerThread); - consumerThread.setDaemon(true); - - consumerThread.start(); - } - } - - - - synchronized protected IndexWriter getCurrentWriter() throws CorruptIndexException, LockObtainFailedException, IOException - { - - - if(getSplitAndMergeIndex() <= 0) return m_initialLuceneWriter; - - if(m_luceneWriter.maxDoc() < getSplitAndMergeIndex()) return m_luceneWriter; - - - Directory directory = m_initialLuceneWriter.getDirectory(); - - Path fOurTmpDir = null; - if(directory instanceof FSDirectory) - { - if(m_luceneWriter != m_initialLuceneWriter) m_llIndexWriter2Close.add(m_luceneWriter); - - String strTmpPath = ((FSDirectory) directory).getDirectory().toAbsolutePath().toString(); - // if(strTmpPath.charAt(strTmpPath.length() - 1) == '/' || strTmpPath.charAt(strTmpPath.length() - 1) == '\\') - // strTmpPath = strTmpPath.substring(0, strTmpPath.length() - 1); - 
strTmpPath += "_" + (m_hsTmpLuceneWriterPaths2Merge.size() + 1); - fOurTmpDir = Paths.get(strTmpPath); - } - else - { - // wir brauchen was temporäres - File parentDir = new File(System.getProperty("java.io.tmpdir")); - fOurTmpDir = Paths.get(parentDir.getAbsolutePath() + "/leechTmp/" + UUID.randomUUID().toString().replaceAll("\\W", "_")); - } - - Logger.getLogger(ToLuceneContentHandler.class.getName()).info( - "Current index exceeds " + m_iSplitIndexDocumentCount + " documents. Will create another temporary one under " + fOurTmpDir); - - - @SuppressWarnings("deprecation") - IndexWriterConfig config = new IndexWriterConfig(m_initialLuceneWriter.getConfig().getAnalyzer()); - config.setOpenMode(OpenMode.CREATE); - - m_luceneWriter = new IndexWriter(new SimpleFSDirectory(fOurTmpDir), config); - m_hsTmpLuceneWriterPaths2Merge.add(fOurTmpDir.toAbsolutePath().toString()); - - return m_luceneWriter; - } - - - - @Override - protected void init() - { - Logger.getLogger(ToLuceneContentHandler.class.getName()).info("Will write crawled data into " + m_luceneWriter.getDirectory().toString()); - - ensureConsumerThreadsRunning(); - } - - - - - - - } diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/Buzzwords.java b/src/main/java/de/dfki/km/leech/lucene/basic/Buzzwords.java new file mode 100644 index 0000000..8ed2e52 --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/Buzzwords.java @@ -0,0 +1,954 @@ +package de.dfki.km.leech.lucene.basic; + + + +import de.dfki.inquisitor.collections.MultiValueTreeMap; +// import de.dfki.inquisitor.lucene.DynamicFieldType; +// import de.dfki.inquisitor.lucene.*; +import de.dfki.inquisitor.text.Levenshtein; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.similarities.ClassicSimilarity; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.*; +import java.util.Map.Entry; + + + +/** + * The class Buzzwords extracts keywords out of documents - these can be in the form of lucene-documents, which enables to calculate the buzzwords very fast because the + * most information is still in the lucene index. But also strings can be processed, with an index as a base for calculation + * + * @author Christian Reuschling, Elisabeth Wolf + * + */ +public class Buzzwords +{ + + + static protected ClassicSimilarity m_defaultSimilarity = new ClassicSimilarity(); + + + + // + // /** + // * Adds calculated buzzwords to the given document. The method makes use of the IndexAccessor default Analyzer. + // * + // * @param doc2modify the document that should enriched with a new buzzword field + // * @param strIdFieldName the attribute name that should be used to identify the documents according to their id String + // * @param strNewField4Buzzwords the attribute that should be created for the buzzword. 
Becomes part of the document object + // * @param sAttNames4BuzzwordCalculation the attributes that should be considered for buzzword generation + // * @param iMaxNumberOfBuzzwords the maximum number of buzzwords the method should generate + // * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + // * @param hsIndexPaths the list of indices that should be used for buzzword calculation. The document must be stored in exactly one index, referenced by the document + // * object value of strIdFieldName. + // * + // * @return true in the case the document object was modified, false otherwise. The method do not modify the index entry + // * + // * @throws Exception + // */ + // static public boolean addBuzzwords(Document doc2modify, String strIdFieldName, String strNewField4Buzzwords, Set sAttNames4BuzzwordCalculation, + // int iMaxNumberOfBuzzwords, boolean bSkipSimilarTerms, LinkedHashSet hsIndexPaths) throws Exception + // { + // + // + // String strDocID = getAttributeValue(doc2modify, strIdFieldName); + // List lBuzzwords = getBuzzwords(strDocID, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, bSkipSimilarTerms, hsIndexPaths); + // + // // wenn es keinen Content gibt, mache mer gar nix + // if(lBuzzwords == null) return false; + // + // StringBuilder strbBuzzWordz = new StringBuilder(); + // + // for (int i = 0; i < Math.min(iMaxNumberOfBuzzwords, lBuzzwords.size()); i++) + // strbBuzzWordz.append(lBuzzwords.get(i)).append(" "); + // + // + // // wenn es das Buzzword-feld schon gibt, wirds gelöscht + // doc2modify.removeFields(strNewField4Buzzwords); + // // die neu berechneten Buzzwords werden zum Doc hinzugefügt + // doc2modify.add(new TextWithTermVectorOffsetsField(strNewField4Buzzwords, strbBuzzWordz.toString())); + // + // + // return true; + // } + + + + /** + * Gets the value of an attribute inside the document as String. + * + * @param doc + * @param strFieldName the attributes name + * + * @return the first attribute value under the given attribute name + */ + private static String getAttributeValue(Document doc, String strFieldName) + { + + IndexableField docAtt = doc.getField(strFieldName); + if(docAtt == null) return null; + + + return docAtt.stringValue(); + } + + + + // + // /** + // * Gets the buzzwords for fields of a document. The metohd makes use of the IndexAccessor default Analyzer. + // * + // * @param strDocID the ID of the document from which the buzzwords should be extracted + // * @param sAttNames4BuzzwordCalculation the name of the attributes the buzzwords should be extracted from + // * @param iMaxNumberOfBuzzwords the maximum number of buzzwords + // * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + // * @param hsIndexPaths the list of indices that should be used for buzzword calculation. The document must be stored in exactly one index, referenced by the document + // * object value of strIdFieldName. 
+ // * + // * @return the list of the extracted buzzwords, null in the case the given attribute doesn't exist + // * + // * @throws CorruptIndexException + // * @throws IOException + // * @throws URINotFoundException + // * @throws URISyntaxException + // */ + // static public List getBuzzwords(String strDocID, Set sAttNames4BuzzwordCalculation, int iMaxNumberOfBuzzwords, boolean bSkipSimilarTerms, + // LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, URINotFoundException, URISyntaxException + // { + // + // LinkedHashMap buzzwordsWithTfIdf = + // getBuzzwordsWithTfIdf(strDocID, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, bSkipSimilarTerms, hsIndexPaths); + // + // LinkedList llBuzzwords = new LinkedList(buzzwordsWithTfIdf.keySet()); + // + // + // return llBuzzwords; + // } + + + // + // + // /** + // * Gets the buzzwords for fields of a document. The metohd makes use of the IndexAccessor default Analyzer. + // * + // * @param strDocID the ID of the document from which the buzzwords should be extracted + // * @param strFieldName the name of the attribute the buzzwords should be extracted from + // * @param iMaxNumberOfBuzzwords the maximum number of buzzwords + // * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + // * @param hsIndexPaths the list of indices that should be used for buzzword calculation. The document must be stored in exactly one index, referenced by the document + // * object value of strIdFieldName. + // * + // * @return the list of the extracted buzzwords, null in the case the given attribute doesn't exist + // * @throws CorruptIndexException + // * @throws IOException + // * @throws URINotFoundException + // * @throws URISyntaxException + // */ + // static public List> getBuzzwords4AllFieldValues(String strDocID, String strFieldName, int iMaxNumberOfBuzzwords, boolean bSkipSimilarTerms, + // LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, URINotFoundException, URISyntaxException + // { + // + // List> buzzwordsWithTfIdfMaps = + // getBuzzwordsWithTfIdf4AllFieldValues(strDocID, strFieldName, iMaxNumberOfBuzzwords, bSkipSimilarTerms, hsIndexPaths); + // + // LinkedList> llbuzzwords4AllFieldValues = new LinkedList>(); + // for (LinkedHashMap hsBuzzwords2TfIdf : buzzwordsWithTfIdfMaps) + // { + // + // LinkedList llBuzzwords = new LinkedList(hsBuzzwords2TfIdf.keySet()); + // + // llbuzzwords4AllFieldValues.add(llBuzzwords); + // } + // + // + // return llbuzzwords4AllFieldValues; + // } + + + // + // + // /** + // * Gets the buzzwords for fields of a document, together with their document TfIdf value. The metohd makes use of the IndexAccessor default Analyzer. + // * + // * @param strDocID the ID of the document from which the buzzwords should be extracted + // * @param sAttNames4BuzzwordCalculation the name of the attributes the buzzwords should be extracted from. + // * @param iMaxNumberOfBuzzwords the maximum number of buzzwords + // * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + // * @param hsIndexPaths the list of indices that should be used for buzzword calculation. The document must be stored in exactly one index, referenced by the document + // * object value of strIdFieldName. + // * + // * @return the extracted buzzwords, boosted according their score. Key: the term itself. Value: the according score. 
null in the case the given attribute doesn't + // * exist. + // * @throws CorruptIndexException + // * @throws IOException + // * @throws URINotFoundException + // * @throws URISyntaxException + // */ + // static public LinkedHashMap getBuzzwordsWithTfIdf(String strDocID, Set sAttNames4BuzzwordCalculation, int iMaxNumberOfBuzzwords, + // boolean bSkipSimilarTerms, LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, URINotFoundException, URISyntaxException + // { + // + // MultiValueTreeMap tmScore2Term = + // retrieveInterestingTerms(strDocID, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, 2, 1, 2, bSkipSimilarTerms, hsIndexPaths); + // + // if(tmScore2Term.valueSize() < iMaxNumberOfBuzzwords) + // { + // + // MultiValueTreeMap tmScore2TermWeak = + // retrieveInterestingTerms(strDocID, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, 1, 1, 2, bSkipSimilarTerms, hsIndexPaths); + // + // while (tmScore2TermWeak.keySize() > 0) + // { + // Float fTfIdf = tmScore2TermWeak.firstKey(); + // String strTopTerm = tmScore2TermWeak.getFirst(fTfIdf); + // tmScore2TermWeak.remove(fTfIdf, strTopTerm); + // + // if(!tmScore2Term.containsValue(strTopTerm)) tmScore2Term.add(fTfIdf, strTopTerm); + // + // if(tmScore2Term.valueSize() >= iMaxNumberOfBuzzwords) break; + // } + // } + // + // LinkedHashMap hsTerm2TfIdf = new LinkedHashMap(); + // for (Entry score2term : tmScore2Term.entryList()) + // hsTerm2TfIdf.put(score2term.getValue(), score2term.getKey()); + // + // + // return hsTerm2TfIdf; + // } + + + + // + // /** + // * This method is for calculating buzzwords out of an arbritrary String, by giving an index attribute as 'context. The string will be tokenized according the given + // * analyzer for this attribute (as set by the IndexAccessor default analyzer), and also takes the document frequencies for all terms of this attribute. + // * + // * @param strDocumentText the text of the document. This text influences the buzzword calculation as it would be an attribute value of + // * strAttributeName4BuzzwordCalculation + // * @param strAttributeName4BuzzwordCalculation this is the name of the attribute the given text should be differentiated against with buzzwords + // * @param iMaxNumberOfBuzzwords the maximum number of buzzwords + // * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + // * @param hsIndexPaths the list of indices that should be used for buzzword calculation. The document must be stored in exactly one index, referenced by the document + // * object value of strIdFieldName. + // * + // * @return the extracted buzzwords, with their according tfidf value, sorted by TfIdf values. Key: the term itself. Value: the tfIdf value. 
+ // * + // * @throws CorruptIndexException + // * @throws IOException + // * @throws URINotFoundException + // * @throws URISyntaxException + // */ + // static public LinkedHashMap getBuzzwordsWithTfIdf(String strDocumentText, String strAttributeName4BuzzwordCalculation, int iMaxNumberOfBuzzwords, + // boolean bSkipSimilarTerms, LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, URINotFoundException, URISyntaxException + // { + // MultiValueTreeMap tmScore2Term = + // retrieveInterestingTerms(strDocumentText, strAttributeName4BuzzwordCalculation, iMaxNumberOfBuzzwords, 2, 1, 2, bSkipSimilarTerms, hsIndexPaths); + // + // if(tmScore2Term.valueSize() < iMaxNumberOfBuzzwords) + // { + // + // MultiValueTreeMap tmScore2TermWeak = + // retrieveInterestingTerms(strDocumentText, strAttributeName4BuzzwordCalculation, iMaxNumberOfBuzzwords, 1, 1, 2, bSkipSimilarTerms, hsIndexPaths); + // + // while (tmScore2TermWeak.keySize() > 0) + // { + // Float fTfIdf = tmScore2TermWeak.firstKey(); + // String strTopTerm = tmScore2TermWeak.getFirst(fTfIdf); + // tmScore2TermWeak.remove(fTfIdf, strTopTerm); + // + // if(!tmScore2Term.containsValue(strTopTerm)) tmScore2Term.add(fTfIdf, strTopTerm); + // + // if(tmScore2Term.valueSize() >= iMaxNumberOfBuzzwords) break; + // } + // } + // + // LinkedHashMap hsTerm2TfIdf = new LinkedHashMap(); + // for (Entry score2term : tmScore2Term.entryList()) + // hsTerm2TfIdf.put(score2term.getValue(), score2term.getKey()); + // + // + // return hsTerm2TfIdf; + // + // } + + + + // /** + // * Gets the buzzwords for fields of a document, together with their document TfIdf value. The metohd makes use of the IndexAccessor default Analyzer. + // * + // * @param strDocID the ID of the document from which the buzzwords should be extracted + // * @param strFieldName the name of the attribute the buzzwords should be extracted from. + // * @param iMaxNumberOfBuzzwords the maximum number of buzzwords + // * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + // * @param hsIndexPaths the list of indices that should be used for buzzword calculation. The document must be stored in exactly one index, referenced by the document + // * object value of strIdFieldName. + // * + // * @return the extracted buzzwords, boosted according their score. Key: the term itself. Value: the according score. null in the case the given attribute doesn't + // * exist. 
+ // * @throws CorruptIndexException + // * @throws IOException + // * @throws URINotFoundException + // * @throws URISyntaxException + // */ + // static public List> getBuzzwordsWithTfIdf4AllFieldValues(String strDocID, String strFieldName, int iMaxNumberOfBuzzwords, + // boolean bSkipSimilarTerms, LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, URINotFoundException, URISyntaxException + // { + // + // List> tmScore2TermMaps = + // retrieveInterestingTerms4AllFieldValues(strDocID, strFieldName, iMaxNumberOfBuzzwords, 2, 1, 2, bSkipSimilarTerms, hsIndexPaths); + // + // // aus Performancegründen verzichte ich hier mal auf eine 'weichere' Strategie, falls unsere Maximalanzahl der Buzzwords nicht erreicht wurde + // + // LinkedList> hsTerm2ScoreMaps = new LinkedList>(); + // + // for (MultiValueTreeMap hsScore2Term : tmScore2TermMaps) + // { + // LinkedHashMap hsTerm2TfIdf = new LinkedHashMap(); + // for (Entry score2term : hsScore2Term.entryList()) + // hsTerm2TfIdf.put(score2term.getValue(), score2term.getKey()); + // + // hsTerm2ScoreMaps.add(hsTerm2TfIdf); + // } + // + // + // return hsTerm2ScoreMaps; + // } + + + + + /** + * Adds calculated buzzwords to the given document. The method makes use of the IndexAccessor default Analyzer. + * + * @param iDocNo the lucene document number inside the index behind reader, for the document doc2modify + * @param doc2modify the document that should enriched with a new buzzword field + * @param strNewField4Buzzwords the attribute that should be created for the buzzword. Becomes part of the document object + * @param sAttNames4BuzzwordCalculation the attributes that should be considered for buzzword generation + * @param iMaxNumberOfBuzzwords the maximum number of buzzwords the method should generate + * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + * @param reader the lucene index reader + * + * @return true in the case the document object was modified, false otherwise. 
The method does not modify the index entry.
+ *
+ * @throws Exception
+ */
+ static public boolean addBuzzwords(int iDocNo, Document doc2modify, String strNewField4Buzzwords, Set sAttNames4BuzzwordCalculation,
+ int iMaxNumberOfBuzzwords, boolean bSkipSimilarTerms, IndexReader reader) throws Exception
+ {
+
+
+ List lBuzzwords = getBuzzwords(iDocNo, doc2modify, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, bSkipSimilarTerms, reader);
+
+ // if there is no content at all, we do nothing
+ if(lBuzzwords == null) return false;
+
+ StringBuilder strbBuzzWordz = new StringBuilder();
+
+ for (int i = 0; i < Math.min(iMaxNumberOfBuzzwords, lBuzzwords.size()); i++)
+ strbBuzzWordz.append(lBuzzwords.get(i)).append(" ");
+
+
+ // if the buzzword field already exists, it gets removed
+ doc2modify.removeFields(strNewField4Buzzwords);
+ // the newly calculated buzzwords are added to the doc
+ FieldType fieldType =
+ new DynamicFieldType().setIndexOptionS(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS).setStoreD(true).setStoreTermVectorS(true)
+ .setStoreTermVectorOffsetS(true).setTokenizeD(true).freezE();
+
+ Field field4buzzwords = new Field(strNewField4Buzzwords, strbBuzzWordz.toString(), fieldType);
+ doc2modify.add(field4buzzwords);
+
+
+ return true;
+ }
+
+
+
+ static protected int docID2DocNo(String strDocIdAttributeName, String strDocID, IndexReader reader) throws Exception
+ {
+ int luceneDocumentNumber;
+
+ IndexSearcher searcher = new IndexSearcher(reader);
+
+ TopDocs topDocs = searcher.search(new TermQuery(new Term(strDocIdAttributeName, strDocID)), 1);
+
+ if(topDocs.totalHits == 0) throw new Exception("no lucene document found with id '" + strDocID + "'");
+
+ // there should be exactly one document with this id...
+ luceneDocumentNumber = topDocs.scoreDocs[0].doc;
+
+ return luceneDocumentNumber;
+ }
+
+
+
+
+
+
+
+ /**
+ * Gets the buzzwords for fields of a document. The method makes use of the IndexAccessor default Analyzer.
+ *
+ * @param iDocNo the lucene document number inside the index behind reader, for the document doc2modify
+ * @param doc2modify the document that should be enriched with a new buzzword field
+ * @param sAttNames4BuzzwordCalculation the name of the attributes the buzzwords should be extracted from
+ * @param iMaxNumberOfBuzzwords the maximum number of buzzwords
+ * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability
+ * @param reader the lucene index reader
+ *
+ * @return the list of the extracted buzzwords, null in the case the given attribute doesn't exist
+ *
+ * @throws Exception
+ * @throws URINotFoundException
+ */
+ static public List getBuzzwords(int iDocNo, Document doc2modify, Set sAttNames4BuzzwordCalculation, int iMaxNumberOfBuzzwords,
+ boolean bSkipSimilarTerms, IndexReader reader) throws Exception
+ {
+
+ LinkedHashMap buzzwordsWithTfIdf =
+ getBuzzwordsWithTfIdf(iDocNo, doc2modify, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, bSkipSimilarTerms, reader);
+
+ LinkedList llBuzzwords = new LinkedList(buzzwordsWithTfIdf.keySet());
+
+
+ return llBuzzwords;
+ }
+
+
+
+ /**
+ * Gets the buzzwords for fields of a document, together with their document TfIdf value. The method makes use of the IndexAccessor default Analyzer.
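+ * <p>
+ * A minimal usage sketch (illustrative only; the index directory, the id attribute name and the field names are assumptions, and the fields must have been indexed with term vectors):
+ * <pre>
+ * IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("./luceneIndex")));
+ * int iDocNo = docID2DocNo("id", "urn:example:doc42", reader);
+ * Document doc2modify = reader.document(iDocNo);
+ * Set sFields = new HashSet(Arrays.asList("title", "body"));
+ * LinkedHashMap hsBuzzword2TfIdf = getBuzzwordsWithTfIdf(iDocNo, doc2modify, sFields, 10, true, reader);
+ * </pre>
+ * The returned scores follow the default similarity formula used in retrieveInterestingTerms, i.e. tf(term) * idf(term)^2 for this document.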
+ * + * @param iDocNo the lucene document number inside the index behind reader, for the document doc2modify + * @param doc2modify the document that should enriched with a new buzzword field + * @param sAttNames4BuzzwordCalculation the name of the attributes the buzzwords should be extracted from. + * @param iMaxNumberOfBuzzwords the maximum number of buzzwords + * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + * @param reader the lucene index reader + * + * @return the extracted buzzwords, boosted according their score. Key: the term itself. Value: the according score. null in the case the given attribute doesn't + * exist. + * + * @throws Exception + */ + static public LinkedHashMap getBuzzwordsWithTfIdf(int iDocNo, Document doc2modify, Set sAttNames4BuzzwordCalculation, + int iMaxNumberOfBuzzwords, boolean bSkipSimilarTerms, IndexReader reader) throws Exception + { + + MultiValueTreeMap tmScore2Term = + retrieveInterestingTerms(iDocNo, doc2modify, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, 2, 1, 2, bSkipSimilarTerms, reader); + + if(tmScore2Term.valueSize() < iMaxNumberOfBuzzwords) + { + + MultiValueTreeMap tmScore2TermWeak = + retrieveInterestingTerms(iDocNo, doc2modify, sAttNames4BuzzwordCalculation, iMaxNumberOfBuzzwords, 1, 1, 2, bSkipSimilarTerms, reader); + + while (tmScore2TermWeak.keySize() > 0) + { + Float fTfIdf = tmScore2TermWeak.firstKey(); + String strTopTerm = tmScore2TermWeak.getFirst(fTfIdf); + tmScore2TermWeak.remove(fTfIdf, strTopTerm); + + if(!tmScore2Term.containsValue(strTopTerm)) tmScore2Term.add(fTfIdf, strTopTerm); + + if(tmScore2Term.valueSize() >= iMaxNumberOfBuzzwords) break; + } + } + + LinkedHashMap hsTerm2TfIdf = new LinkedHashMap(); + for (Entry score2term : tmScore2Term.entryList()) + hsTerm2TfIdf.put(score2term.getValue(), score2term.getKey()); + + + return hsTerm2TfIdf; + } + + + + /** + * + * @param iDocNo the lucene document number inside the index behind reader, for the document doc2modify + * @param doc2modify the document that should enriched with a new buzzword field + * @param strFieldName the field where you want the top frequent terms for. + * @param iMinFrequency the minimum frequency a term must appear in this field + * @param iMinWordLength the minimum word length a term must have + * @param iMaxNumberOfTerms the maximum number of terms the method returns + * @param reader the lucene index reader + * + * @return + * + * @throws Exception + */ + public static List getTopFrequentTerms(int iDocNo, Document doc2modify, String strFieldName, int iMinFrequency, int iMinWordLength, + int iMaxNumberOfTerms, IndexReader reader) throws Exception + { + + LinkedList llTerm2Frequency = new LinkedList(); + PriorityQueue pqTerm2Frequency = new PriorityQueue(iMaxNumberOfTerms, new Comparator() + { + + @Override + public int compare(Term2FrequencyEntry o1, Term2FrequencyEntry o2) + { + return o1.getFrequency().compareTo(o2.getFrequency()); + } + }); + + // wenn es das feld gar nicht gibt in diesem doc, dann machen wir gar nix! 
(das überprüfen ist erheblich billiger als das unnötige iterieren durch alles im reader + if(doc2modify.getField(strFieldName) == null) return llTerm2Frequency; + + Terms termVector = reader.getTermVector(iDocNo, strFieldName); + if(termVector == null) return llTerm2Frequency; + + TermsEnum termsEnum = termVector.iterator(); + + while (termsEnum.next() != null) + { + String strTerm = termsEnum.term().utf8ToString(); + long lFrequency = termsEnum.totalTermFreq(); + + if(lFrequency >= iMinFrequency && strTerm.length() >= iMinWordLength) + pqTerm2Frequency.add(new Term2FrequencyEntry(strTerm, Long.valueOf(lFrequency).intValue())); + + if(pqTerm2Frequency.size() > iMaxNumberOfTerms) pqTerm2Frequency.poll(); + } + + for (Term2FrequencyEntry term2Frq : pqTerm2Frequency) + llTerm2Frequency.add(0, term2Frq); + + + + return llTerm2Frequency; + } + + + + static MultiValueTreeMap retrieveInterestingTerms(int iDocNo, Document doc2modify, Set sAttNames4BuzzwordCalculation, + int iMaxNumberOfBuzzwords, int iMinDocFreq, int iMinTermFreq, int iMinWordLen, boolean bSkipSimilarTerms, IndexReader reader) throws Exception + { + + int iIndexDocumentCount = reader.numDocs(); + + HashMap hsTerm2Frequency = new HashMap(); + + // als erstes werden die frequencies aller fields aufsummiert + for (String strFieldName : sAttNames4BuzzwordCalculation) + { + + // XXX: hier ist erst mal die Anzahl der verschiedenen Terme des docs hartkodiert + List topFrequentTerms = getTopFrequentTerms(iDocNo, doc2modify, strFieldName, iMinTermFreq, iMinWordLen, 1234, reader); + + for (Term2FrequencyEntry topTerm2FreqLocal : topFrequentTerms) + { + Integer iFreqOld = hsTerm2Frequency.get(topTerm2FreqLocal.getTerm()); + if(iFreqOld == null) + iFreqOld = topTerm2FreqLocal.getFrequency(); + else + iFreqOld += topTerm2FreqLocal.getFrequency(); + + hsTerm2Frequency.put(topTerm2FreqLocal.getTerm(), iFreqOld); + } + } + + // nun werden die Terme bezüglich ihres scores (tfIdf) sortiert + MultiValueTreeMap tmScore2Term = new MultiValueTreeMap(HashSet.class); + for (Entry term2Frequency : hsTerm2Frequency.entrySet()) + { + String strTerm = term2Frequency.getKey(); + Integer iTermFrequency = term2Frequency.getValue(); + + // wir haben angegeben, wie oft der Term mindestens da sein muß + if(iMinTermFreq > 0 && iTermFrequency < iMinTermFreq) continue; + + // Zahlen ignorieren wir + if(!strTerm.matches("\\D+")) continue; + + // es wird die max-docFrequency berücksichtig (wie in MoreLikeThis) + int iMaxDocumentFrequency = 0; + for (String strField : sAttNames4BuzzwordCalculation) + { + int iDocumentFrequency = reader.docFreq(new Term(strField, strTerm)); + if(iMaxDocumentFrequency < iDocumentFrequency) iMaxDocumentFrequency = iDocumentFrequency; + } + + if(iMinDocFreq > 0 && iMaxDocumentFrequency < iMinDocFreq) continue; + + // das sollte eigentlich nicht passieren - im Fehlerfall ignorieren wir das einfach + if(iMaxDocumentFrequency == 0) continue; + + // das ist die Formel der defaultSimilarity. 
Eine andere werden wir einfach nie brauchen + float fIdf = m_defaultSimilarity.idf(iMaxDocumentFrequency, iIndexDocumentCount); + float fScore = m_defaultSimilarity.tf(iTermFrequency) * fIdf * fIdf; + + boolean bRemoveLastTerm4Score = false; + // nur die top -Terme - wenn wir über die max-Anzahl sind, dann tauschen wir den kleinsten aus + if(tmScore2Term.valueSize() >= iMaxNumberOfBuzzwords) + { + // wir sind drüber + // wenn unser kleinster schon größer ist, dann ignorieren wir den neuen + if(tmScore2Term.firstKey() >= fScore) continue; + // ansonsten tauschen wir unseren kleinsten aus + bRemoveLastTerm4Score = true; + } + + + // wir schauen, ob wir schon einen term drin haben, der uns sehr ähnlich sieht - dann nehmen wir den mit dem höchsten score (alternativ + // wäre auch der kürzere möglich, aber der könnte einen niederen score haben, und dann später wieder rausfliegen - das würde die Qualität + // verschlechtern) + Boolean bBetterSimilarTermInList = false; + if(bSkipSimilarTerms) + { + for (Entry score2TermInList : tmScore2Term.entryList()) + { + if(!Levenshtein.isInDistance(score2TermInList.getValue(), strTerm, 3)) continue; + // wenn der existierende größer ist, dann brauchen wir gar nix eintragen + if(score2TermInList.getKey() >= fScore) + { + bBetterSimilarTermInList = true; + break; + } + // wenn der neue vom score her besser ist, dann müssen wir den austauschen + tmScore2Term.remove(score2TermInList.getKey(), score2TermInList.getValue()); + } + } + + if(bRemoveLastTerm4Score && !bBetterSimilarTermInList) tmScore2Term.remove(tmScore2Term.firstKey()); + if(!bBetterSimilarTermInList) tmScore2Term.add(fScore, strTerm); + } + + + return tmScore2Term; + } + + + + + + + // static MultiValueTreeMap retrieveInterestingTerms(String strDocID, Set sAttNames4BuzzwordCalculation, int iMaxNumberOfBuzzwords, + // int iMinDocFreq, int iMinTermFreq, int iMinWordLen, boolean bSkipSimilarTerms, LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, + // URINotFoundException, URISyntaxException + // { + // + // RemoteIndexReader reader = IndexAccessor.getMultiIndexReader(hsIndexPaths, true); + // int iIndexDocumentCount = reader.numDocs(); + // + // HashMap hsTerm2Frequency = new HashMap(); + // + // // als erstes werden die frequencies aller fields aufsummiert + // for (String strFieldName : sAttNames4BuzzwordCalculation) + // { + // + // // XXX: hier ist erst mal die Anzahl der verschiedenen Terme des docs hartkodiert + // List topFrequentTerms = reader.getTopFrequentTerms(strDocID, strFieldName, iMinTermFreq, iMinWordLen, 1234); + // + // for (Term2FrequencyEntry topTerm2FreqLocal : topFrequentTerms) + // { + // Integer iFreqOld = hsTerm2Frequency.get(topTerm2FreqLocal.getTerm()); + // if(iFreqOld == null) + // iFreqOld = topTerm2FreqLocal.getFrequency(); + // else + // iFreqOld += topTerm2FreqLocal.getFrequency(); + // + // hsTerm2Frequency.put(topTerm2FreqLocal.getTerm(), iFreqOld); + // } + // } + // + // // nun werden die Terme bezüglich ihres scores (tfIdf) sortiert + // MultiValueTreeMap tmScore2Term = new MultiValueTreeMap(HashSet.class); + // for (Entry term2Frequency : hsTerm2Frequency.entrySet()) + // { + // String strTerm = term2Frequency.getKey(); + // Integer iTermFrequency = term2Frequency.getValue(); + // + // // wir haben angegeben, wie oft der Term mindestens da sein muß + // if(iMinTermFreq > 0 && iTermFrequency < iMinTermFreq) continue; + // + // // Zahlen ignorieren wir + // if(!strTerm.matches("\\D+")) continue; + // + // // es wird die max-docFrequency 
berücksichtig (wie in MoreLikeThis) + // int iMaxDocumentFrequency = 0; + // for (String strField : sAttNames4BuzzwordCalculation) + // { + // int iDocumentFrequency = reader.documentFrequency(strField, strTerm); + // if(iMaxDocumentFrequency < iDocumentFrequency) iMaxDocumentFrequency = iDocumentFrequency; + // } + // + // if(iMinDocFreq > 0 && iMaxDocumentFrequency < iMinDocFreq) continue; + // + // // das sollte eigentlich nicht passieren - im Fehlerfall ignorieren wir das einfach + // if(iMaxDocumentFrequency == 0) continue; + // + // // das ist die Formel der defaultSimilarity. Eine andere werden wir einfach nie brauchen + // float fIdf = m_defaultSimilarity.idf(iMaxDocumentFrequency, iIndexDocumentCount); + // float fScore = m_defaultSimilarity.tf(iTermFrequency) * fIdf * fIdf; + // + // boolean bRemoveLastTerm4Score = false; + // // nur die top -Terme - wenn wir über die max-Anzahl sind, dann tauschen wir den kleinsten aus + // if(tmScore2Term.valueSize() >= iMaxNumberOfBuzzwords) + // { + // // wir sind drüber + // // wenn unser kleinster schon größer ist, dann ignorieren wir den neuen + // if(tmScore2Term.firstKey() >= fScore) continue; + // // ansonsten tauschen wir unseren kleinsten aus + // bRemoveLastTerm4Score = true; + // } + // + // + // // wir schauen, ob wir schon einen term drin haben, der uns sehr ähnlich sieht - dann nehmen wir den mit dem höchsten score (alternativ + // // wäre auch der kürzere möglich, aber der könnte einen niederen score haben, und dann später wieder rausfliegen - das würde die Qualität + // // verschlechtern) + // Boolean bBetterSimilarTermInList = false; + // if(bSkipSimilarTerms) + // { + // for (Entry score2TermInList : tmScore2Term.entryList()) + // { + // if(!Levenshtein.isInDistance(score2TermInList.getValue(), strTerm, 3)) continue; + // // wenn der existierende größer ist, dann brauchen wir gar nix eintragen + // if(score2TermInList.getKey() >= fScore) + // { + // bBetterSimilarTermInList = true; + // break; + // } + // // wenn der neue vom score her besser ist, dann müssen wir den austauschen + // tmScore2Term.remove(score2TermInList.getKey(), score2TermInList.getValue()); + // } + // } + // + // if(bRemoveLastTerm4Score && !bBetterSimilarTermInList) tmScore2Term.remove(tmScore2Term.firstKey()); + // if(!bBetterSimilarTermInList) tmScore2Term.add(fScore, strTerm); + // } + // + // + // return tmScore2Term; + // } + + + // + // /** + // * This method is for calculating buzzwords out of an arbritrary String, by giving an index attribute as 'context. The string will be tokenized according the given + // * analyzer for this attribute (as set by the IndexAccessor default analyzer), and also takes the document frequencies for all terms of this attribute. + // * + // * @param strDocumentText the text of the document. This text influences the buzzword calculation as it would be an attribute value of + // * strAttributeName4BuzzwordCalculation + // * @param strAttributeName4BuzzwordCalculation this is the name of the attribute the given text should be differentiated against with buzzwords + // * @param iMaxNumberOfBuzzwords the maximum number of buzzwords + // * @param iMinDocFreq + // * @param iMinTermFreq + // * @param iMinWordLen + // * @param bSkipSimilarTerms true: similar terms (according to the Levenshtein-distance) will be skipped for better readability + // * @param hsIndexPaths the list of indices that should be used for buzzword calculation. 
The document must be stored in exactly one index, referenced by the document + // * object value of strIdFieldName. + // * + // * @return the extracted buzzwords, sorted by their according tfidf value. Key: the tfIdf value. Value: the term. + // * + // * @throws CorruptIndexException + // * @throws IOException + // * @throws URINotFoundException + // * @throws URISyntaxException + // */ + // static MultiValueTreeMap retrieveInterestingTerms(String strDocumentText, String strAttributeName4BuzzwordCalculation, int iMaxNumberOfBuzzwords, + // int iMinDocFreq, int iMinTermFreq, int iMinWordLen, boolean bSkipSimilarTerms, LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, + // URINotFoundException, URISyntaxException + // { + // + // RemoteIndexReader reader = IndexAccessor.getMultiIndexReader(hsIndexPaths, true); + // int iIndexDocumentCount = reader.numDocs(); + // + // // hier tokenisieren wir den übergebenen Text und ermitteln die term frequencies + // HashMap hsTerm2Frequency = new HashMap(); + // + // TokenStream tokenStream = IndexAccessor.getDefaultAnalyzer().tokenStream(strAttributeName4BuzzwordCalculation, strDocumentText); + // + // tokenStream.reset(); + // while (tokenStream.incrementToken()) + // { + // // hier ermitteln wir die termfrequenzen für das aktuelle AttValue + // CharTermAttribute termAttribute = tokenStream.getAttribute(CharTermAttribute.class); + // String strTerm = termAttribute.toString(); + // + // Integer iFrequency = hsTerm2Frequency.get(strTerm); + // if(iFrequency == null) + // hsTerm2Frequency.put(strTerm, 1); + // else + // hsTerm2Frequency.put(strTerm, iFrequency + 1); + // } + // tokenStream.close(); + // + // + // + // // nun werden die Terme bezüglich ihres scores (tfIdf) sortiert + // MultiValueTreeMap tmScore2Term = new MultiValueTreeMap(HashSet.class); + // for (Entry term2Frequency : hsTerm2Frequency.entrySet()) + // { + // String strTerm = term2Frequency.getKey(); + // Integer iTermFrequency = term2Frequency.getValue(); + // + // + // if(strTerm.length() < iMinWordLen) continue; + // // wir haben angegeben, wie oft der Term mindestens da sein muß + // if(iMinTermFreq > 0 && iTermFrequency < iMinTermFreq) continue; + // + // // Zahlen ignorieren wir + // if(!strTerm.matches("\\D+")) continue; + // + // int iDocumentFrequency = reader.documentFrequency(strAttributeName4BuzzwordCalculation, strTerm); + // + // if(iMinDocFreq > 0 && iDocumentFrequency < iMinDocFreq) continue; + // + // // das sollte eigentlich nicht passieren - im Fehlerfall ignorieren wir das einfach + // if(iDocumentFrequency == 0) continue; + // + // // das ist die Formel der defaultSimilarity. 
Eine andere werden wir einfach nie brauchen + // float fIdf = m_defaultSimilarity.idf(iDocumentFrequency, iIndexDocumentCount); + // float fScore = m_defaultSimilarity.tf(iTermFrequency) * fIdf * fIdf; + // + // boolean bRemoveLastTerm4Score = false; + // // nur die top -Terme - wenn wir über die max-Anzahl sind, dann tauschen wir den kleinsten aus + // if(tmScore2Term.valueSize() >= iMaxNumberOfBuzzwords) + // { + // // wir sind drüber + // // wenn unser kleinster schon größer ist, dann ignorieren wir den neuen + // if(tmScore2Term.firstKey() >= fScore) continue; + // // ansonsten tauschen wir unseren kleinsten aus + // bRemoveLastTerm4Score = true; + // } + // + // + // // wir schauen, ob wir schon einen term drin haben, der uns sehr ähnlich sieht - dann nehmen wir den mit dem höchsten score (alternativ + // // wäre auch der kürzere möglich, aber der könnte einen niederen score haben, und dann später wieder rausfliegen - das würde die Qualität + // // verschlechtern) + // Boolean bBetterSimilarTermInList = false; + // if(bSkipSimilarTerms) + // { + // for (Entry score2TermInList : tmScore2Term.entryList()) + // { + // if(!Levenshtein.isInDistance(score2TermInList.getValue(), strTerm, 3)) continue; + // // wenn der existierende größer ist, dann brauchen wir gar nix eintragen + // if(score2TermInList.getKey() >= fScore) + // { + // bBetterSimilarTermInList = true; + // break; + // } + // // wenn der neue vom score her besser ist, dann müssen wir den austauschen + // tmScore2Term.remove(score2TermInList.getKey(), score2TermInList.getValue()); + // } + // } + // + // if(bRemoveLastTerm4Score && !bBetterSimilarTermInList) tmScore2Term.remove(tmScore2Term.firstKey()); + // if(!bBetterSimilarTermInList) tmScore2Term.add(fScore, strTerm); + // } + // + // + // + // return tmScore2Term; + // } + + // + // + // static List> retrieveInterestingTerms4AllFieldValues(String strDocID, String strFieldName, int iMaxNumberOfBuzzwords, + // int iMinDocFreq, int iMinTermFreq, int iMinWordLen, boolean bSkipSimilarTerms, LinkedHashSet hsIndexPaths) throws CorruptIndexException, IOException, + // URINotFoundException, URISyntaxException + // { + // + // RemoteIndexReader reader = IndexAccessor.getMultiIndexReader(hsIndexPaths, true); + // int iIndexDocumentCount = reader.numDocs(); + // + // + // LinkedList> llScore2TermMaps = new LinkedList>(); + // + // // XXX: hier ist erst mal die Anzahl der verschiedenen Terme des docs hartkodiert + // for (List lTerm2Frequencies : reader.getTopFrequentTermsPerAttributeValue(strDocID, strFieldName, iMinTermFreq, iMinWordLen, 1234)) + // { + // + // // nun werden die Terme bezüglich ihres scores (tfIdf) sortiert + // MultiValueTreeMap tmScore2Term = new MultiValueTreeMap(HashSet.class); + // for (Term2FrequencyEntry term2Frequency : lTerm2Frequencies) + // { + // String strTerm = term2Frequency.getTerm(); + // Integer iTermFrequency = term2Frequency.getFrequency(); + // + // // wir haben angegeben, wie oft der Term mindestens da sein muß + // if(iMinTermFreq > 0 && iTermFrequency < iMinTermFreq) continue; + // + // // Zahlen ignorieren wir + // if(!strTerm.matches("\\D+")) continue; + // + // int iDocumentFrequency = reader.documentFrequency(strFieldName, strTerm); + // + // if(iMinDocFreq > 0 && iDocumentFrequency < iMinDocFreq) continue; + // + // // das sollte eigentlich nicht passieren - im Fehlerfall ignorieren wir das einfach + // if(iDocumentFrequency == 0) continue; + // + // // das ist die Formel der defaultSimilarity. 
Eine andere werden wir einfach nie brauchen + // float fIdf = m_defaultSimilarity.idf(iDocumentFrequency, iIndexDocumentCount); + // float fScore = m_defaultSimilarity.tf(iTermFrequency) * fIdf * fIdf; + // + // boolean bRemoveLastTerm4Score = false; + // // nur die top -Terme - wenn wir über die max-Anzahl sind, dann tauschen wir den kleinsten aus + // if(tmScore2Term.valueSize() >= iMaxNumberOfBuzzwords) + // { + // // wir sind drüber + // // wenn unser kleinster schon größer ist, dann ignorieren wir den neuen + // if(tmScore2Term.firstKey() >= fScore) continue; + // // ansonsten tauschen wir unseren kleinsten aus + // bRemoveLastTerm4Score = true; + // } + // + // + // // wir schauen, ob wir schon einen term drin haben, der uns sehr ähnlich sieht - dann nehmen wir den mit dem höchsten score + // // (alternativ + // // wäre auch der kürzere möglich, aber der könnte einen niederen score haben, und dann später wieder rausfliegen - das würde die + // // Qualität + // // verschlechtern) + // Boolean bBetterSimilarTermInList = false; + // if(bSkipSimilarTerms) + // { + // for (Entry score2TermInList : tmScore2Term.entryList()) + // { + // if(!Levenshtein.isInDistance(score2TermInList.getValue(), strTerm, 3)) continue; + // // wenn der existierende größer ist, dann brauchen wir gar nix eintragen + // if(score2TermInList.getKey() >= fScore) + // { + // bBetterSimilarTermInList = true; + // break; + // } + // // wenn der neue vom score her besser ist, dann müssen wir den austauschen + // tmScore2Term.remove(score2TermInList.getKey(), score2TermInList.getValue()); + // } + // } + // + // if(bRemoveLastTerm4Score && !bBetterSimilarTermInList) tmScore2Term.remove(tmScore2Term.firstKey()); + // if(!bBetterSimilarTermInList) tmScore2Term.add(fScore, strTerm); + // } + // + // llScore2TermMaps.add(tmScore2Term); + // } + // + // + // + // return llScore2TermMaps; + // } + + + + + + + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/DocumentFrqClass.java b/src/main/java/de/dfki/km/leech/lucene/basic/DocumentFrqClass.java new file mode 100644 index 0000000..bf0fab3 --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/DocumentFrqClass.java @@ -0,0 +1,179 @@ +package de.dfki.km.leech.lucene.basic; + + + +// import de.dfki.inquisitor.lucene.DynamicFieldType; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.mapdb.DB; +import org.mapdb.DBMaker; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + + + +public class DocumentFrqClass implements Closeable +{ + + + protected Map m_hsTerm2IndexFrq; + + protected long m_lMaxFrq = 0; + + protected DB m_mapDB; + + protected IndexReader m_reader; + + protected String m_strFieldName4Calculation; + + protected String m_strMaxFrqTerm = ""; + + + + @SuppressWarnings("unchecked") + public DocumentFrqClass(IndexReader reader, String strFieldName4Calculation) + { + m_reader = reader; + m_strFieldName4Calculation = strFieldName4Calculation; + + try + { + Logger.getLogger(DocumentFrqClass.class.getName()).info("load overall term index frequencies"); + + + // OLD: m_mapDB = 
DBMaker.newTempFileDB().deleteFilesAfterClose().closeOnJvmShutdown().transactionDisable().make(); + // m_hsTerm2IndexFrq = m_mapDB.getTreeMap("temp"); + m_mapDB = DBMaker.tempFileDB().closeOnJvmShutdown().fileDeleteAfterOpen().fileDeleteAfterClose().fileLockDisable().fileMmapEnableIfSupported().make(); + m_hsTerm2IndexFrq = (Map) m_mapDB.treeMap("temp").create(); + + + + Terms terms; + + terms = MultiFields.getTerms(reader, strFieldName4Calculation); + + + if(terms != null) + { + TermsEnum termsEnum = terms.iterator(); + + while (termsEnum.next() != null) + { + long lFrequency = termsEnum.totalTermFreq(); + String strTerm = termsEnum.term().utf8ToString(); + + m_hsTerm2IndexFrq.put(strTerm, lFrequency); + if(lFrequency > m_lMaxFrq) + { + m_lMaxFrq = lFrequency; + m_strMaxFrqTerm = strTerm; + } + } + } + + + Logger.getLogger(DocumentFrqClass.class.getName()).info("...finished"); + + } + catch (Throwable e) + { + Logger.getLogger(DocumentFrqClass.class.getName()).log(Level.SEVERE, "Error", e); + } + + } + + + + public boolean addDocumentFrequencyClass(int iDocNo, Document doc2modify, String strNewField4FrqClass) throws Exception + { + + boolean bModified = false; + if(doc2modify.getField(strNewField4FrqClass) != null) bModified = true; + + doc2modify.removeFields(strNewField4FrqClass); + + if(doc2modify.getField(m_strFieldName4Calculation) == null) return bModified; + + + double dAverageFrqClass = 0; + int iFrqClassesCount = 0; + + + + Terms termVector = m_reader.getTermVector(iDocNo, m_strFieldName4Calculation); + if(termVector == null) return bModified; + + TermsEnum termsEnum = termVector.iterator(); + + while (termsEnum.next() != null) + { + String strTerm = termsEnum.term().utf8ToString(); + // reine Zahlen sind draussen + if(strTerm.matches("\\d*")) continue; + // das zählt nur für dieses doc, siehe ApiDoc reader.getTermVector(..) 
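+ // In other words: totalTermFreq() on a per-document term vector yields the frequency of the term
+ // within this single document, while m_hsTerm2IndexFrq holds the corpus-wide frequency cached in the
+ // constructor. The frequency class computed below is floor(log2(m_lMaxFrq / lFrequencyInIndex)): the
+ // most frequent term of the index falls into class 0, terms roughly half as frequent into class 1,
+ // and so on; the document then gets the frequency-weighted average over all classes >= 2.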
+ long lFrequencyInDoc = termsEnum.totalTermFreq(); + + + Long lFrequencyInIndex = m_hsTerm2IndexFrq.get(strTerm); + if(lFrequencyInIndex == null) continue; + + int iFrqClass; + if(m_lMaxFrq <= 0 || lFrequencyInIndex <= 0) + iFrqClass = -1; + else + iFrqClass = (int) Math.floor((Math.log((m_lMaxFrq / lFrequencyInIndex)) / Math.log(2))); + + if(iFrqClass >= 2) + { + dAverageFrqClass += iFrqClass * lFrequencyInDoc; + iFrqClassesCount += lFrequencyInDoc; + } + } + + + + if(iFrqClassesCount >= 0) dAverageFrqClass = dAverageFrqClass / iFrqClassesCount; + + // wir diskretisieren auf halbe Werte + dAverageFrqClass = Math.round(dAverageFrqClass * 2); + // als Integer, ohne Nachkommastellen (der eigentliche Wert mal 10) + int iAverageFrqClass = (int) (dAverageFrqClass * 5d); + + + + // und an das doc dran + FieldType fieldType = + new DynamicFieldType().setIndexOptionS(IndexOptions.DOCS).setStoreD(true).setStoreTermVectorS(true) + .setStoreTermVectorOffsetS(true).setTokenizeD(true).freezE(); + + Field field4buzzwords = new Field(strNewField4FrqClass, String.valueOf(iAverageFrqClass), fieldType); + + + doc2modify.add(field4buzzwords); + + + return true; + } + + + + @Override + public void close() throws IOException + { + if(m_mapDB != null) m_mapDB.close(); + m_mapDB = null; + m_hsTerm2IndexFrq = null; + m_reader = null; + } + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/DynamicFieldType.java b/src/main/java/de/dfki/km/leech/lucene/basic/DynamicFieldType.java new file mode 100644 index 0000000..7434112 --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/DynamicFieldType.java @@ -0,0 +1,418 @@ +package de.dfki.km.leech.lucene.basic; + + + +import com.cedarsoftware.util.io.JsonReader; +import com.cedarsoftware.util.io.JsonWriter; +import de.dfki.inquisitor.text.DateParser; +import de.dfki.inquisitor.text.DateUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; + +import java.io.IOException; +import java.util.Date; +import java.util.logging.Level; +import java.util.logging.Logger; + +// import de.dfki.inquisitor.lucene.LuceneAnalyzerFactory; + + + +@SuppressWarnings("javadoc") +public class DynamicFieldType extends FieldType +{ + + + public static final DynamicFieldType doubleFieldType = new DynamicFieldType(LegacyDoubleField.TYPE_STORED).freezE(); + + public static final DynamicFieldType floatFieldType = new DynamicFieldType(LegacyFloatField.TYPE_STORED).freezE(); + + public static final DynamicFieldType integerFieldType = new DynamicFieldType(LegacyIntField.TYPE_STORED).freezE(); + + public static final DynamicFieldType dateFieldType = new DynamicFieldType(LegacyLongField.TYPE_STORED).setDateParsing(true).freezE(); + + public static final DynamicFieldType keywordFieldType = + new DynamicFieldType().setTokenizeD(true).setStoreD(true).setStoreTermVectorS(true).setStoreTermVectorOffsetS(true) + .setIndexOptionS(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS).setOmitNormS(true).setAnalyzer("org.apache.lucene.analysis.core.KeywordAnalyzer") + .freezE(); + + public static final DynamicFieldType longFieldType = new DynamicFieldType(LegacyLongField.TYPE_STORED).freezE(); + + public static final DynamicFieldType tokenizedFieldType = + new DynamicFieldType().setTokenizeD(true).setStoreD(true).setStoreTermVectorS(true).setStoreTermVectorOffsetS(true) + 
.setIndexOptionS(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS).setAnalyzer("de.dfki.km.leech.lucene.LeechSimpleAnalyzer").freezE(); + + + + /** + * Create Field instances, according to the configuration inside the given fieldType. Number fields will be generated, if a string value is given, it will be + * converted in the case the fieldType is a number type. Further, the method parses Strings for date if the fieldtype is of type {@link DynamicFieldType} and + * configured accordingly. You can also give number values for generating number or String fields fields (also according to the given fieldType). + * + * @param strAttName the attributes name + * @param attValue the attributes value + * @param fieldType the field type that influences the returned type of the field + * + * @return the field, with the configured fieldType. Null in the case the Field can not be generated out of the value. + */ + static public Field createField(String strAttName, Object attValue, FieldType fieldType) + { + try + { + if (attValue == null) + return null; + + + if (fieldType instanceof DynamicFieldType && ((DynamicFieldType) fieldType).getDateParsing() && attValue instanceof String) + { + Date parsedDate = DateParser.parseDateString((String) attValue); + if (parsedDate != null) + return new LegacyLongField(strAttName, DateUtils.date2Number(parsedDate), fieldType); + else + return null; + } + else if (attValue instanceof String) + { + + if (fieldType.numericType() == LegacyNumericType.INT) + return new LegacyIntField(strAttName, Integer.valueOf((String) attValue), fieldType); + else if (fieldType.numericType() == LegacyNumericType.LONG) + return new LegacyLongField(strAttName, Long.valueOf((String) attValue), fieldType); + else if (fieldType.numericType() == LegacyNumericType.FLOAT) + return new LegacyFloatField(strAttName, Float.valueOf((String) attValue), fieldType); + else if (fieldType.numericType() == LegacyNumericType.DOUBLE) + return new LegacyDoubleField(strAttName, Double.valueOf((String) attValue), fieldType); + else + return new Field(strAttName, (String) attValue, fieldType); + } + else if (attValue instanceof Number) + { + + if (fieldType.numericType() == LegacyNumericType.INT) + return new LegacyIntField(strAttName, ((Number) attValue).intValue(), fieldType); + else if (fieldType.numericType() == LegacyNumericType.LONG) + return new LegacyLongField(strAttName, ((Number) attValue).longValue(), fieldType); + else if (fieldType.numericType() == LegacyNumericType.FLOAT) + return new LegacyFloatField(strAttName, ((Number) attValue).floatValue(), fieldType); + else if (fieldType.numericType() == LegacyNumericType.DOUBLE) + return new LegacyDoubleField(strAttName, ((Number) attValue).doubleValue(), fieldType); + else + return new Field(strAttName, String.valueOf(attValue), fieldType); + } + else + return null; + } catch (Exception e) + { + Logger.getLogger(FieldConfig.class.getName()).log(Level.SEVERE, "Error", e); + return null; + } + } + protected String analyzer; + protected boolean dateParsing = false; + + public DynamicFieldType() + { + super(); + } + + + + public DynamicFieldType(FieldType ref) + { + super(ref); + } + + + + public Analyzer createAnalyzer() + { + try + { + + return LuceneAnalyzerFactory.createAnalyzer(getAnalyzer(), null); + } catch (Exception e) + { + Logger.getLogger(DynamicFieldType.class.getName()).log(Level.SEVERE, "Error", e); + return null; + } + } + + + + /** + * Create Field instances, according to the configuration inside the given fieldType. 
Number fields will be generated, if a string value is given, it will be + * converted in the case the fieldType is a number type. Further, the method parses Strings for date if the fieldtype is of type {@link DynamicFieldType} and + * configured accordingly. You can also give number values for generating number or String fields fields (also according to the given fieldType). + * + * @param strAttName the attributes name + * @param attValue the attributes value + * + * @return the field, with the configured fieldType. Null in the case the Field can not be generated out of the value. + */ + public Field createField(String strAttName, Object attValue) + { + return createField(strAttName, attValue, this); + } + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType freezE() + { + super.freeze(); + + return this; + } + + + + /** + * works only if this is not frozen yet + */ + public void fromJson(String strJson) + { + try + { + DynamicFieldType ref = (DynamicFieldType) JsonReader.jsonToJava(strJson); + + // this.setIndexed(ref.indexed()); + this.setStored(ref.stored()); + this.setTokenized(ref.tokenized()); + this.setStoreTermVectors(ref.storeTermVectors()); + this.setStoreTermVectorOffsets(ref.storeTermVectorOffsets()); + this.setStoreTermVectorPositions(ref.storeTermVectorPositions()); + this.setStoreTermVectorPayloads(ref.storeTermVectorPayloads()); + this.setOmitNorms(ref.omitNorms()); + this.setIndexOptions(ref.indexOptions()); + this.setDocValuesType(ref.docValuesType()); + this.setNumericType(ref.numericType()); + this.setNumericPrecisionStep(ref.numericPrecisionStep()); + + this.setAnalyzer(ref.getAnalyzer()); + } catch (IOException e) + { + throw new RuntimeException(e); + } + } + + + + /** + * Get the analyzer for this class. This is additionaly to the upper Lucene Fieldtype, for convinience. Returns this as sugar. + */ + public String getAnalyzer() + { + return this.analyzer; + } + + + + public boolean getDateParsing() + { + return dateParsing; + } + + + + /** + * Set the analyzer for this class. The given String is the full class name of the analyzer, that can be used with Class.forName(..). This is additionaly to the upper + * Lucene Fieldtype, for convinience. Returns this as sugar. + */ + public DynamicFieldType setAnalyzer(String analyzer) + { + this.analyzer = analyzer; + + return this; + } + + + + /** + * Specifies whether the values of this field should be parsed as date values or not. If true, all input strings will be parsed and written as according number into + * the index + * + * @return this as sugar + */ + public DynamicFieldType setDateParsing(boolean enableDateParsing) + { + this.dateParsing = enableDateParsing; + + return this; + } + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setDocValuesTypE(DocValuesType type) + { + super.setDocValuesType(type); + + return this; + } + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setIndexOptionS(IndexOptions value) + { + super.setIndexOptions(value); + + return this; + } + + + + // /** + // * Same functionality as in upper class method, but returns this as sugar. + // **/ + // public DynamicFieldType setIndexeD(boolean value) + // { + // super.setIndexed(value); + // + // return this; + // } + + + + /** + * Same functionality as in upper class method, but returns this as sugar. 
+ **/ + public DynamicFieldType setNumericPrecisionSteP(int precisionStep) + { + super.setNumericPrecisionStep(precisionStep); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setNumericTypE(LegacyNumericType type) + { + super.setNumericType(type); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setOmitNormS(boolean value) + { + super.setOmitNorms(value); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setStoreD(boolean value) + { + super.setStored(value); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setStoreTermVectorOffsetS(boolean value) + { + super.setStoreTermVectorOffsets(value); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setStoreTermVectorPayloadS(boolean value) + { + super.setStoreTermVectorPayloads(value); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setStoreTermVectorPositionS(boolean value) + { + super.setStoreTermVectorPositions(value); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setStoreTermVectorS(boolean value) + { + super.setStoreTermVectors(value); + + return this; + } + + + + + /** + * Same functionality as in upper class method, but returns this as sugar. + **/ + public DynamicFieldType setTokenizeD(boolean value) + { + super.setTokenized(value); + + return this; + } + + + + + public String toJson(boolean bFormatIt) + { + try + { + String strJson = JsonWriter.objectToJson(this); + + if (bFormatIt) + strJson = JsonWriter.formatJson(strJson); + + // TODO abchecken, ob das noch nötig ist: https://github.com/jdereg/json-io/issues/19 + return strJson.replaceAll(",\\s*\"ordinal\":\\d+", ""); + } catch (IOException e) + { + throw new RuntimeException(e); + } + } +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/FieldConfig.java b/src/main/java/de/dfki/km/leech/lucene/basic/FieldConfig.java new file mode 100644 index 0000000..b352661 --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/FieldConfig.java @@ -0,0 +1,135 @@ +package de.dfki.km.leech.lucene.basic; + + + +import com.cedarsoftware.util.io.JsonReader; +import com.cedarsoftware.util.io.JsonWriter; +// import de.dfki.inquisitor.lucene.LuceneAnalyzerFactory; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; +import org.apache.lucene.document.Field; + +import java.io.IOException; +import java.util.HashMap; +import java.util.logging.Level; +import java.util.logging.Logger; + + + +public class FieldConfig +{ + + + + public DynamicFieldType defaultFieldType = new DynamicFieldType(); + + + + public HashMap fieldName2FieldType = new HashMap(); + + + + /** + * Creates a new Analyzer out of this {@link FieldConfig}, which is a {@link PerFieldAnalyzerWrapper} for all configured fields + * + * @return the according analyzer + * + * @throws Exception + */ + public Analyzer createAnalyzer() throws Exception + { + return LuceneAnalyzerFactory.createAnalyzer(this); + } + + + + + + + 
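+ // Usage sketch (illustrative only, not part of this class; the field names and the date value are assumptions):
+ // a FieldConfig maps individual field names to a DynamicFieldType, everything else falls back to defaultFieldType.
+ //
+ // FieldConfig config = new FieldConfig();
+ // config.defaultFieldType = DynamicFieldType.tokenizedFieldType;
+ // config.fieldName2FieldType.put("date", DynamicFieldType.dateFieldType);
+ // Field dateField = config.createField("date", "2021-03-01"); // stored as a number field if the string can be parsed as a date
+ // Analyzer analyzer = config.createAnalyzer();                 // a PerFieldAnalyzerWrapper over the configured field types
+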
+ /** + * Create Field instances, according to the fieldType mappings inside this {@link FieldConfig}. Number fields will be generated, if a string value is given, it will + * be converted in the case the fieldType is a number type. Further, the method parses Strings for date if the fieldtype is of type {@link DynamicFieldType} and + * configured accordingly. You can also give number values for generating number or String fields fields (also according to the given fieldType). + * + * @param strAttName the attributes name + * @param attValue the attributes value + * + * @return the field, with the configured fieldType. Null in the case the Field can not be generated out of the value. + */ + public Field createField(String strAttName, Object attValue) + { + DynamicFieldType fieldType = getFieldType(strAttName); + + return fieldType.createField(strAttName, attValue); + } + + + + + + public void fromJson(String strJson) + { + + try + { + FieldConfig fieldConfig = (FieldConfig) JsonReader.jsonToJava(strJson); + + this.defaultFieldType = fieldConfig.defaultFieldType; + + this.fieldName2FieldType = fieldConfig.fieldName2FieldType; + + + } + catch (IOException e) + { + Logger.getLogger(FieldConfig.class.getName()).log(Level.SEVERE, "Error", e); + } + + } + + + + /** + * Gets the field type for a specific field, as configured. In the case there is no explicit mapping for the field, the default type will be returned. + * + * @param strFieldName + * @return + */ + public DynamicFieldType getFieldType(String strFieldName) + { + DynamicFieldType fieldType = fieldName2FieldType.get(strFieldName); + + if(fieldType == null) fieldType = defaultFieldType; + + return fieldType; + } + + + + public String toJson(boolean bFormatIt) + { + try + { + + + HashMap hsOptions = new HashMap<>(); + hsOptions.put(JsonWriter.ENUM_PUBLIC_ONLY, true); + + String strJson = JsonWriter.objectToJson(this, hsOptions); + + + if(bFormatIt) strJson = JsonWriter.formatJson(strJson); + + // return strJson.replaceAll(",\\s*\"ordinal\":\\d+", ""); + return strJson; + + } + catch (IOException e) + { + throw new RuntimeException(e); + } + } + + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/IndexAccessor.java b/src/main/java/de/dfki/km/leech/lucene/basic/IndexAccessor.java new file mode 100644 index 0000000..08d4a8e --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/IndexAccessor.java @@ -0,0 +1,1634 @@ +package de.dfki.km.leech.lucene.basic; + + + +import de.dfki.inquisitor.exceptions.ExceptionUtils; +import de.dfki.inquisitor.logging.LoggingUtils; +import de.dfki.inquisitor.text.StringUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.*; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.NativeFSLockFactory; +import org.apache.lucene.util.Version; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.*; +import java.util.Map.Entry; +import java.util.logging.Level; +import java.util.logging.Logger; + + + +@SuppressWarnings({"JavaDoc", "PointlessBooleanExpression"}) +public class IndexAccessor +{ + + public static class BetterMultiReader extends MultiReader + { + + + public BetterMultiReader(IndexReader... 
subReaders) throws IOException + { + super(subReaders); + } + + + + public BetterMultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException + { + super(subReaders, closeSubReaders); + } + + + + public List getSubReaders() + { + return getSequentialSubReaders(); + } + } + + + + + /** + * Status constants for removeReaderFromCacheWhenPossible + * + * @author Christian Reuschling, Dipl.Ing.(BA) + */ + public static enum ReaderStatus { + READER_CLOSED, READER_IN_QUEUE, READER_NOT_IN_CACHE; + } + + + + + protected static class ReaderRefreshRunnable implements Runnable + { + + @Override + public void run() + { + + try + { + while (true) + { + + // wir warten das eingestellte Intervall + + // ich hatte mal die Situation, daß der Thread nur im korrekten Intervall ausgeführt wird, wenn hier vor dem Sleep noch eine + // Ausgabe steht - da das eigentlich nicht sein kann, und das nur zum debuggen relevant war, mach ich das mal wieder weg. Er kam + // dann, aber halt nicht so oft. Aber schon innerhalb 2min (und nicht 10ms, wie ich es da wollte) + // LinkedList dummy = new LinkedList(); + // System.err.print("."); + Thread.sleep(m_lReaderRefreshIntervall); + + Logger.getLogger(this.getClass().getName()).fine("will refresh all index readers"); + + IndexAccessor.refreshAllIndexReaders(); + } + + } + catch (Exception e) + { + throw new RuntimeException(e); + } + } + } + + + private static String m_strIdAttributeName; + + protected static Logger logger = Logger.getLogger(IndexAccessor.class.getName()); + + protected static Analyzer m_analyzer4writer; + + // protected static boolean m_bNativeFileLock = true; + + protected static HashMap m_hsIndexPathOrId2CurrentIndexReader = new HashMap(); + + // protected static HashMap m_hsIndexPathOrURL2CurrentRemoteSearcher = new HashMap(); + + // wenn man mehrere Instanzen von luceneIndexSet hat, darf trotzdem nur ein Writer pro Index offen sein + protected static HashMap m_hsIndexPathOrURL2Writer = new HashMap(); + + protected static HashMap m_hsIndexReader2IndexPath = new HashMap(); + + protected static HashMap m_hsIndexReader2ReaderRefCount = new HashMap(); + + + protected static HashMap m_hsIndexWriter2WriterRefCount = new HashMap(); + + + + + + + protected static HashSet m_hsReader2Remove = new HashSet(); + + + + protected static HashSet m_hsStaticIndexReaderSet = new HashSet(); + + + + + + protected static long m_lReaderRefreshIntervall = 1000 * 60 * 2; + + + + static + { + + try + { + + + + // wir starten den Thread, der die reader objekte refreshed + + Thread readerRefreshThread = new Thread(new ReaderRefreshRunnable(), "IndexAccessor reader refresh thread"); + readerRefreshThread.setDaemon(true); + // welche Priority? ich hatte mal das Gefühl, daß der recht selten dran kommt + // readerRefreshThread.setPriority(Thread.MIN_PRIORITY); + // readerRefreshThread.setPriority(Thread.MAX_PRIORITY); + readerRefreshThread.start(); + + + + // ein shutdown hook um sicherzustellen, daß auch alle Objekte geschlossen werden - wir wollen ja keine anderen Prozesse blockieren + + Runtime.getRuntime().addShutdownHook(new Thread() + { + @Override + public void run() + { + try + { + IndexAccessor.forceCloseAll(); + } + catch (Exception ex) + { + throw new RuntimeException(ex); + } + } + }); + + + } + catch (Exception e) + { + Logger.getLogger(IndexAccessor.class.getName()).log(Level.SEVERE, "Error", e); + } + + } + + + + + /** + * Adds a reader object to the cache. 
This reader will be static, which means that it won't be refreshed in any case, independent of which method you invoke on + * {@link IndexAccessor}, nor in the refresh-Thread. You can get this reader with {@link #getLuceneIndexReader(String, boolean)}, with strIndexID as parameter.You also can remove + * the reader from cache with {@link #removeReaderFromCache(String)}, {@link #removeReaderFromCacheWhenPossible(String)} and {@link #removeUnusedReadersFromCache()} + * + * + * @param strIndexID a unique ID for the reader + * @param staticReader the reader Object + */ + synchronized static public void addStaticReader(String strIndexID, IndexReader staticReader) + { + // wir merken uns den Reader, damit wir ihn nicht später aus Versehen ersetzen/refreshen + m_hsStaticIndexReaderSet.add(staticReader); + + // und mit seiner ID kommt er auch noch in den Cache + m_hsIndexPathOrId2CurrentIndexReader.put(strIndexID, staticReader); + } + + + + /** + * Creates a new, empty Lucene index under the given path + * + * @param strIndexPathOrURL the path for the new Lucene index. In the case the path does not exists, it will be created + * @param bForceAndOverwrite if this is false, the index will be only created in the case there is no existing index under strIndexPathOrURL + * + * @return true in the case the index was newly created, false otherwise. In the case strIndexPathOrURL exists and is a file, it will not created in any case + * + * @throws IOException + * @throws CorruptIndexException + */ + synchronized static public boolean createNewIndex(String strIndexPathOrURL, boolean bForceAndOverwrite) throws CorruptIndexException, IOException + { + boolean bCreateNew = false; + + File fIndexPath = new File(strIndexPathOrURL); + + if(!fIndexPath.exists()) + { + fIndexPath.mkdirs(); + + bCreateNew = true; + } + + FSDirectory dir = createFSDirectory(fIndexPath); + + if(bCreateNew == false && (!DirectoryReader.indexExists(dir) || bForceAndOverwrite)) + { + bCreateNew = true; + } + + if(!bCreateNew) return false; + + + + logger.fine("will open indexWriter for '" + strIndexPathOrURL + "'"); + + // wenn fäschlicherweise z.B. ein video-attachment als fulltext verarbeitet wird, haben wir riesige Docs, viel Speicher, lange Zeiten...aus + // diesem Grund setzte ich die MaxFieldLength mal wieder auf limited + @SuppressWarnings("deprecation") + IndexWriter ourIndexWriter = new IndexWriter(dir, new IndexWriterConfig(getDefaultAnalyzer()).setOpenMode(OpenMode.CREATE)); + + ourIndexWriter.close(); + + return true; + } + + + + + + // /** + // * Enable or disable native file locking. We recommend the native lock, which is also the default. + // * + // * @param bNativeFileLock true in the case you want to use native file OS locks. These could be problematic on NFS drives (see {@link NativeFSLockFactory}). 
I + // * recommend to use the native File lock (stress tests on our NFS system have shown that this is really an atomar, working lock - the other lock leads to + // * exceptions (at least in ealier versions of Lucene) + // */ + // static public void enableNativeFileLock(boolean bNativeFileLock) + // { + // m_bNativeFileLock = bNativeFileLock; + // } + + + + /** + * Gets the default analyzer that will be used for writer creation + * + * @return the default analyzer that will be used for writer creation + */ + static public Analyzer getDefaultAnalyzer() + { + return m_analyzer4writer; + } + + + + /** + * Gets the default attribute name that will be used for RemotIndexReader creation + * + * @return the default attribute name that will be used for RemotIndexReader creation + */ + static public String getDefaultIndexIdAttribute() + { + return IndexAccessor.m_strIdAttributeName; + } + + + // + // /** + // * Gets the reader for a given index path. The reader will be refreshed if there are any new changes in the index. In the case you pass an static reader ID to this + // * method, it will be identically to {@link #getIndexReader(String)}. You dont have to release a RemoteIndexReader. + // * + // * @param strIndexPathOrURL the path to the index where you want to read from + // * + // * @return the reader object that reflects the current state of the index + // * + // * @throws IOException + // * @throws CorruptIndexException + // * @throws URISyntaxException + // */ + // public synchronized static RemoteIndexReader getFreshIndexReader(String strIndexPathOrURL) throws CorruptIndexException, IOException, URISyntaxException + // { + // refreshIndexReader(strIndexPathOrURL, false); + // + // return getIndexReader(strIndexPathOrURL); + // } + + // + // + // /** + // * Gets the reader for the given index path. The reader will be created when necessary. In the case the specified directory does not exists or is empty, an empty + // * index will NOT be created.
+ // * Remark:
+ // * Note that refreshing a reader is a relatively expensive operation. The reader Object returned from this method does not necessarily reflect the current state of the index. To
+ // * get a guaranteed up-to-date, refreshed reader object, you have the following possibilities:
+ // *
+ // * - invoke one of the methods {@link #refreshIndexReader(String)} or {@link #refreshAllIndexReaders()}
+ // * - use the method {@link #getFreshIndexReader(String)}
+ // * - set a refresh interval with {@link #setReaderRefreshIntervall(long)}, so that all reader Objects returned by {@link #getLuceneIndexReader(String, boolean)} are refreshed periodically
    + // * You dont have to release a RemoteIndexReader. + // * + // * @param strIndexPathOrURL the path to the index you wants to read from + // * + // * @return the index reader object + // * + // * @throws CorruptIndexException + // * @throws IOException + // * @throws URISyntaxException + // */ + // public synchronized static RemoteIndexReader getIndexReader(String strIndexPathOrURL) throws CorruptIndexException, IOException, URISyntaxException + // { + // return getIndexReader(strIndexPathOrURL, false); + // } + + + // + // /** + // * Gets the reader for the given index path. The reader will be created when necessary. In the case the specified directory does not exists or is empty, an empty + // * index will be created, if you want.
+ // * Remark:
+ // * Note that refreshing a reader is a relatively expensive operation. The reader Object returned from this method does not necessarily reflect the current state of the index. To
+ // * get a guaranteed up-to-date, refreshed reader object, you have the following possibilities:
+ // *
+ // * - invoke one of the methods {@link #refreshIndexReader(String)} or {@link #refreshAllIndexReaders()}
+ // * - use the method {@link #getFreshIndexReader(String)}
+ // * - set a refresh interval with {@link #setReaderRefreshIntervall(long)}, so that all reader Objects returned by {@link #getIndexReader(String, boolean)} are refreshed periodically
    + // * You dont have to release a RemoteIndexReader. + // * + // * @param strIndexPathOrURL the path to the index you wants to read from. This can be a simple path 'e.g. /home/hitzliputzli' or with URI Syntax + // * ('file:\\/home/hitzliputzli'). In the case the specified protocoll is not of type 'file', and delight is in the classpath, the method tries to create a + // * delight client object. + // * @param bCreateIndexIfNotExist if true, the index will be created in the case he did not exist + // * + // * @return the index reader object + // * + // * @throws CorruptIndexException + // * @throws IOException + // * @throws URISyntaxException + // */ + // synchronized static public RemoteIndexReader getIndexReader(String strIndexPathOrURL, boolean bCreateIndexIfNotExist) throws CorruptIndexException, IOException, + // URISyntaxException + // { + // + // RemoteIndexReader remoteIndexReader; + // + // + // if(isLocalPath(strIndexPathOrURL)) + // { + // // lokal - wir rufen einfach die entsprechene LuceneReader-Methode einmal auf, um das Objekt intern zu erstellen + // IndexReader luceneIndexReader = getLuceneIndexReader(strIndexPathOrURL, bCreateIndexIfNotExist); + // releaseLuceneIndexReader(luceneIndexReader); + // + // // das zugrundeliegende Objekt wurde initialisiert, nun einfach den String/Pfad basierten 'wrapper' + // remoteIndexReader = new RemoteIndexReaderImpl(strIndexPathOrURL, m_strIdAttributeName); + // } + // else + // { + // // wir versuchen, eine Verbindung zu einem RemoteReader aufzubauen + // strIndexPathOrURL = strIndexPathOrURL.replaceAll("/$", ""); + // String strHandlerName = strIndexPathOrURL.substring(strIndexPathOrURL.lastIndexOf('/') + 1) + "_reader"; + // String strServiceUrl = strIndexPathOrURL.replaceAll("/[^/]+$", ""); + // + // + // remoteIndexReader = delight.connectingTo(strServiceUrl).usingApi(strHandlerName, RemoteIndexReader.class); + // } + // + // + // return remoteIndexReader; + // } + + + + + + /** + * Gets all index paths that are currently inside the reader cache + * + * @return all index paths that are currently inside the reader cache + */ + public static Set getIndexReaderPathsAndIDs() + { + return m_hsIndexPathOrId2CurrentIndexReader.keySet(); + } + + + // + // synchronized static public RemoteIndexSearcher getIndexSearcher(String strIndexPathOrURL) throws CorruptIndexException, IOException, URISyntaxException + // { + // RemoteIndexSearcher searcher4Index; + // + // + // if(isLocalPath(strIndexPathOrURL)) + // { + // + // // lokal - wir rufen einfach die entsprechene LuceneReader-Methode einmal auf, um das Objekt intern zu erstellen + // IndexReader luceneIndexReader = getLuceneIndexReader(strIndexPathOrURL, false); + // releaseLuceneIndexReader(luceneIndexReader); + // + // // das zugrundeliegende Objekt wurde initialisiert, nun einfach den String/Pfad basierten 'wrapper' + // searcher4Index = new RemoteIndexSearcherImpl(strIndexPathOrURL, m_strIdAttributeName); + // } + // else + // { + // + // // es gibt zumindest keinen lokalen Index - dann könnte es noch eine remotegeschichte sein + // + // searcher4Index = m_hsIndexPathOrURL2CurrentRemoteSearcher.get(strIndexPathOrURL); + // if(searcher4Index == null) + // { + // + // logger.fine("will create new remote searcher for index '" + strIndexPathOrURL + "'"); + // + // strIndexPathOrURL = strIndexPathOrURL.replaceAll("/$", ""); + // String strHandlerName = strIndexPathOrURL.substring(strIndexPathOrURL.lastIndexOf('/') + 1) + "_searcher"; + // String strServiceUrl = 
strIndexPathOrURL.replaceAll("/[^/]+$", ""); + // + // + // searcher4Index = delight.connectingTo(strServiceUrl).usingApi(strHandlerName, RemoteIndexSearcher.class); + // + // + // m_hsIndexPathOrURL2CurrentRemoteSearcher.put(strIndexPathOrURL, searcher4Index); + // } + // } + // + // + // return searcher4Index; + // } + // + + + /** + * Gets a writer instance for an index. DON'T !!!!! close your writer afterwards - use the >>>>> releaseIndexWriter(..) <<<<< method instead, and make SURE not to + * forget this. The close will be done automatically, and you would permit any other threads to work with the index by doing this. The default analyzer will be used
+ * In the case the specified directory does not exist or is empty, an empty index will be created.
+ * Remark:
+ * You can change the timeout Lucene waits for getting write access by setting IndexWriter.WRITE_LOCK_TIMEOUT.
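+ * Usage sketch (the index path '/tmp/myIndex' and the Document 'doc' are only illustrative placeholders):
+ *   IndexWriter writer = IndexAccessor.getIndexWriter("/tmp/myIndex");
+ *   try { writer.addDocument(doc); }
+ *   finally { IndexAccessor.releaseIndexWriter(writer); }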
    + * It is in almost any case I can imagine no good idea to have an IndexWriter member variable that refers on the reference from this method. This will block all other + * processes that wants to get access to the index. You can make this in a short-living Object, but know exactly what yo do... + * + * @param strIndexPathOrURL the path to the index + * + * @return a writer instance for the given index. Autocommit will be FALSE. + * + * @throws CorruptIndexException + * @throws LockObtainFailedException + * @throws IOException + */ + synchronized static public IndexWriter getIndexWriter(String strIndexPathOrURL) throws CorruptIndexException, LockObtainFailedException, IOException + { + if(getDefaultAnalyzer() == null) logger.severe("default analyzer is not set - this will cause a Nullpointer Exception. Set it before creating an IndexWriter."); + return getIndexWriter(strIndexPathOrURL, getDefaultAnalyzer()); + } + + + + /** + * Gets a writer instance for an index. DON'T !!!!! close your writer afterwards - use the >>>>> releaseWriter4DefaultIndex() <<<<< method instead, and make SHURE not + * to forget this. The close will be done automatically, and you would permit any other threads to work with the index by doing this
+ * In the case the specified directory does not exist or is empty, an empty index will be created.
+ * Remark:
+ * You can change the timeout Lucene waits for getting write access by setting IndexWriter.WRITE_LOCK_TIMEOUT.
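+ * Usage sketch (StandardAnalyzer is only an illustrative choice; release the writer as with the other overload):
+ *   IndexWriter writer = IndexAccessor.getIndexWriter("/tmp/myIndex", new StandardAnalyzer());
+ *   ... add or delete documents ...
+ *   IndexAccessor.releaseIndexWriter(writer);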
    + * It is in almost any case I can imagine no good idea to have an IndexWriter member variable that refers on the reference from this method. This will block all other + * processes that wants to get access to the index. You can make this in a short-living Object, but know exactly what yo do... + * + * @param strIndexPathOrURL the path to the index + * @param analyzer the Lucene analyzer that should be used for this writer creation + * + * @return a writer instance for the given index. Autocommit will be FALSE. + * + * @throws CorruptIndexException + * @throws LockObtainFailedException + * @throws IOException + */ + @SuppressWarnings("deprecation") + synchronized static public IndexWriter getIndexWriter(String strIndexPathOrURL, Analyzer analyzer) throws CorruptIndexException, LockObtainFailedException, + IOException + { + + // Haben wir schon einen geöffneten Writer? + IndexWriter ourIndexWriter = m_hsIndexPathOrURL2Writer.get(strIndexPathOrURL); + + + // wenn nicht, machen wir doch einen neuen + if(ourIndexWriter == null) + { + // wenn es ein leeres directory ist oder es nicht existiert, dann machen wir auch gleich einen neuen Index + createNewIndex(strIndexPathOrURL, false); + + FSDirectory dir = createFSDirectory(new File(strIndexPathOrURL)); + + logger.fine("will open indexWriter for '" + strIndexPathOrURL + "'"); + + ourIndexWriter = new IndexWriter(dir, new IndexWriterConfig( analyzer).setOpenMode(OpenMode.APPEND)); + + m_hsIndexPathOrURL2Writer.put(strIndexPathOrURL, ourIndexWriter); + } + + // wir verwalten Tokens - diese müssen wieder mit releaseWriter freigegeben werden + Integer iOld = m_hsIndexWriter2WriterRefCount.get(ourIndexWriter); + if(iOld == null) + m_hsIndexWriter2WriterRefCount.put(ourIndexWriter, 1); + else + m_hsIndexWriter2WriterRefCount.put(ourIndexWriter, ++iOld); + + if(logger.isLoggable(Level.FINEST)) logger.finest("get indexWriter for '" + strIndexPathOrURL + "'\n" + LoggingUtils.getCurrentStackTrace()); + + return ourIndexWriter; + } + + + + /** + * Gets all index paths that are currently inside the writer cache + * + * @return all index paths that are currently inside the writer cache + */ + public static Set getIndexWriterPaths() + { + return m_hsIndexPathOrURL2Writer.keySet(); + } + + + + /** + * This is an expert method - the use of RemoteIndexReader is recommended. Gets the reader for the given index path. The reader will be created when necessary. In the + * case the specified directory does not exists or is empty, an empty index will be created, if you want.
+ * Remark:
+ * Note that refreshing a reader is a relatively expensive operation. The reader Object returned from this method does not necessarily reflect the current state of the index. To
+ * get a guaranteed up-to-date, refreshed reader object, you have the following possibilities:
+ *
+ * - invoke one of the methods {@link #refreshIndexReader(String)} or {@link #refreshAllIndexReaders()}
+ * - set a refresh interval with {@link #setReaderRefreshIntervall(long)}, so that all reader Objects returned by {@link #getLuceneIndexReader(String, boolean)} are refreshed periodically
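+ * Usage sketch (the index path is an illustrative placeholder):
+ *   IndexReader reader = IndexAccessor.getLuceneIndexReader("/tmp/myIndex", false);
+ *   try { int numDocs = reader.numDocs(); }
+ *   finally { IndexAccessor.releaseLuceneIndexReader(reader); }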
    + * Don't forget to release your reader Object with {@link #releaseLuceneIndexReader(IndexReader)} + * + * @param strIndexPathOrURL the path to the index you wants to read from. This can be a simple path 'e.g. /home/hitzliputzli' or with URI Syntax + * ('file:\\/home/hitzliputzli'). + * @param bCreateIndexIfNotExist if true, the index will be created in the case he did not exist + * + * @return the index reader object + * + * @throws CorruptIndexException + * @throws IOException + * @throws URISyntaxException + */ + synchronized static public IndexReader getLuceneIndexReader(String strIndexPathOrURL, boolean bCreateIndexIfNotExist) throws CorruptIndexException, IOException, + URISyntaxException + { + IndexReader reader = m_hsIndexPathOrId2CurrentIndexReader.get(strIndexPathOrURL); + + // wenn wir noch keinen haben, dann erstellen wir uns einen + if(reader == null) + { + + logger.fine("will create new reader for index '" + strIndexPathOrURL + "'"); + + + File fIndex = null; + // die super-URI-Implementierung nimmt echt alles an, was auch keine Uri ist, ohne eine syntaxException - insbesondere einen Pfad :( + + if(strIndexPathOrURL.startsWith("file:")) + fIndex = new File(new URI(strIndexPathOrURL)); + else + fIndex = new File(strIndexPathOrURL); + + + + // wenn es ein leeres directory ist oder es nicht existiert, dann machen wir auch gleich einen neuen Index + if(bCreateIndexIfNotExist) createNewIndex(strIndexPathOrURL, false); + + Directory dir = createFSDirectory(fIndex); + + + reader = DirectoryReader.open(dir); + + + // hier steht immer der neueste drin - die alten werden in der release-methode wieder zu gemacht + m_hsIndexPathOrId2CurrentIndexReader.put(strIndexPathOrURL, reader); + } + + + // das Token wird für diesen Index inkrementiert + Integer iOld = m_hsIndexReader2ReaderRefCount.get(reader); + if(iOld == null) + { + m_hsIndexReader2ReaderRefCount.put(reader, 1); + m_hsIndexReader2IndexPath.put(reader, strIndexPathOrURL); + } + else + m_hsIndexReader2ReaderRefCount.put(reader, ++iOld); + + + if(logger.isLoggable(Level.FINEST)) logger.finest("get reader for index '" + strIndexPathOrURL + "'\n" + LoggingUtils.getCurrentStackTrace()); + + return reader; + } + + + + synchronized static public IndexSearcher getLuceneIndexSearcher(String strIndexPathOrURL) throws CorruptIndexException, IOException, URISyntaxException + { + logger.fine("will create new searcher for index '" + strIndexPathOrURL + "'"); + + IndexSearcher searcher4Index = new IndexSearcher(getLuceneIndexReader(strIndexPathOrURL, false)); + + + + return searcher4Index; + } + + + + synchronized static public IndexSearcher getLuceneMultiSearcher(LinkedHashSet sIndexPathsOrURLs) throws CorruptIndexException, IOException, + URISyntaxException + { + logger.fine("will create new searcher for index '" + sIndexPathsOrURLs + "'"); + + IndexSearcher searcher4Index = new IndexSearcher(getLuceneMultiReader(sIndexPathsOrURLs, false)); + + + + return searcher4Index; + } + + + + /** + * Gets the lucene MultiReader for all given LOCAL reader paths (paths that point to the file system, not to a remote index). The readers will be created when + * necessary. In the case a specified directory does not exist or is empty, an empty index will be created, if you want.
+ * Remark:
+ * Note that refreshing a reader is a relatively expensive operation. The reader Object returned from this method does not necessarily reflect the current state of the index. To
+ * get a guaranteed up-to-date, refreshed reader object, you have the following possibilities:
+ *
+ * - invoke one of the methods {@link #refreshIndexReader(String)} or {@link #refreshAllIndexReaders()}
+ * - set a refresh interval with {@link #setReaderRefreshIntervall(long)}, so that all reader Objects returned by {@link #getLuceneIndexReader(String, boolean)} are refreshed periodically
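+ * Usage sketch (two illustrative local index paths; the LinkedHashSet keeps the given order):
+ *   LinkedHashSet<String> indexPaths = new LinkedHashSet<>();
+ *   indexPaths.add("/tmp/indexA");
+ *   indexPaths.add("/tmp/indexB");
+ *   MultiReader multiReader = IndexAccessor.getLuceneMultiReader(indexPaths, false);
+ *   IndexSearcher searcher = new IndexSearcher(multiReader);
+ *   ... search ...
+ *   IndexAccessor.releaseLuceneIndexReader(multiReader); // releases the wrapped sub-readers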
    + * You dont have to release a RemoteIndexReader. + * + * @param sIndexPathsOrURLs the paths to the indices you want to read from. This can be a simple path 'e.g. /home/hitzliputzli' or with URI Syntax + * ('file:\\/home/hitzliputzli'). In the case the specified protocoll is not of type 'file', + * @param bCreateIndexIfNotExist if true, the index will be created in the case he did not exist + * + * @return the index reader object + * + * @throws CorruptIndexException + * @throws IOException + * @throws URISyntaxException + */ + synchronized static public MultiReader getLuceneMultiReader(LinkedHashSet sIndexPathsOrURLs, boolean bCreateIndexIfNotExist) throws CorruptIndexException, + IOException, URISyntaxException + { + + + LinkedList lReaders = new LinkedList<>(); + for (String strIndexPathOrUrl : sIndexPathsOrURLs) + { + + if(isLocalPath(strIndexPathOrUrl)) + { + // lokal - wir rufen einfach die entsprechene LuceneReader-Methode einmal auf, um das Objekt intern zu erstellen + IndexReader luceneIndexReader = getLuceneIndexReader(strIndexPathOrUrl, bCreateIndexIfNotExist); + + + lReaders.add(luceneIndexReader); + } + else + { + // ignore + } + + } + + + BetterMultiReader multiReader = new BetterMultiReader(lReaders.toArray(new IndexReader[0]), false); + + + return multiReader; + } + + + // + // /** + // * Gets a MultiReader that wrapps all index readers for the given Set of index paths. You dont have to release a RemoteIndexReader. + // * + // * @param indexPathsOrIDs2CreateIfNotExist the set of indices that should be wrapped by the MultiReader. The last reader in the list will be stable with respect to + // * write modifications during the livetime of this MultiReader, because the documents index number will stay stable in this index. For each index, you can + // * specify whether she should be created or not in the case it not exists. + // * + // * @return a MultiReader the wrapps all index readers for the given Set of index paths. You dont have to release a RemoteIndexReader. + // * + // * @throws CorruptIndexException + // * @throws IOException + // */ + // public synchronized static RemoteIndexReader getMultiIndexReader(LinkedHashMap indexPathsOrIDs2CreateIfNotExist) throws CorruptIndexException, + // IOException + // { + // + // // wir trennen die lokalen von den remote-URLs. 
Mit den lokalen machen wir EINEN LuceneMultiReader, und dann packen wir die remotes dazu + // + // // Wir trennen in remote-und lokale Indizes + // LinkedList lLocalIndices = new LinkedList<>(); + // LinkedList lRemoteIndices = new LinkedList<>(); + // + // for (Entry strIndexPathOrURL2CreateIfNotExist : indexPathsOrIDs2CreateIfNotExist.entrySet()) + // { + // + // String strIndexPathOrURL = strIndexPathOrURL2CreateIfNotExist.getKey(); + // Boolean bCreateIfNotExist = strIndexPathOrURL2CreateIfNotExist.getValue(); + // + // if(isLocalPath(strIndexPathOrURL)) + // { + // lLocalIndices.add(strIndexPathOrURL); + // if(bCreateIfNotExist) createNewIndex(strIndexPathOrURL, false); + // } + // else + // { + // lRemoteIndices.add(strIndexPathOrURL); + // } + // } + // + // + // LinkedList llReaderz = new LinkedList(); + // + // // der lokale MultiReader + // de.dfki.inquisition.lucene.RemoteIndexReader localReader = new RemoteIndexReaderImpl(lLocalIndices.toArray(new String[0])); + // localReader.setIdAttributename(m_strIdAttributeName); + // llReaderz.add(localReader); + // + // + // // die remote reader + // for (String strRemoteURL : lRemoteIndices) + // { + // + // try + // { + // // index creation is of no sense when we have a remote reader anyway + // de.dfki.inquisition.lucene.RemoteIndexReader reader = getIndexReader(strRemoteURL, false); + // // check if this reader is available + // reader.numDocs(); + // + // llReaderz.add(reader); + // } + // catch (Exception e) + // { + // logger.log(Level.SEVERE, "Exception while creating a remote index reader. The index '" + strRemoteURL + "' will be ignored. ('" + e.getMessage() + "')"); + // logger.log(Level.FINE, "Exception for index '" + strRemoteURL + "': ", e); + // } + // } + // + // + // // und daraus erzeugen wir uns jetzt nen MultiReader + // if(llReaderz.size() == 1) return llReaderz.get(0); + // + // RemoteMultiIndexReader multiReader = new RemoteMultiIndexReader(llReaderz.toArray(new de.dfki.inquisition.lucene.RemoteIndexReader[0])); + // + // + // return multiReader; + // } + + + + // /** + // * Gets a MultiReader that wrapps all index readers for the given Set of index paths. You dont have to release a RemoteIndexReader. + // * + // * @param indexPathsOrIDs the set of indices that should be wrapped by the MultiReader. The last reader in the list will be stable with respect to write modifications + // * during the livetime of this MultiReader, because the documents index number will stay stable in this index. For each index, the index will NOT created + // * in the case it does not exists + // * + // * @return a MultiReader the wrapps all index readers for the given Set of index paths. You dont have to release a RemoteIndexReader. + // * + // * @throws CorruptIndexException + // * @throws IOException + // */ + // public synchronized static RemoteIndexReader getMultiIndexReader(LinkedHashSet indexPathsOrIDs) throws CorruptIndexException, IOException + // { + // return getMultiIndexReader(indexPathsOrIDs, false); + // } + + // + // + // /** + // * Gets a MultiReader that wrapps all index readers for the given Set of index paths. You dont have to release a RemoteIndexReader. + // * + // * @param indexPathsOrIDs the set of indices that should be wrapped by the MultiReader. The last reader in the list will be stable with respect to write modifications + // * during the livetime of this MultiReader, because the documents index number will stay stable in this index. 
For each index, the index will NOT created + // * in the case it does not exists (beside the last one if you want it) + // * @param bCreateLastIndexInListIfNotExist if true, the last index in the list will be created in the case it does not exist + // * + // * @return a MultiReader the wrapps all index readers for the given Set of index paths. You dont have to release a RemoteIndexReader. + // * + // * @throws CorruptIndexException + // * @throws IOException + // */ + // public synchronized static RemoteIndexReader getMultiIndexReader(LinkedHashSet indexPathsOrIDs, boolean bCreateLastIndexInListIfNotExist) + // throws CorruptIndexException, IOException + // { + // LinkedHashMap hsIndexPathsOrIDs2CreateIfNotExist = new LinkedHashMap(); + // + // + // int i = 0; + // for (String strIndexPathOrURL : indexPathsOrIDs) + // { + // boolean bCreateIfNotExist = false; + // if(i == indexPathsOrIDs.size() - 1) bCreateIfNotExist = bCreateLastIndexInListIfNotExist; + // + // hsIndexPathsOrIDs2CreateIfNotExist.put(strIndexPathOrURL, bCreateIfNotExist); + // + // i++; + // } + // + // return getMultiIndexReader(hsIndexPathsOrIDs2CreateIfNotExist); + // } + + + // + // /** + // * Gets a MultiReader that wrapps all currently cached index readers. You dont have to release a RemoteIndexReader. + // * + // * @param strLastIndexInListPathOrID this will be the last reader in the list of reader offered to the MultiReader Constructor. In this index you can write and read + // * in parallel, because the document numbers will not change during writing (until index optimization). In the case you don't write to any index, the order + // * is irrelevant and you can set this paraeter simply null + // * + // * @return a MultiReader that wrapps all currently cached index readers. You dont have to release a RemoteIndexReader. + // * + // * @throws CorruptIndexException + // * @throws IOException + // */ + // public synchronized static RemoteMultiIndexReader getMultiIndexReader(String strLastIndexInListPathOrID) throws CorruptIndexException, IOException + // { + // return getMultiIndexReader(strLastIndexInListPathOrID, false); + // } + + + + // /** + // * Gets a MultiReader that wrapps all currently cached index readers. Don't forget to release it with {@link #releaseLuceneIndexReader(IndexReader)} + // * + // * @param strLastIndexInListPathOrID this will be the last reader in the list of reader offered to the MultiReader Constructor. In this index you can write and read + // * in parallel, because the document numbers will not change during writing (until index optimization). In the case you don't write to any index, the order + // * is irrelevant and you can set this paraeter simply null + // * @param bCreateLastIndexInListIfNotExist if true, the last index in the list will be created in the case it does not exist + // * + // * @return a MultiReader that wrapps all currently cached index readers. You dont have to release a RemoteIndexReader. 
+ // * + // * @throws CorruptIndexException + // * @throws IOException + // */ + // public synchronized static RemoteMultiIndexReader getMultiIndexReader(String strLastIndexInListPathOrID, boolean bCreateLastIndexInListIfNotExist) + // throws CorruptIndexException, IOException + // { + // LinkedList llReaderz = new LinkedList(); + // + // + // // der reader, auf den auch schreibend zugegriffen werden kann, machen wir am Schluß rein - ich habe die Hoffnung, + // // daß sich dann nicht die docIDs verschieben, wenn gleichzeitig geschrieben und in diesem und in externen Indices + // // gesucht wird...die externen müssen halt readonly sein...und des funzt auch :) + // + // + // HashSet hsIndexPaths = new HashSet(); + // hsIndexPaths.addAll(getIndexReaderPathsAndIDs()); + // + // // aaalso. wir erstellen alle Readers, und für den letzten wird das Flag eingesetzt... + // for (String strIndexPathOrURL : hsIndexPaths) + // { + // + // boolean bIsLast = strIndexPathOrURL.equals(strLastIndexInListPathOrID); + // + // try + // { + // + // de.dfki.inquisition.lucene.RemoteIndexReader reader; + // if(bIsLast) + // reader = getIndexReader(strIndexPathOrURL, bCreateLastIndexInListIfNotExist); + // else + // reader = getIndexReader(strIndexPathOrURL, false); + // + // + // if(strLastIndexInListPathOrID == null || llReaderz.size() == 0 || bIsLast) + // llReaderz.addLast(reader); + // else + // llReaderz.addFirst(reader); + // + // } + // catch (Exception e) + // { + // logger.log(Level.SEVERE, "Exception while creating a MultiReader. The index '" + strIndexPathOrURL + "' will be ignored. ('" + e.getMessage() + "')"); + // logger.log(Level.FINE, "Exception for index '" + strIndexPathOrURL + "': ", e); + // } + // } + // + // + // // und daraus erzeugen wir uns jetzt nen MultiReader + // RemoteMultiIndexReader multiReader = new RemoteMultiIndexReader(llReaderz.toArray(new RemoteIndexReader[0])); + // + // + // return multiReader; + // } + + + + // synchronized static public RemoteIndexSearcher getMultiIndexSearcher(LinkedHashSet indexPathsOrURLs) throws IOException, URISyntaxException + // { + // + // // - wir erzeugen uns einen searcher aus jeder Quelle - ganz einfach mit getIndexSearcher. Da wird dann auch die Unterscheidung zwischen + // // lokal- und remoteSearcher gemacht. 
+ // // - wir nehmen den wunderschönen ParallelMultiSearcher - verteilte document frequency + multithreaded Suche....sehr schön :)...den gibts nicht mehr :( + // + // + // + // // Wir trennen in remote-und lokale Indizes + // LinkedList lLocalIndices = new LinkedList<>(); + // LinkedList lRemoteIndices = new LinkedList<>(); + // + // for (String strIndexPathOrURL : indexPathsOrURLs) + // { + // if(isLocalPath(strIndexPathOrURL)) + // { + // lLocalIndices.add(strIndexPathOrURL); + // } + // else + // { + // lRemoteIndices.add(strIndexPathOrURL); + // } + // } + // + // + // LinkedList llSearcherz = new LinkedList(); + // + // // der lokale MultiSearcher + // RemoteIndexSearcherImpl localSearcher = new RemoteIndexSearcherImpl(lLocalIndices.toArray(new String[0])); + // localSearcher.setIdAttributename(m_strIdAttributeName); + // llSearcherz.add(localSearcher); + // + // + // // die remote reader + // for (String strRemoteURL : lRemoteIndices) + // { + // + // try + // { + // RemoteIndexSearcher searcher = getIndexSearcher(strRemoteURL); + // + // // check if the remote index is up and running + // searcher.maxDoc(); + // + // llSearcherz.add(searcher); + // } + // catch (Exception e) + // { + // logger.log(Level.SEVERE, "Exception while creating a MultiSearcher. The index '" + strRemoteURL + "' will be ignored. ('" + e.getMessage() + "')"); + // logger.log(Level.FINE, "Exception for index '" + strRemoteURL + "': ", e); + // } + // } + // + // + // // und daraus erzeugen wir uns jetzt nen MultiSearcer + // if(llSearcherz.size() == 1) return llSearcherz.get(0); + // + // RemoteMultiIndexSearcher multiSearcher = new RemoteMultiIndexSearcher(llSearcherz.toArray(new RemoteIndexSearcher[0])); + // + // + // return multiSearcher; + // + // + // + // // + // // + // // + // // + // // LinkedList llSearchables = new LinkedList(); + // // + // // for (String strIndexPathOrURL : indexPathsOrURLs) + // // { + // // try + // // { + // // + // // RemoteIndexSearcher searcher = getIndexSearcher(strIndexPathOrURL); + // // llSearchables.add(searcher); + // // + // // } + // // catch (Exception e) + // // { + // // logger.log(Level.SEVERE, "Exception while creating a MultiSearcher. The index '" + strIndexPathOrURL + "' will be ignored. ('" + e.getMessage() + "')"); + // // logger.log(Level.FINE, "Exception for index '" + strIndexPathOrURL + "': ", e); + // // } + // // } + // // + // // + // // RemoteMultiIndexSearcher searcher = new RemoteMultiIndexSearcher(llSearchables.toArray(new RemoteIndexSearcher[0])); + // // + // // + // // return searcher; + // } + + + // + // synchronized static public RemoteIndexSearcher getMultiIndexSearcher(String strLastIndexInListPathOrID) throws IOException, URISyntaxException + // { + // + // LinkedList llIndices = new LinkedList(); + // + // + // // der reader, auf den auch schreibend zugegriffen werden kann, machen wir am Schluß rein - ich habe die Hoffnung, + // // daß sich dann nicht die docIDs verschieben, wenn gleichzeitig geschrieben und in diesem und in externen Indices + // // gesucht wird...die externen müssen halt readonly sein...und des funzt auch :) + // + // + // HashSet hsIndexPaths = new HashSet(); + // hsIndexPaths.addAll(getIndexReaderPathsAndIDs()); + // + // // aaalso. wir erstellen alle Readers, und für den letzten wird das Flag eingesetzt... 
+ // for (String strIndexPathOrURL : hsIndexPaths) + // { + // + // boolean bIsLast = strIndexPathOrURL.equals(strLastIndexInListPathOrID); + // + // if(strLastIndexInListPathOrID == null || llIndices.size() == 0 || bIsLast) + // llIndices.addLast(strIndexPathOrURL); + // else + // llIndices.addFirst(strIndexPathOrURL); + // } + // + // + // return getMultiIndexSearcher(new LinkedHashSet(llIndices)); + // } + + + + + + /** + * Gets the time intervall all reader objects will be refreshed automatically. After a refresh, all Objects from subsequent calls of {@link #getLuceneIndexReader(String, boolean)} + * will reflect the current state of an index, with any changes done. + * + * @return the reader refresh time intervall + */ + static public long getReaderRefreshIntervall() + { + return m_lReaderRefreshIntervall; + } + + + + // /** + // * Gets whether native file locking is enabled or not + // * + // * @return whether native file locking is enabled or not + // */ + // static public boolean isNativeFileLockEnabled() + // { + // return m_bNativeFileLock; + // } + + + + + /** + * Returns true in the case a reader object for a given index path is inside the cache + * + * @param strIndexPathOrURL the index path for the reader object + * + * @return true in the case a reader object for the given index path is inside the cache + */ + static public boolean isReaderInCache(String strIndexPathOrURL) + { + return m_hsIndexPathOrId2CurrentIndexReader.containsKey(strIndexPathOrURL); + } + + + + /** + * Refreshs all index readers + * + * @throws CorruptIndexException + * @throws IOException + * @throws URISyntaxException + */ + synchronized static public void refreshAllIndexReaders() throws CorruptIndexException, IOException, URISyntaxException + { + LinkedList llKeys = new LinkedList(); + llKeys.addAll(m_hsIndexPathOrId2CurrentIndexReader.keySet()); + + for (String strIndexPathOrURL : llKeys) + refreshIndexReader(strIndexPathOrURL); + + } + + + + + + + + /** + * Refreshs an index reader for a given path. In the case the indexReader was not formerly created by {@link #getLuceneIndexReader(String, boolean)}, it will be + * created. In the case you will pass the ID of a static Reader, the method will do nothing. + * + * @param strIndexPath the path to the lucene index + * + * @throws CorruptIndexException + * @throws IOException + * @throws URISyntaxException + */ + synchronized static public void refreshIndexReader(String strIndexPath) throws CorruptIndexException, IOException, URISyntaxException + { + refreshIndexReader(strIndexPath, false); + } + + + + // static public boolean isLocalPath(String strIndexPathOrURL) + // { + // try + // { + // + // if(strIndexPathOrURL == null) return false; + // + // File fIndex = null; + // // die super-URI-Implementierung nimmt echt alles an, was auch keine Uri ist, ohne eine syntaxException - insbesondere einen Pfad :( + // + // if(strIndexPathOrURL.startsWith("file:")) + // + // fIndex = new File(new URI(strIndexPathOrURL)); + // else + // fIndex = new File(strIndexPathOrURL); + // + // + // if(fIndex.exists()) return true; + // + // return false; + // + // + // } + // catch (URISyntaxException e) + // { + // return false; + // } + // + // } + + + + /** + * Refreshs an index reader for a given path. In the case the indexReader was not formerly created by {@link #getLuceneIndexReader(String, boolean)}, it will be + * created. In the case the index does not exist, it will be created, if you want. 
In the case you will pass the ID of a static Reader, the method will do nothing. + * + * @param strIndexPath the path to the lucene index + * @param bCreateIndexIfNotExist if true, the index will be created in the case he did not exist + * + * @throws CorruptIndexException + * @throws IOException + * @throws URISyntaxException + */ + synchronized static public void refreshIndexReader(String strIndexPath, boolean bCreateIndexIfNotExist) throws CorruptIndexException, IOException, URISyntaxException + { + + // haben wir schon einen? + IndexReader readerOld = getLuceneIndexReader(strIndexPath, bCreateIndexIfNotExist); + + // wenn es ein statischer Reader ist, dann wird der ned refreshed + if(m_hsStaticIndexReaderSet.contains(readerOld)) return; + // wenn es kein DirectoryReader ist, können wir ihn nicht refreshen + if(!(readerOld instanceof DirectoryReader)) return; + DirectoryReader dirReader = (DirectoryReader) readerOld; + + try + { + if(dirReader.isCurrent()) return; + + logger.info("will refresh reader for index '" + strIndexPath + "'"); + + // den neuen erstellen + // Directory dir = createFSDirectory(new File(strIndexPath)); + // + // if(m_bLoadReadersInMemory) dir = new RAMDirectory(dir); + // + // IndexReader readerNew = IndexReader.open(dir, true); + IndexReader readerNew = DirectoryReader.openIfChanged(dirReader); + + + // hier steht immer der neueste drin - die alten werden in der release-methode wieder zu gemacht + m_hsIndexPathOrId2CurrentIndexReader.put(strIndexPath, readerNew); + + } + catch (org.apache.lucene.store.AlreadyClosedException e) + { + logger.warning("reader for '" + strIndexPath + "' was closed at refresh time"); + } + finally + { + // der alte Reader wird dann geschlossen, wenn er nicht mehr gebraucht wird + releaseLuceneIndexReader(readerOld); + } + + } + + + + + + /** + * Release your indexWriter that you get with getIndexWriter - in any case. In the case the IndexWriter is no more needed by some Instance, it will be commited and + * closed. 
+ * + * @param indexWriter the writer Object that should be released + */ + synchronized static public void releaseIndexWriter(IndexWriter indexWriter) + { + try + { + // wir dekrementieren den count für den aktuellen Index + Integer iOld = m_hsIndexWriter2WriterRefCount.get(indexWriter); + if(iOld == null || iOld == 0) + { + logger.warning("have no writer index token for '" + indexWriter + "'"); + return; + } + + // das müssen wir an dieser Stelle machen - wenn der writer geclosed ist, dann wirft getDirectory eine Exception + if(!(indexWriter.getDirectory() instanceof FSDirectory)) throw new IllegalStateException("Directory is not of type FSDirectory"); + + String strIndexPathOrURL = ((FSDirectory) indexWriter.getDirectory()).getDirectory().toAbsolutePath().toString(); + + + int iNew = --iOld; + + String strDontCloseIndexWriters = System.getProperty("de.dfki.inquisition.lucene.IndexAccessor.DontCloseIndexWriters"); + boolean bIgnoreClose = false; + if(strDontCloseIndexWriters != null) bIgnoreClose = Boolean.parseBoolean(strDontCloseIndexWriters); + + if(iNew == 0 && !bIgnoreClose) + { + // wenn wir bei 0 sind, dann mache mer des Ding gleich zu + Set> entrySet = m_hsIndexPathOrURL2Writer.entrySet(); + Iterator> itEntries = entrySet.iterator(); + while (itEntries.hasNext()) + { + Entry entry = itEntries.next(); + if(entry.getValue().equals(indexWriter)) itEntries.remove(); + } + + + m_hsIndexWriter2WriterRefCount.remove(indexWriter); + + + logger.fine("will close indexWriter for '" + strIndexPathOrURL + "'"); + + indexWriter.commit(); + if(isLocalPath(strIndexPathOrURL)) indexWriter.close(); + } + else + m_hsIndexWriter2WriterRefCount.put(indexWriter, iNew); + + if(logger.isLoggable(Level.FINEST)) + { + if(bIgnoreClose) + logger.finest("indexWriter '" + strIndexPathOrURL + "' released - closing IGNORED (writer is still open)\n" + LoggingUtils.getCurrentStackTrace()); + else + logger.finest("indexWriter '" + strIndexPathOrURL + "' released\n" + LoggingUtils.getCurrentStackTrace()); + } + + } catch (IOException e) + { + logger.severe(ExceptionUtils.createStackTraceString(e)); + } + } + + + + /** + * This is an expert method - the use of RemoteIndexReader is recommended (You don't need to release it). Releases your reader Object in the case you don't need it + * anymore. In the case every instance has released a specific index path, the reader object will be closed. + * + * @param reader the IndexReader Object you gets formerly with IndexAccessor + */ + synchronized static public void releaseLuceneIndexReader(IndexReader reader) + { + + try + { + + if(reader instanceof BetterMultiReader) + { + for (IndexReader subReader : ((BetterMultiReader) reader).getSubReaders()) + releaseLuceneIndexReader(subReader); + + return; + } + + + String strIndexPathOrURL4Reader = m_hsIndexReader2IndexPath.get(reader); + if(strIndexPathOrURL4Reader == null) + logger.severe("have no path entry for reader. This is a hint to an error, e.g. 
you have released the reader too often, or the reader was not created with IndexAccessor."); + + + Integer iOldRefCount = m_hsIndexReader2ReaderRefCount.get(reader); + + if(iOldRefCount == null || iOldRefCount == 0) + { + logger.warning("have no reader index token for '" + strIndexPathOrURL4Reader + "'"); + return; + } + + int iNew = --iOldRefCount; + + if(iNew == 0) + { + // wenn wir bei 0 sind, dann mache mer des Ding gleich zu - wenn es nicht noch im Cache bleiben soll + m_hsIndexReader2ReaderRefCount.remove(reader); + m_hsIndexReader2IndexPath.remove(reader); + + // wir schliessen den nur, wenn es nicht der aktuelle aus der hashmap ist - ansonsten müssten wir ihn ständig wieder neu erzeugen. + // der aktuelle wir dann geschlossen, wenn es einen neueren gibt oder explizit mit removeReaderFromCache + + // wenn vorher gesagt wurde (mit removeReaderFromCacheWhenPossible), daß des Teil geschlossen werden soll, machen wir es auch zu + + if(!m_hsIndexPathOrId2CurrentIndexReader.containsValue(reader)) + { + // es ist nicht der aktuelle reader + if(isLocalPath(strIndexPathOrURL4Reader)) + { + logger.info("will close indexReader '" + strIndexPathOrURL4Reader + "'"); + reader.close(); + } + + } + else if(m_hsReader2Remove.contains(reader)) removeReaderFromCache(strIndexPathOrURL4Reader); + + } + else + m_hsIndexReader2ReaderRefCount.put(reader, iNew); + + + if(logger.isLoggable(Level.FINEST)) logger.finest("indexReader '" + strIndexPathOrURL4Reader + "' released\n" + LoggingUtils.getCurrentStackTrace()); + + + } + catch (IOException e) + { + logger.severe(ExceptionUtils.createStackTraceString(e)); + } + } + + + + synchronized static public void releaseLuceneIndexSearcher(IndexSearcher searcher) + { + releaseLuceneIndexReader(searcher.getIndexReader()); + } + + + + /** + * Removes an closes the reader object for a given index path from the cache. This is only possible in the case this object is no more in use - the method will throw + * an exception otherwise. + * + * @param strIndexPathOrURL the path to the index + * + * @throws IOException + */ + synchronized static public void removeReaderFromCache(String strIndexPathOrURL) throws IOException + { + // wir haben immer den aktuellen reader für einen index im Speicher - hier können wir ihn wieder entfernen, um den Speicher freizugeben + + // wenn der alte Reader nicht mehr benötigt wird, dann wird er geschlossen + IndexReader reader = m_hsIndexPathOrId2CurrentIndexReader.get(strIndexPathOrURL); + + if(m_hsIndexReader2ReaderRefCount.get(reader) == null) + { + logger.fine("will close indexReader '" + strIndexPathOrURL + "'"); + m_hsIndexPathOrId2CurrentIndexReader.remove(strIndexPathOrURL); + m_hsStaticIndexReaderSet.remove(reader); + + if(isLocalPath(m_hsIndexReader2IndexPath.get(reader))) reader.close(); + + m_hsReader2Remove.remove(reader); + } + else + { + throw new IllegalStateException("Cannot remove reader object for '" + strIndexPathOrURL + + "' from cache. It is still in use. Did you forget an releaseIndexReader(..) invocation?"); + + } + } + + + + /** + * Removes an closes the reader object for a given index path from the cache. This is only possible in the case this object is no more in use - otherwise, the reader + * Object will be removed from the cache immediately when it is no more in use. + * + * @param strIndexPathOrURL the path to the index + * + * @return READER_CLOSED in the case the reader was closed immediately, READER_IN_QUEUE if it is in the queue of 'to close readers' now. 
If the reader is not inside + * the cache, the method will return READER_NOT_IN_CACHE + * + * @throws IOException + */ + synchronized static public ReaderStatus removeReaderFromCacheWhenPossible(String strIndexPathOrURL) throws IOException + { + // wir haben immer den aktuellen reader für einen index im Speicher - hier können wir ihn wieder entfernen, um den Speicher freizugeben + + if(!isReaderInCache(strIndexPathOrURL)) return ReaderStatus.READER_NOT_IN_CACHE; + + // wenn der alte Reader nicht mehr benötigt wird, dann wird er geschlossen + IndexReader reader = m_hsIndexPathOrId2CurrentIndexReader.get(strIndexPathOrURL); + + if(m_hsIndexReader2ReaderRefCount.get(reader) == null) + { + logger.fine("will close indexReader '" + strIndexPathOrURL + "'"); + m_hsIndexPathOrId2CurrentIndexReader.remove(strIndexPathOrURL); + m_hsStaticIndexReaderSet.remove(reader); + + if(isLocalPath(m_hsIndexReader2IndexPath.get(reader))) reader.close(); + + return ReaderStatus.READER_CLOSED; + + } + else + { + m_hsReader2Remove.add(reader); + + return ReaderStatus.READER_IN_QUEUE; + } + } + + + + + // /** + // * Simply removes a formerly cached Searcher Object from the cache. Only remote Searcher proxies are cached - so this is only to give a possibility to free the memory + // * again (nevertheless, there should be not much amount of memory consumtion - in the case you have not thousands of searcher objects, you should be able to ignore + // * this...(hehe - I didn't say that ;) ) + // * + // * @param strIndexPathOrURL the index for which you want to remove the according searcher proxy object out of the internal cache + // */ + // synchronized static public void removeSearcherFromCache(String strIndexPathOrURL) + // { + // m_hsIndexPathOrURL2CurrentRemoteSearcher.remove(strIndexPathOrURL); + // } + + + + /** + * Removes and closes all cached reader objects that are not in use. This method can be used safely at any time, the only disadvantage is that an subsequent + * invocation of {@link #getLuceneIndexReader(String, boolean)} for one of these indices will take longer time. + * + * @throws IOException + */ + static public void removeUnusedReadersFromCache() throws IOException + { + LinkedList llIndexPaths = new LinkedList(); + + llIndexPaths.addAll(m_hsIndexPathOrId2CurrentIndexReader.keySet()); + + for (String strIndexPathOrURL : llIndexPaths) + try + { + removeReaderFromCache(strIndexPathOrURL); + } + catch (IllegalStateException e) + { + if(!e.getMessage().startsWith("Cannot remove reader object for")) throw e; + } + } + + + + /** + * Sets the default analyzer that will be used for writer creation + * + * @param analyzer the default analyzer that will be used for writer creation + */ + static public void setDefaultAnalyzer(Analyzer analyzer) + { + m_analyzer4writer = analyzer; + } + + + + /** + * Sets the default attribute name that will be used for RemotIndexReader creation + * + * @param strIdAttributeName the default attribute name that will be used for RemotIndexReader creation + */ + static public void setDefaultIndexIdAttribute(String strIdAttributeName) + { + IndexAccessor.m_strIdAttributeName = strIdAttributeName; + } + + + + /** + * Sets the time intervall all reader objects will be refreshed automatically. After a refresh, all Objects from subsequent calls of {@link #getLuceneIndexReader(String, boolean)} + * will reflect the current state of an index, with any changes done. 
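+ * Example (sketch): IndexAccessor.setReaderRefreshIntervall(60 * 1000) would let the refresh thread renew the cached readers about once per minute, assuming lMillis is interpreted as milliseconds.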
+ * + * @param lMillis the time intervall the reader should be refreshed + * + * @return the former time intervall + */ + static public long setReaderRefreshIntervall(long lMillis) + { + long lOld = m_lReaderRefreshIntervall; + + m_lReaderRefreshIntervall = lMillis; + + return lOld; + } + + + + protected static FSDirectory createFSDirectory(File fDirPath) throws IOException + { + // das muß man so umständlich mit setLockfactory machen - wenn man einfach initial das die erstellt, und das dir wurde mit einer anderen + // LockFactory erstellt, dann kommt ne Exception + + + // null heißt SimpleFileLock (ich hab gekuckt ;) ) + FSDirectory dir = FSDirectory.open(fDirPath.toPath()); + + // NativeFSLockFactory lockFactory = new NativeFSLockFactory(fDirPath); + // lockFactory.setLockPrefix("indexAccessor"); + // if(isNativeFileLockEnabled()) dir.setLockFactory(lockFactory); + + return dir; + } + + + + + /** + * Closes all reader and writer objects. This is mainly for the shutdown hook, to make shure that no other processes will be blocked by non-closed Objects + * + * @throws IOException + */ + protected static void forceCloseAll() throws IOException + { + if(m_hsIndexReader2ReaderRefCount.size() == 0 && m_hsIndexPathOrURL2Writer.size() == 0) return; + + logger.info("closing of all index readers and writers will be forced " + m_hsIndexReader2ReaderRefCount.size() + " reader(s), " + + m_hsIndexPathOrURL2Writer.size() + " writer(s)"); + + + for (IndexReader reader : m_hsIndexReader2ReaderRefCount.keySet()) + if(isLocalPath(m_hsIndexReader2IndexPath.get(reader))) reader.close(); + + for (Entry pathOrURL2Writer : m_hsIndexPathOrURL2Writer.entrySet()) + { + + String strPath = pathOrURL2Writer.getKey(); + IndexWriter writer = pathOrURL2Writer.getValue(); + writer.commit(); + + if(isLocalPath(strPath)) writer.close(); + } + } + + + + /** + * Gets all reader Objects that should be removed from the cache immediately when they are no more in use + * + * @return all reader Objects that should be removed from the cache immediately when they are no more in use + */ + protected static HashSet getReader2RemoveQueue() + { + return m_hsReader2Remove; + } + + + + + + + /** + * Checks whether the given URL is a local one or not. Local means that the URL starts with 'file:' or that this path exists on the local storage. 
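+ * For example, 'file:///tmp/myIndex' is always treated as local, and '/tmp/myIndex' counts as local if that path exists on disk; other URLs are treated as remote.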
+ */ + protected static boolean isLocalPath(String strIndexPathOrURL) + { + if(StringUtils.nullOrWhitespace(strIndexPathOrURL)) return false; + + File fIndex = null; + // die super-URI-Implementierung nimmt echt alles an, was auch keine Uri ist, ohne eine syntaxException - insbesondere einen Pfad :( + + if(strIndexPathOrURL.startsWith("file:")) return true; + + fIndex = new File(strIndexPathOrURL); + + + if(fIndex.exists()) return true; + + + return false; + } + + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/LuceneAnalyzerFactory.java b/src/main/java/de/dfki/km/leech/lucene/basic/LuceneAnalyzerFactory.java new file mode 100644 index 0000000..ce78cf9 --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/LuceneAnalyzerFactory.java @@ -0,0 +1,156 @@ +// * Created on 04.11.2005 +package de.dfki.km.leech.lucene.basic; + + + +// import de.dfki.inquisitor.lucene.DynamicFieldType; +// import de.dfki.inquisitor.lucene.FieldConfig; +import de.dfki.inquisitor.text.StringUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; +import org.apache.lucene.util.Version; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map.Entry; +import java.util.logging.Level; +import java.util.logging.Logger; + + + + +public class LuceneAnalyzerFactory +{ + + protected static Logger m_logger = Logger.getLogger(LuceneAnalyzerFactory.class.getName()); + + + + + /** + * Creates a new Analyzer out of the given + * + * @return the according analyzer + * + * @throws Exception + */ + public static Analyzer createAnalyzer(FieldConfig fieldConfig) throws Exception + { + + String strDefaultAnalyzerName = fieldConfig.defaultFieldType.getAnalyzer(); + Analyzer defaultAnalyzer = LuceneAnalyzerFactory.createAnalyzer(strDefaultAnalyzerName, null); + + + HashMap hsFieldName2Analyzer = new HashMap(); + for (Entry fieldname2FieldType : fieldConfig.fieldName2FieldType.entrySet()) + { + String strFieldName = fieldname2FieldType.getKey(); + try + { + String strAnalyzer4Field = fieldname2FieldType.getValue().getAnalyzer(); + if(!StringUtils.nullOrWhitespace(strAnalyzer4Field)) + hsFieldName2Analyzer.put(strFieldName, LuceneAnalyzerFactory.createAnalyzer(strAnalyzer4Field, null)); + } + catch (Exception e) + { + Logger.getLogger(LuceneAnalyzerFactory.class.getName()).warning("could not create analyzer from config of field '" + strFieldName + "'"); + } + } + + return new PerFieldAnalyzerWrapper(defaultAnalyzer, hsFieldName2Analyzer); + } + + + + /** + * Creates a new Analyzer. + * + * @param analyzerClassName The class name of the Analyzer to be created. + * @param userGivenStopWordFileName The file name of the stop word file, or null or empty, if no stop words should be set. 
If the given file name is + * relative + * + * @return the newly created analyzer + * + * @throws Exception + */ + public static Analyzer createAnalyzer(String analyzerClassName, String userGivenStopWordFileName) throws Exception + { + try + { + Analyzer analyzer; + + Class analyzerClass = Class.forName(analyzerClassName); + if(!StringUtils.nullOrWhitespace(userGivenStopWordFileName)) + { + Class[] parameterClasses = { String[].class }; + Constructor constructor; + try + { + constructor = analyzerClass.getConstructor(parameterClasses); + + + m_logger.finer("creating Analyzer " + analyzerClassName + " with stopword file " + userGivenStopWordFileName); + InputStreamReader inReader = new InputStreamReader(new FileInputStream(userGivenStopWordFileName), "UTF-8"); + BufferedReader reader = new BufferedReader(inReader); + ArrayList wordList = new ArrayList(); + String stopWord = reader.readLine(); + while (stopWord != null) + { + wordList.add(stopWord); + stopWord = reader.readLine(); + } + reader.close(); + String[] stopWords = wordList.toArray(new String[wordList.size()]); + + + + Object[] parameters = { stopWords }; + analyzer = (Analyzer) constructor.newInstance(parameters); + } + catch (NoSuchMethodException e) + { + m_logger.warning("Analyzer '" + analyzerClassName + "' cannot be parameterized with stop word list. Specified stop word list will be ignored"); + constructor = analyzerClass.getConstructor(new Class[0]); + Object[] parameters = {}; + analyzer = (Analyzer) constructor.newInstance(parameters); + } + + } + else + { + m_logger.finer("creating Analyzer " + analyzerClassName + " without stopword file"); + + + try + { + //we try if there is a constructor with a single Version parameter + Class[] parameterClasses = { Version.class }; + Constructor constructor = analyzerClass.getConstructor(parameterClasses); + + Object[] parameters = { Version.LUCENE_CURRENT }; + analyzer = (Analyzer) constructor.newInstance(parameters); + } + catch (NoSuchMethodException e) + { + analyzer = (Analyzer) analyzerClass.newInstance(); + } + + + + } + + return analyzer; + + } + catch (Exception e) + { + m_logger.log(Level.WARNING, "Unable to instantiate Analyzer '" + analyzerClassName + "'.", e); + throw new Exception("Unable to instantiate Analyzer '" + analyzerClassName + "'.", e); + } + } + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/LuceneUtilz.java b/src/main/java/de/dfki/km/leech/lucene/basic/LuceneUtilz.java new file mode 100644 index 0000000..612bacc --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/LuceneUtilz.java @@ -0,0 +1,454 @@ +package de.dfki.km.leech.lucene.basic; + + + +import de.dfki.inquisitor.collections.TwoValuesBox; +// import de.dfki.inquisitor.lucene.FieldConfig; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.*; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.*; +import org.apache.lucene.search.MultiTermQuery.RewriteMethod; +import org.apache.lucene.util.Bits; + +import java.io.IOException; +import java.util.*; + + + +public class LuceneUtilz +{ + + + + + /** + * There exists a bug in lucene (at least currently) which yields to the fact that some field attributes are gone if reading a document, which makes re-inserting this + * document to the index impossible. 
As workaround we reinsert all attributes with stored values again to the given document object, with the according fieldType from + * fieldConfig. + * + * @param doc the doc object that should be processed + */ + static public void reInsertStoredFieldTypes(Document doc, FieldConfig fieldConfig) + { + LinkedList llReInsertFields = new LinkedList<>(); + + Iterator itFields = doc.iterator(); + while (itFields.hasNext()) + { + IndexableField oldField = itFields.next(); + + if(!oldField.fieldType().stored()) continue; + + itFields.remove(); + + IndexableField newField; + if(oldField.fieldType().docValuesType() == DocValuesType.NUMERIC) + newField = fieldConfig.createField(oldField.name(), oldField.numericValue()); + else + newField = fieldConfig.createField(oldField.name(), oldField.stringValue()); + + llReInsertFields.add(newField); + } + + for (IndexableField newField : llReInsertFields) + doc.add(newField); + + } + + + + /** + * Extract all the terms in the index matching the query terms. Works also with wildcard queries + * + * @return the terms in the index matching the query terms. Works also with wildcard queries + */ + @SuppressWarnings("javadoc") + static public Set extractQueryTerms(String strQuery, QueryParser queryParser, IndexReader reader) + { + try + { + Query query = queryParser.parse(strQuery); + + + return extractQueryTerms(query, reader); + + } + catch (Exception e) + { + throw new RuntimeException(e); + } + } + + + + /** + * Extract all the terms in the index matching the query terms. Works also with wildcard queries + * + * @return the terms in the index matching the query terms. Works also with wildcard queries + */ + @SuppressWarnings("javadoc") + static public Set extractQueryTerms(Query query, IndexReader reader) + { + try + { + + HashSet subQueries = LuceneUtilz.getSubQueries(query); + List> llQuery2FormerRewrite = new LinkedList<>(); + + for (Query subQuery : subQueries) + { + if(subQuery instanceof MultiTermQuery) + { + llQuery2FormerRewrite.add(new TwoValuesBox((MultiTermQuery) subQuery, ((MultiTermQuery) subQuery).getRewriteMethod())); + // das brauchen wir, damit Lucene wieder die Terme in BooleanQueries reinmultipliziert (prefixQueries, etc.) + ((MultiTermQuery) subQuery).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE); + } + } + + Query rewritten = query.rewrite(reader); + + HashSet hsTerms = new HashSet<>(); + + Weight rewrittenWeight = rewritten.createWeight(new IndexSearcher(reader), false); + rewrittenWeight.extractTerms(hsTerms); + // rewritten.extractTerms(hsTerms); + + // jetzt setzen wir die rewrite Method wieder auf das ursprüngliche zurück + for (TwoValuesBox subQuery2FormerRewrite : llQuery2FormerRewrite) + subQuery2FormerRewrite.getFirst().setRewriteMethod(subQuery2FormerRewrite.getSecond()); + + + return hsTerms; + + } + catch (Exception e) + { + throw new RuntimeException(e); + } + + } + + + + public static List analyzeText(String strFieldName, String strText, Analyzer analyzer, int iMaxResults) + { + try + { + LinkedList llTokenStrings = new LinkedList<>(); + + // wir analysieren/normalisieren den Term für den Lookup + TokenStream tokenstream = analyzer.tokenStream(strFieldName, strText); + + CharTermAttribute termAtt = tokenstream.addAttribute(CharTermAttribute.class); + tokenstream.reset(); // Resets this stream to the beginning. 
(Required) + + for (int i = 0; i < iMaxResults; i++) + { + + if(!tokenstream.incrementToken()) break; + + llTokenStrings.add(termAtt.toString()); + } + + tokenstream.end(); // Perform end-of-stream operations, e.g. set the final offset. + tokenstream.close(); // Release resources associated with this stream. + + + return llTokenStrings; + + } + catch (Exception e) + { + throw new RuntimeException(e); + } + } + + + + static public Bits bits4Doc(final int iDocId, final int iBitsLength) + { + return new Bits() + { + + @Override + public boolean get(int index) + { + if(index == iDocId) + return true; + else + return false; + } + + + + @Override + public int length() + { + return iBitsLength; + } + }; + } + + + + + static public Bits bits4Docs(final Set sDocIds, final int iBitsLength) + { + return new Bits() + { + + @Override + public boolean get(int index) + { + if(sDocIds.contains(index)) + return true; + else + return false; + } + + + + @Override + public int length() + { + return iBitsLength; + } + }; + } + + + + + + + /** + * This method creates a query out of given text for a specific field, with a given analyzer. The method will create a TermQuery in the case the analyzer did not + * tokenized the input text, or a PhraseQuery in the case the analyzer did. All values in the query are fully analyzed an this searchable for the given field with + * respect to the given analyzer. + * + * @return a TermQuery, PhraseQuery or null in the case there was no text left after processing the text with the analyzer + */ + public static Query createQuery(String strFieldName, String strText, Analyzer analyzer) + { + List lAnalyzedText = analyzeText(strFieldName, strText, analyzer, Integer.MAX_VALUE); + + if(lAnalyzedText.size() > 1) + { + PhraseQuery pq = new PhraseQuery(strFieldName, lAnalyzedText.toArray(new String[0])); + // for (String strTerm : lAnalyzedText) + // pq.add(new Term(strFieldName, strTerm)); + + return pq; + } + else if(lAnalyzedText.size() == 1) return new TermQuery(new Term(strFieldName, lAnalyzedText.get(0))); + + return null; + } + + + + public static List getDocsWithTerm(Term term2search, int iMaxResults, IndexSearcher indexSearcher, Set fields2load) + { + + try + { + LinkedList llDocs = new LinkedList<>(); + + TopDocs topDocs = indexSearcher.search(new TermQuery(term2search), iMaxResults); + + for (int i = 0; i < topDocs.scoreDocs.length; i++) + { + + int doc = topDocs.scoreDocs[i].doc; + + if(fields2load == null) + llDocs.add(indexSearcher.doc(doc)); + else + llDocs.add(indexSearcher.doc(doc, fields2load)); + + } + + return llDocs; + + } + catch (IOException e) + { + throw new RuntimeException(e); + } + + } + + + + /** + * Extracts all subqueries which have a boost factor of a given Query into an array + * + * @param query Query to extract subqueries from + * @return an array of the subqueries which have a boost factor + */ + public static Set getSubClauses(Query query) + { + HashSet subqueries = new HashSet(); + + getSubClauses(query, subqueries); + + + return subqueries; + } + + + + private static void getSubClauses(Query query, HashSet subClauses) + { + if(!(query instanceof BooleanQuery)) return; + + BooleanClause[] queryClauses = ((BooleanQuery) query).clauses().toArray(new BooleanClause[0]); + + for (BooleanClause clause : queryClauses) + { + subClauses.add(clause); + + if(clause.getQuery() instanceof BooleanQuery) getSubClauses(clause.getQuery(), subClauses); + } + } + + + + /** + * Extracts all subqueries of a given Query. 
The given query will also be part of the returned set. + * + * @param query Query to extract subqueries from + * + * @return all subqueries + */ + public static HashSet getSubQueries(Query query) + { + HashSet subqueries = new HashSet(); + getSubQueries(query, subqueries); + + return subqueries; + } + + + + protected static void getSubQueries(Query query, HashSet subQueries) + { + if(query instanceof BooleanQuery) + { + BooleanClause[] queryClauses = ((BooleanQuery) query).clauses().toArray(new BooleanClause[0]); + + for (int i = 0; i < queryClauses.length; i++) + getSubQueries(queryClauses[i].getQuery(), subQueries); + } + + subQueries.add(query); + } + + + // + // static public int getTermFrq4Doc(Term term, int iDocId, IndexReader reader) + // { + // return getTermFrq4Docs(term, bits4Doc(iDocId, reader.maxDoc()), reader); + // } + // + // + // + // static public int getTermFrq4Docs(Term term, Bits docBits, IndexReader reader) + // { + // + // try + // { + // DocsEnum docEnum = MultiFields.getTermDocsEnum(reader, docBits, term.field(), term.bytes()); + // int termFreq = 0; + // + // @SuppressWarnings("unused") + // int doc = DocsEnum.NO_MORE_DOCS; + // while ((doc = docEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) + // { + // termFreq += docEnum.freq(); + // } + // + // + // return termFreq; + // + // } + // catch (Exception e) + // { + // throw new RuntimeException(e); + // } + // } + // + // + // + // + // + // static public int getTermFrq4Docs(Term term, Set sDocIds, IndexReader reader) + // { + // return getTermFrq4Docs(term, bits4Docs(sDocIds, reader.maxDoc()), reader); + // } + // + // + // + // + // static public int getTermFrq4Index(Term term, IndexReader reader) + // { + // return getTermFrq4Docs(term, MultiFields.getLiveDocs(reader), reader); + // } + + + + /** + * Gets the document object and the index document index/number + */ + @SuppressWarnings("javadoc") + public static TwoValuesBox getUniqueDocWithTerm(Term idTerm2search, IndexSearcher indexSearcher) + { + return getUniqueDocWithTerm(idTerm2search, indexSearcher, null); + } + + + + /** + * Gets the document object and the index document index/number + */ + @SuppressWarnings("javadoc") + public static TwoValuesBox getUniqueDocWithTerm(Term idTerm2search, IndexSearcher indexSearcher, Set fields2load) + { + + try + { + // XXX hier wollen wir einen einfachen Collecor, wir brauchen keine Scores! 
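+            // XXX: a plain collector would do here - the scores are not needed for this ID lookup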
+ TopDocs topDocs = indexSearcher.search(new TermQuery(idTerm2search), 1); + + + if(topDocs.totalHits == 0) return null; + + if(topDocs.totalHits > 1) throw new IllegalStateException("multiple document entries for ID term search"); + + + int doc = topDocs.scoreDocs[0].doc; + + Document document; + if(fields2load == null) + document = indexSearcher.doc(doc); + else + document = indexSearcher.doc(doc, fields2load); + + if(document == null) return null; + + + return new TwoValuesBox(document, doc); + + } + catch (IOException e) + { + throw new RuntimeException(e); + } + + } +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/PageCountEstimator.java b/src/main/java/de/dfki/km/leech/lucene/basic/PageCountEstimator.java new file mode 100644 index 0000000..7fce051 --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/PageCountEstimator.java @@ -0,0 +1,107 @@ +package de.dfki.km.leech.lucene.basic; + + + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.LegacyIntField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Terms; + +import java.io.IOException; + + + +public class PageCountEstimator +{ + + + /** + * Adds a page count attribute to a document in the case no one is there. The method estimates the page cont (i.e. 400 terms => 1 page). + * + * @param iDocNo the docs index number + * @param doc2modify the document + * @param strPageCountAttName the field for the generated page count (that should be created) + * @param isHeuristicPageCountFlagAttName an attribute name that will be generated as hint wether a document page count is calculated or not + * @param strBodyAttName the body attribute name to perform the calculation + * @param reader the lucene index reader + * + * @return true in the case the doc was modified, false otherwise + * + * @throws Exception + */ + static public boolean addHeuristicDocPageCounts(int iDocNo, Document doc2modify, String strPageCountAttName, String isHeuristicPageCountFlagAttName, + String strBodyAttName, IndexReader reader) throws Exception + { + // sofern ein Attribut noch nicht vorhanden ist, wird es hier erzeugt - mit Hilfe einer Heuristik + // es wird auch noch ein zusätzliches Attribut eingetragen, welches anzeigt, daß die PageCount mit Hilfe + // einer Heuristik erzeugt wurde + + // wenn es schon einen Eintrag für die Seitenzahlen gibt, wird das Dokument ignoriert (das war zumindest so, solange schöne Zahln im Index + // standen) + String strPageCountValue = doc2modify.get(strPageCountAttName); + // if(strPageCountValue != null) + if(strPageCountValue != null && doc2modify.get(isHeuristicPageCountFlagAttName) == null) + { + + // wenn da so ein verkrutztes Leech-Ding drin steht, dann machen wir da ne schöne Zahl draus :) + int iIndexOfKrutzel = strPageCountValue.indexOf("^^"); + if(iIndexOfKrutzel == -1) return false; + + String strPageCountValueNice = strPageCountValue.substring(0, iIndexOfKrutzel); + doc2modify.removeFields(strPageCountAttName); + + LegacyIntField field = new LegacyIntField(strPageCountAttName, Integer.parseInt(strPageCountValueNice), Store.YES); + + if(field != null) doc2modify.add(field); + + return true; + } + + // wenn es keinen Eintrag für den Content gibt, wird das Dokument ebenfalls ignoriert + String strBodyValue = doc2modify.get(strBodyAttName); + if(strBodyValue == null) return false; + + // wir haben einen Eintrag für den Body und keinen für die Seitenzahlen 
- also frisch ans Werk ;) + + int iPageCount = 0; + + // die Heuristik: 400 Terme ergeben eine Seite + + int iDocTermCount = getDocumentTermCount(iDocNo, strBodyAttName, reader); + + // ich sag jetzt mal einfach, daß ungefähr 400 Wörter auf einer Seite sind... + iPageCount = (iDocTermCount / 400) + 1; + + // die geschätzte PageCount + doc2modify.removeFields(strPageCountAttName); + LegacyIntField field = new LegacyIntField(strPageCountAttName, iPageCount, Store.YES); + if(field != null) doc2modify.add(field); + // ein Flag, welches anzeigt, daß dieser TermCount geschätzt wurde + doc2modify.removeFields(isHeuristicPageCountFlagAttName); + StringField newField = new StringField(isHeuristicPageCountFlagAttName, "true", Store.YES); + if(newField != null) doc2modify.add(newField); + + + return true; + } + + + + public static Integer getDocumentTermCount(int iDocNo, String strFieldName4TermCounting, IndexReader reader) throws IOException + { + + long lTermCount = 0; + + + Terms termVector = reader.getTermVector(iDocNo, strFieldName4TermCounting); + + // manchmal gibt es auch Dokumente, die keinen content bzw. keinen TermFreqVector haben.... + if(termVector != null) lTermCount = termVector.getSumTotalTermFreq(); + + + return Long.valueOf(lTermCount).intValue(); + } + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/Term2FrequenciesEntry.java b/src/main/java/de/dfki/km/leech/lucene/basic/Term2FrequenciesEntry.java new file mode 100644 index 0000000..80e06b6 --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/Term2FrequenciesEntry.java @@ -0,0 +1,41 @@ +package de.dfki.km.leech.lucene.basic; + + + + + +public class Term2FrequenciesEntry +{ + + public String term; + + public Integer documentFrequency; + + public Long totalIndexFrequency; + + + + public Term2FrequenciesEntry() + { + } + + + + public Term2FrequenciesEntry(String term, Integer documentFrequency, Long totalIndexFrequency) + { + this.term = term; + this.documentFrequency = documentFrequency; + this.totalIndexFrequency = totalIndexFrequency; + + } + + + + + @Override + public String toString() + { + return "Term:" + term + " docFRQ:" + documentFrequency + " totalFRQ:" + totalIndexFrequency; + } + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/Term2FrequencyEntry.java b/src/main/java/de/dfki/km/leech/lucene/basic/Term2FrequencyEntry.java new file mode 100644 index 0000000..a2ffa7a --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/Term2FrequencyEntry.java @@ -0,0 +1,65 @@ +package de.dfki.km.leech.lucene.basic; + + + + + +public class Term2FrequencyEntry +{ + + public String term; + + public Integer frequency; + + + + public Term2FrequencyEntry() + { + } + + + + public Term2FrequencyEntry(String strTerm, Integer iFrequency) + { + term = strTerm; + frequency = iFrequency; + + } + + + + public String getTerm() + { + return term; + } + + + + public void setTerm(String term) + { + this.term = term; + } + + + + public Integer getFrequency() + { + return frequency; + } + + + + public void setFrequency(Integer frequency) + { + this.frequency = frequency; + } + + + + @Override + public String toString() + { + return "Term:" + getTerm() + " FRQ:" + getFrequency(); + } + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/TermPosition.java b/src/main/java/de/dfki/km/leech/lucene/basic/TermPosition.java new file mode 100644 index 0000000..eb5336e --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/TermPosition.java @@ -0,0 +1,59 @@ +package de.dfki.km.leech.lucene.basic; + + + + + 
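+/**
+ * Simple value holder for one term occurrence: its token position in the field and the corresponding start/end character offsets.
+ */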
+public class TermPosition +{ + + Integer m_iEndOffset; + + Integer m_iPosition; + + Integer m_iStartOffset; + + + + public Integer getEndOffset() + { + return m_iEndOffset; + } + + + + public Integer getPosition() + { + return m_iPosition; + } + + + + public Integer getStartOffset() + { + return m_iStartOffset; + } + + + + public void setEndOffset(Integer endOffset) + { + m_iEndOffset = endOffset; + } + + + + public void setPosition(Integer position) + { + m_iPosition = position; + } + + + + public void setStartOffset(Integer startOffset) + { + m_iStartOffset = startOffset; + } + + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/TextWithTermVectorOffsetsField.java b/src/main/java/de/dfki/km/leech/lucene/basic/TextWithTermVectorOffsetsField.java new file mode 100644 index 0000000..f4c397c --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/TextWithTermVectorOffsetsField.java @@ -0,0 +1,48 @@ +package de.dfki.km.leech.lucene.basic; + + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; + + + +public class TextWithTermVectorOffsetsField extends Field +{ + + + + /** + * Creates a new {@link TextWithTermVectorOffsetsField}. Default is to generate a stored field. + * + * @param name field name + * @param value String value + * @throws IllegalArgumentException if the field name or value is null. + */ + public TextWithTermVectorOffsetsField(String name, String value) + { + + super(name, value, new DynamicFieldType(TextField.TYPE_STORED).setStoreTermVectorS(true).setStoreTermVectorOffsetS(true).freezE()); + + } + + + + /** + * Creates a new {@link TextWithTermVectorOffsetsField} + * + * @param name field name + * @param value String value + * @param stored Store.YES if the content should also be stored + * @throws IllegalArgumentException if the field name or value is null. + */ + public TextWithTermVectorOffsetsField(String name, String value, Store stored) + { + + + super(name, value, new DynamicFieldType(stored == Store.YES ? 
TextField.TYPE_STORED : TextField.TYPE_NOT_STORED).setStoreTermVectorS(true) + .setStoreTermVectorOffsetS(true).freezE()); + + } + + +} diff --git a/src/main/java/de/dfki/km/leech/lucene/basic/URINotFoundException.java b/src/main/java/de/dfki/km/leech/lucene/basic/URINotFoundException.java new file mode 100644 index 0000000..a937b3d --- /dev/null +++ b/src/main/java/de/dfki/km/leech/lucene/basic/URINotFoundException.java @@ -0,0 +1,38 @@ +package de.dfki.km.leech.lucene.basic; + + + +public class URINotFoundException extends Exception +{ + private static final long serialVersionUID = 8317129753714055831L; + + + + public URINotFoundException() + { + super(); + } + + + + public URINotFoundException(String message, Throwable cause) + { + super(message, cause); + } + + + + public URINotFoundException(String message) + { + super(message); + } + + + + public URINotFoundException(Throwable cause) + { + super(cause); + } + + +} diff --git a/src/main/java/de/dfki/km/leech/parser/CrawlerParser.java b/src/main/java/de/dfki/km/leech/parser/CrawlerParser.java index 907633e..fad0e0e 100644 --- a/src/main/java/de/dfki/km/leech/parser/CrawlerParser.java +++ b/src/main/java/de/dfki/km/leech/parser/CrawlerParser.java @@ -31,7 +31,7 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import de.dfki.inquisition.collections.MultiValueHashMap; +import de.dfki.inquisitor.collections.MultiValueHashMap; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.parser.incremental.IncrementalCrawlingParser; import de.dfki.km.leech.sax.DataSinkContentHandler; diff --git a/src/main/java/de/dfki/km/leech/parser/DirectoryCrawlerParser.java b/src/main/java/de/dfki/km/leech/parser/DirectoryCrawlerParser.java index 4721d53..d5fc5c9 100644 --- a/src/main/java/de/dfki/km/leech/parser/DirectoryCrawlerParser.java +++ b/src/main/java/de/dfki/km/leech/parser/DirectoryCrawlerParser.java @@ -44,7 +44,7 @@ import org.apache.tika.parser.Parser; import org.xml.sax.ContentHandler; -import de.dfki.inquisition.collections.MultiValueHashMap; +import de.dfki.inquisitor.collections.MultiValueHashMap; import de.dfki.km.leech.Leech; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.config.DirectoryCrawlerContext; diff --git a/src/main/java/de/dfki/km/leech/parser/HtmlCrawlerParser.java b/src/main/java/de/dfki/km/leech/parser/HtmlCrawlerParser.java index 5729f63..65b4da8 100644 --- a/src/main/java/de/dfki/km/leech/parser/HtmlCrawlerParser.java +++ b/src/main/java/de/dfki/km/leech/parser/HtmlCrawlerParser.java @@ -44,9 +44,9 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import de.dfki.inquisition.collections.MultiValueHashMap; -import de.dfki.inquisition.processes.StopWatch; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.collections.MultiValueHashMap; +import de.dfki.inquisitor.processes.StopWatch; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.Leech; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.config.HtmlCrawlerContext; diff --git a/src/main/java/de/dfki/km/leech/parser/ImapCrawlerParser.java b/src/main/java/de/dfki/km/leech/parser/ImapCrawlerParser.java index f54c5ac..e5fedd4 100644 --- a/src/main/java/de/dfki/km/leech/parser/ImapCrawlerParser.java +++ b/src/main/java/de/dfki/km/leech/parser/ImapCrawlerParser.java @@ -55,8 +55,8 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import de.dfki.inquisition.collections.MultiValueHashMap; -import 
de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.collections.MultiValueHashMap; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.Leech; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.config.ImapCrawlerContext; diff --git a/src/main/java/de/dfki/km/leech/parser/NonRecursiveCrawlerParser.java b/src/main/java/de/dfki/km/leech/parser/NonRecursiveCrawlerParser.java index 816b8a8..747ee66 100644 --- a/src/main/java/de/dfki/km/leech/parser/NonRecursiveCrawlerParser.java +++ b/src/main/java/de/dfki/km/leech/parser/NonRecursiveCrawlerParser.java @@ -9,7 +9,7 @@ import org.apache.tika.parser.ParseContext; import org.xml.sax.ContentHandler; -import de.dfki.inquisition.collections.MultiValueHashMap; +import de.dfki.inquisitor.collections.MultiValueHashMap; import de.dfki.km.leech.SubDataEntityContentHandler; import de.dfki.km.leech.metadata.LeechMetadata; diff --git a/src/main/java/de/dfki/km/leech/parser/UrlListCrawlerParser.java b/src/main/java/de/dfki/km/leech/parser/UrlListCrawlerParser.java index 8e60d1c..f5d9d8c 100644 --- a/src/main/java/de/dfki/km/leech/parser/UrlListCrawlerParser.java +++ b/src/main/java/de/dfki/km/leech/parser/UrlListCrawlerParser.java @@ -18,7 +18,7 @@ import org.apache.tika.parser.Parser; import org.xml.sax.ContentHandler; -import de.dfki.inquisition.collections.MultiValueHashMap; +import de.dfki.inquisitor.collections.MultiValueHashMap; import de.dfki.km.leech.Leech; import de.dfki.km.leech.io.URLStreamProvider; diff --git a/src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory.java b/src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory.java index f3010da..7199b28 100644 --- a/src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory.java +++ b/src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory.java @@ -45,7 +45,7 @@ import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Bits; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.config.CrawlerContext; diff --git a/src/main/java/de/dfki/km/leech/parser/wikipedia/WikipediaDumpParser.java b/src/main/java/de/dfki/km/leech/parser/wikipedia/WikipediaDumpParser.java index 87cbf39..adbf456 100644 --- a/src/main/java/de/dfki/km/leech/parser/wikipedia/WikipediaDumpParser.java +++ b/src/main/java/de/dfki/km/leech/parser/wikipedia/WikipediaDumpParser.java @@ -54,9 +54,9 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import de.dfki.inquisition.collections.MultiValueBalancedTreeMap; -import de.dfki.inquisition.collections.MultiValueHashMap; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.collections.MultiValueBalancedTreeMap; +import de.dfki.inquisitor.collections.MultiValueHashMap; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.metadata.LeechMetadata; import de.dfki.km.leech.util.TikaUtils; @@ -658,7 +658,7 @@ protected void parseGeoCoordinates(String strText, Metadata metadata) - protected void parseInfoBox(String strText, Metadata metadata, ContentHandler handler) throws SAXException + protected void parseInfoBox(String strText, Metadata metadata, ContentHandler handler) throws SAXException, IOException { // att-value paare mit | getrennt. Innerhalb eines values gibt es auch Zeilenumbrüche (mit '
    ') - dies gilt als Aufzählung @@ -673,7 +673,7 @@ protected void parseInfoBox(String strText, Metadata metadata, ContentHandler ha // als erstes schneiden wir mal die Infobox raus. (?m) ist multiline und (?s) ist dotall ('.' matcht auch line breaks) int iStartInfoBox = -1; int iEndInfoBox = -1; - MatchResult infoMatch = StringUtils.findFirst("\\{\\{\\s*Infobox", strText); + MatchResult infoMatch = StringUtils.findFirstMatch("\\{\\{\\s*Infobox", strText); if(infoMatch != null) { iStartInfoBox = infoMatch.start(); diff --git a/src/main/java/de/dfki/km/leech/sax/CrawlReportContentHandler.java b/src/main/java/de/dfki/km/leech/sax/CrawlReportContentHandler.java index 8d36c17..b64ffd5 100644 --- a/src/main/java/de/dfki/km/leech/sax/CrawlReportContentHandler.java +++ b/src/main/java/de/dfki/km/leech/sax/CrawlReportContentHandler.java @@ -28,10 +28,10 @@ import org.apache.tika.metadata.Metadata; -import de.dfki.inquisition.collections.CollectionUtilz; -import de.dfki.inquisition.collections.MultiValueTreeMap; -import de.dfki.inquisition.processes.StopWatch; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.collections.CollectionUtilz; +import de.dfki.inquisitor.collections.MultiValueTreeMap; +import de.dfki.inquisitor.processes.StopWatch; +import de.dfki.inquisitor.text.StringUtils; diff --git a/src/main/java/de/dfki/km/leech/solr/ToSolrContentHandler.java b/src/main/java/de/dfki/km/leech/solr/ToSolrContentHandler.java index 77b8070..c5da910 100644 --- a/src/main/java/de/dfki/km/leech/solr/ToSolrContentHandler.java +++ b/src/main/java/de/dfki/km/leech/solr/ToSolrContentHandler.java @@ -14,8 +14,8 @@ import org.apache.solr.common.SolrInputDocument; import org.apache.tika.metadata.Metadata; -import de.dfki.inquisition.collections.MultiValueHashMap; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.collections.MultiValueHashMap; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.metadata.LeechMetadata; import de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory; import de.dfki.km.leech.sax.DataSinkContentHandler; diff --git a/src/main/java/de/dfki/km/leech/util/IndexPostprocessor.java b/src/main/java/de/dfki/km/leech/util/IndexPostprocessor.java index fdd4d0d..3eb31f6 100644 --- a/src/main/java/de/dfki/km/leech/util/IndexPostprocessor.java +++ b/src/main/java/de/dfki/km/leech/util/IndexPostprocessor.java @@ -14,6 +14,7 @@ import java.util.Set; import java.util.logging.Logger; +import de.dfki.km.leech.lucene.basic.*; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.*; @@ -26,14 +27,14 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.tika.metadata.Metadata; -import de.dfki.inquisition.file.FileUtils; -import de.dfki.inquisition.lucene.Buzzwords; -import de.dfki.inquisition.lucene.DocumentFrqClass; -import de.dfki.inquisition.lucene.FieldConfig; -import de.dfki.inquisition.lucene.LuceneUtilz; -import de.dfki.inquisition.lucene.PageCountEstimator; -import de.dfki.inquisition.processes.StopWatch; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.file.FileUtilz; +// import de.dfki.inquisitor.lucene.Buzzwords; +// import de.dfki.inquisitor.lucene.DocumentFrqClass; +// import de.dfki.inquisitor.lucene.FieldConfig; +// import de.dfki.inquisitor.lucene.LuceneUtilz; +// import de.dfki.inquisitor.lucene.PageCountEstimator; +import de.dfki.inquisitor.processes.StopWatch; +import de.dfki.inquisitor.text.StringUtils; 
import de.dfki.km.leech.lucene.ToLuceneContentHandler; import de.dfki.km.leech.metadata.LeechMetadata; @@ -90,11 +91,6 @@ static protected List terms(String strFieldName, String strPrefix, int i /** * Enables the Buzzword creation by setting the related configuration parameters. - * - * @param strNewField4Buzzwords - * @param sAttNames4BuzzwordCalculation - * @param iMaxNumberOfBuzzwords - * @param bSkipSimilarTerms */ public void enableBuzzwordGeneration(String strNewField4Buzzwords, int iMaxNumberOfBuzzwords, boolean bSkipSimilarTerms) { @@ -284,8 +280,8 @@ public void postprocessIndex(String strLuceneIndexPath, FieldConfig fieldConfig, } // fOurTmpDir.renameTo(fLuceneIndex); - FileUtils.deleteDirectory(new File(pUnpostProcessed.toString())); - FileUtils.deleteDirectory(fOurTmpDir.toFile()); + FileUtilz.deleteDirectory(new File(pUnpostProcessed.toString())); + FileUtilz.deleteDirectory(fOurTmpDir.toFile()); diff --git a/src/main/java/de/dfki/km/leech/util/LuceneIndexCreator.java b/src/main/java/de/dfki/km/leech/util/LuceneIndexCreator.java index 9af0f33..1025ece 100644 --- a/src/main/java/de/dfki/km/leech/util/LuceneIndexCreator.java +++ b/src/main/java/de/dfki/km/leech/util/LuceneIndexCreator.java @@ -13,6 +13,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import de.dfki.km.leech.lucene.basic.FieldConfig; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -23,10 +24,10 @@ import org.apache.tika.parser.ParseContext; import org.xml.sax.SAXException; -import de.dfki.inquisition.collections.MultiValueHashMap; -import de.dfki.inquisition.lucene.FieldConfig; -import de.dfki.inquisition.processes.StopWatch; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.collections.MultiValueHashMap; +// import de.dfki.inquisitor.lucene.FieldConfig; +import de.dfki.inquisitor.processes.StopWatch; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.Leech; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.lucene.LeechDefaultFieldConfig; @@ -40,8 +41,7 @@ /** - * A very simple Lucene Index creator. FieldConfig is from {@link WikipediaDumpParser#getFieldConfig4ParserAttributes()}, currently you can only specify the source - * dir/file and the target dir for the lucene index + * A very simple Lucene Index creator. 
Currently you can only specify the source dir/file and the target dir for the lucene index * * @author Christian Reuschling, Dipl.Ing.(BA) * @@ -97,7 +97,7 @@ public static void createIndex(List lUrls2Crawl, String strLuceneIndexPa Leech leech = new Leech(); - long startTime = StopWatch.startAndLogTime(Level.INFO); + long startTime = StopWatch.startAndLogTime(LuceneIndexCreator.class); CrawlReportContentHandler reportContentHandler; @@ -145,7 +145,7 @@ public static void createIndex(List lUrls2Crawl, String strLuceneIndexPa indexWriter.forceMerge(1, true); indexWriter.close(); - StopWatch.stopAndLogDistance(startTime, Level.INFO); + StopWatch.stopAndLogDistance(startTime, LuceneIndexCreator.class); Logger.getLogger(LuceneIndexCreator.class.getName()).info("..finished crawling " + lUrls2Crawl); } diff --git a/src/main/java/de/dfki/km/leech/util/SolrIndexCreator.java b/src/main/java/de/dfki/km/leech/util/SolrIndexCreator.java index dc76d4b..8ae1cf2 100644 --- a/src/main/java/de/dfki/km/leech/util/SolrIndexCreator.java +++ b/src/main/java/de/dfki/km/leech/util/SolrIndexCreator.java @@ -13,8 +13,8 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; -import de.dfki.inquisition.collections.MultiValueHashMap; -import de.dfki.inquisition.processes.StopWatch; +import de.dfki.inquisitor.collections.MultiValueHashMap; +import de.dfki.inquisitor.processes.StopWatch; import de.dfki.km.leech.Leech; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.WikipediaDumpParserConfig; @@ -76,7 +76,7 @@ public void createIndex(List lUrls2Crawl, String strSolrUrl, MultiValueH Leech leech = new Leech(); - long startTime = StopWatch.startAndLogTime(Level.INFO); + long startTime = StopWatch.startAndLogTime(SolrIndexCreator.class); CrawlReportContentHandler reportContentHandler; @@ -123,7 +123,7 @@ public void createIndex(List lUrls2Crawl, String strSolrUrl, MultiValueH leech.parse(lUrls2Crawl.toArray(new String[0]), finalContentHandler, context); - StopWatch.stopAndLogDistance(startTime, Level.INFO); + StopWatch.stopAndLogDistance(startTime, SolrIndexCreator.class); } diff --git a/src/main/java/de/dfki/km/leech/util/TikaUtils.java b/src/main/java/de/dfki/km/leech/util/TikaUtils.java index 0ff3145..9e2f340 100644 --- a/src/main/java/de/dfki/km/leech/util/TikaUtils.java +++ b/src/main/java/de/dfki/km/leech/util/TikaUtils.java @@ -29,7 +29,7 @@ import org.apache.tika.parser.Parser; import org.xml.sax.ContentHandler; -import de.dfki.inquisition.text.StringUtils; +import de.dfki.inquisitor.text.StringUtils; import de.dfki.km.leech.config.CrawlerContext; import de.dfki.km.leech.parser.DirectoryCrawlerParser;
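
Usage sketch (not part of the patch) for the reflective createAnalyzer(analyzerClassName, stopWordFileName) factory shown above. The enclosing class is not visible in this hunk, so LuceneAnalyzerFactory is a placeholder name, and com.example.MyAnalyzer stands in for an analyzer class that actually offers a String[] stop word constructor:

import org.apache.lucene.analysis.Analyzer;

public class CreateAnalyzerExample
{
    public static void main(String[] args) throws Exception
    {
        // no stop word file: a (Version) constructor is tried first, then the no-arg constructor
        Analyzer standard = LuceneAnalyzerFactory.createAnalyzer(
                "org.apache.lucene.analysis.standard.StandardAnalyzer", null);

        // a stop word file only takes effect if the analyzer class has a String[] constructor;
        // otherwise it is ignored with a warning (com.example.MyAnalyzer is hypothetical)
        Analyzer custom = LuceneAnalyzerFactory.createAnalyzer(
                "com.example.MyAnalyzer", "./stopwords_de.txt");

        System.out.println(standard.getClass().getName());
        System.out.println(custom.getClass().getName());
    }
}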
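
A minimal sketch of LuceneUtilz.createQuery and the analyzeText helper behind it: the text is run through the given analyzer, a single surviving token yields a TermQuery, several tokens yield a PhraseQuery, and null is returned if nothing remains. The field name and the StandardAnalyzer are assumptions; in practice the analyzer should match the one used at indexing time:

import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;

import de.dfki.km.leech.lucene.basic.LuceneUtilz;

public class CreateQueryExample
{
    public static void main(String[] args)
    {
        Analyzer analyzer = new StandardAnalyzer();

        // tokenized/normalized form of the input text
        List<String> tokens = LuceneUtilz.analyzeText("body", "Lucene Index Creation", analyzer, Integer.MAX_VALUE);
        System.out.println(tokens); // [lucene, index, creation]

        Query phrase = LuceneUtilz.createQuery("body", "Lucene Index Creation", analyzer); // PhraseQuery
        Query single = LuceneUtilz.createQuery("body", "Lucene", analyzer);                // TermQuery

        System.out.println(phrase);
        System.out.println(single);
    }
}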
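
extractQueryTerms temporarily switches MultiTermQuery rewrites to CONSTANT_SCORE_BOOLEAN_REWRITE so that prefix and wildcard queries are expanded against the index and the concrete matching terms can be collected. A sketch, assuming an existing index under ./luceneIndex with a body field:

import java.nio.file.Paths;
import java.util.Set;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.store.FSDirectory;

import de.dfki.km.leech.lucene.basic.LuceneUtilz;

public class ExtractQueryTermsExample
{
    public static void main(String[] args) throws Exception
    {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("./luceneIndex"))))
        {
            QueryParser parser = new QueryParser("body", new StandardAnalyzer());

            // "lucen*" is expanded to the concrete index terms it matches
            Set<Term> terms = LuceneUtilz.extractQueryTerms("lucen*", parser, reader);

            for (Term term : terms)
                System.out.println(term.field() + ":" + term.text());
        }
    }
}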
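
getUniqueDocWithTerm looks up the single document carrying a given ID term and returns it together with its index document number; it returns null on no hit and throws if the term is not unique. The field name "id", the value, the index path, and the generic parameters of TwoValuesBox are assumptions for this sketch:

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;

import de.dfki.inquisitor.collections.TwoValuesBox;
import de.dfki.km.leech.lucene.basic.LuceneUtilz;

public class UniqueDocLookupExample
{
    public static void main(String[] args) throws Exception
    {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("./luceneIndex"))))
        {
            IndexSearcher searcher = new IndexSearcher(reader);

            // one document expected per ID term
            TwoValuesBox<Document, Integer> hit =
                    LuceneUtilz.getUniqueDocWithTerm(new Term("id", "file:/tmp/example.txt"), searcher);

            if (hit != null)
                System.out.println("doc no. " + hit.getSecond() + ": " + hit.getFirst());
        }
    }
}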
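
TextWithTermVectorOffsetsField wraps a tokenized text field whose term vectors and offsets are recorded, which is what term-vector-based processing such as the page count estimate needs. A short sketch of adding it to a document; the field names are illustrative:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;

import de.dfki.km.leech.lucene.basic.TextWithTermVectorOffsetsField;

public class TermVectorFieldExample
{
    public static void main(String[] args)
    {
        Document doc = new Document();

        // stored by default, analyzed, with term vectors and offsets
        doc.add(new TextWithTermVectorOffsetsField("body", "some extracted fulltext"));

        // indexed with term vectors but not stored
        doc.add(new TextWithTermVectorOffsetsField("bodyPrivate", "searchable but not stored", Store.NO));

        System.out.println(doc);
    }
}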
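
A sketch of driving PageCountEstimator over an existing index: documents without a page count get one estimated from their term count (roughly 400 terms per page), and a flag field marks the value as heuristic. The field names, the index path, and the omitted write-back step are assumptions; the body field must have been indexed with term vectors for the term count to be available:

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

import de.dfki.km.leech.lucene.basic.PageCountEstimator;

public class PageCountExample
{
    public static void main(String[] args) throws Exception
    {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("./luceneIndex"))))
        {
            for (int docNo = 0; docNo < reader.maxDoc(); docNo++)
            {
                Document doc = reader.document(docNo);

                // adds "pageCount" (estimated from the term count) and "pageCountIsHeuristic" if needed
                boolean modified = PageCountEstimator.addHeuristicDocPageCounts(docNo, doc, "pageCount",
                        "pageCountIsHeuristic", "body", reader);

                if (modified)
                    System.out.println("estimated " + doc.get("pageCount") + " page(s) for doc " + docNo);

                // to persist the change, the document would have to be re-written with an IndexWriter
            }
        }
    }
}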