Skip to content

Commit

Permalink
implement clean code for Phase2
Browse files Browse the repository at this point in the history
  • Loading branch information
mjmaher987 committed Jul 25, 2021
1 parent 2c85ce7 commit bd7cc9a
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 0 deletions.
122 changes: 122 additions & 0 deletions src/main/java/InvertedIndex.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package main.java;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class InvertedIndex {
List<String> stopwords = Arrays.asList("a", "able", "about",
"across", "after", "all", "almost", "also", "am", "among", "an",
"and", "any", "are", "as", "at", "be", "because", "been", "but",
"by", "can", "cannot", "could", "dear", "did", "do", "does",
"either", "else", "ever", "every", "for", "from", "get", "got",
"had", "has", "have", "he", "her", "hers", "him", "his", "how",
"however", "i", "if", "in", "into", "is", "it", "its", "just",
"least", "let", "like", "likely", "may", "me", "might", "most",
"must", "my", "neither", "no", "nor", "not", "of", "off", "often",
"on", "only", "or", "other", "our", "own", "rather", "said", "say",
"says", "she", "should", "since", "so", "some", "than", "that",
"the", "their", "them", "then", "there", "these", "they", "this",
"tis", "to", "too", "twas", "us", "wants", "was", "we", "were",
"what", "when", "where", "which", "while", "who", "whom", "why",
"will", "with", "would", "yet", "you", "your");

Map<String, List<Tuple>> indexedWords = new HashMap<>();
List<String> files = new ArrayList<>();

public void indexFile(File file) throws IOException {
int fileNumber = files.indexOf(file.getPath());
if (fileNumber == -1) {
files.add(file.getPath());
fileNumber = files.size() - 1;
}

BufferedReader reader = new BufferedReader(new FileReader(file));
for (String line = reader.readLine(); line != null; line = reader
.readLine()) {
for (String _word : line.split("\\W+")) {
String word = _word.toLowerCase();
if (stopwords.contains(word))
continue;
List<Tuple> idx = indexedWords.computeIfAbsent(word, k -> new LinkedList<>());
idx.add(new Tuple(fileNumber));
}
}
}

public Set<String> search(ArrayList<String> wordsToFind) {
Set<String> answer = new HashSet<>();
for (String words : wordsToFind) {

String word = words.toLowerCase();
for (String key : indexedWords.keySet()) {
Matcher matcher = Pattern.compile(word).matcher(key);
if (matcher.find()) {
List<Tuple> tupleList = indexedWords.get(key);
if (tupleList != null) {
for (Tuple t : tupleList) {
answer.add(files.get(t.fileNumber));
}
}
}
}
}
return answer;
}

public Set<String> findCommonFiles(Set<String> answer, ArrayList<Set<String>> wordsToFindCommon) {
Set<String> commonWords = findCommonWords(wordsToFindCommon);
if (answer.size() > 0 && commonWords != null) {
answer.retainAll(commonWords);
return answer;
} else if (answer.size() == 0 && commonWords != null) {
return commonWords;
} else if (answer.size() == 0) {
return null;
} else return answer;

}

public Set<String> deleteGivenFiles(Set<String> answer, Set<String> deleteFiles) {
for (String s : deleteFiles) {
answer.remove(s);
}
return answer;
}


public Set<String> findCommonWords(ArrayList<Set<String>> wordsToFindCommon) {
if (wordsToFindCommon.size() > 0) {
Set<String> commonWords = wordsToFindCommon.get(0);

if (wordsToFindCommon.size() > 1) {
for (int i = 1; i < wordsToFindCommon.size(); i++) {
commonWords.retainAll(wordsToFindCommon.get(i));
}
}
return commonWords;
}
return null;
}

private static class Tuple {
private final int fileNumber;

public Tuple(int fileno) {
this.fileNumber = fileno;
}

}
}

23 changes: 23 additions & 0 deletions src/main/java/Main.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package main.java;

import java.io.File;

/**
 * Entry point: indexes every regular file in a data directory and then starts
 * the interactive query loop.
 */
public class Main {
    /** Directory used when no directory is given on the command line. */
    private static final String DEFAULT_DATA_DIRECTORY =
            "C:\\Users\\ASUS\\IdeaProjects\\codestar\\src\\main\\resources\\EnglishData";

    /**
     * Builds the index and hands it to {@code TakeInput}.
     *
     * @param args optional; {@code args[0]} overrides the default data directory
     */
    public static void main(String[] args) {
        String directory = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_DIRECTORY;
        File directoryPath = new File(directory);
        File[] filesList = directoryPath.listFiles();

        // listFiles() returns null when the path is not a directory or cannot be
        // read. An `assert` is skipped unless the JVM runs with -ea, so check
        // explicitly and report the problem.
        if (filesList == null) {
            System.err.println("Cannot list files in directory: " + directoryPath.getPath());
            return;
        }

        try {
            InvertedIndex idx = new InvertedIndex();
            for (File file : filesList) {
                // Opening a sub-directory for reading fails, which would abort
                // the whole run; index regular files only.
                if (file.isFile()) {
                    idx.indexFile(file);
                }
            }
            new TakeInput(idx);
        } catch (Exception e) {
            // Top-level boundary: report the failure and exit.
            e.printStackTrace();
        }
    }
}
52 changes: 52 additions & 0 deletions src/main/java/TakeInput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package main.java;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Interactive query loop over an {@code InvertedIndex}.
 *
 * <p>Each input line is split on whitespace into tokens. A token starting with
 * {@code +} is a "plus" term, a token starting with {@code -} is a "minus"
 * term, and any other token is a "normal" term.
 */
public class TakeInput {

    /** Matches {@code +term}; group 1 is the term without the sign. */
    private static final Pattern PLUS_TERM = Pattern.compile("^\\+(.+)$");

    /** Matches {@code -term}; group 1 is the term without the sign. */
    private static final Pattern MINUS_TERM = Pattern.compile("^-(.+)$");

    /**
     * Prints {@code 1} and immediately starts reading queries from standard input.
     *
     * @param idx the index to query
     */
    public TakeInput(InvertedIndex idx) {
        System.out.println("1");
        getOrder(idx);
    }

    /**
     * Reads queries from standard input, one per line, until input ends.
     *
     * <p>For each line: the plus terms are searched together, each normal term is
     * searched on its own and the results are combined through
     * {@code idx.findCommonFiles}, the combined set is printed, the files matching
     * the minus terms are removed, and the remaining paths are printed one per line.
     *
     * @param idx the index to query
     */
    public void getOrder(InvertedIndex idx) {
        // Not closed on purpose: closing the Scanner would close System.in.
        Scanner scanner = new Scanner(System.in);
        // hasNextLine() ends the loop at end of input; an unconditional
        // nextLine() would throw NoSuchElementException there.
        while (scanner.hasNextLine()) {
            String input = scanner.nextLine();
            String[] inputSplited = input.split("(\\s+)");
            ArrayList<String> plusStrings = new ArrayList<>();
            ArrayList<String> minusStrings = new ArrayList<>();
            ArrayList<String> normalStrings = new ArrayList<>();
            for (String string : inputSplited) {
                addItemToOneOfThreeArrayLists(string, plusStrings, minusStrings, normalStrings);
            }

            Set<String> answer = idx.search(plusStrings);
            Set<String> toDelete = idx.search(minusStrings);

            // One result set per normal term, to be intersected with each other.
            ArrayList<Set<String>> commons = new ArrayList<>();
            for (String normalString : normalStrings) {
                ArrayList<String> singleTerm = new ArrayList<>();
                singleTerm.add(normalString);
                commons.add(idx.search(singleTerm));
            }

            answer = idx.findCommonFiles(answer, commons);
            if (answer == null) {
                // findCommonFiles may report "nothing found" as null; treat it
                // as an empty result so the steps below do not throw.
                answer = new HashSet<>();
            }
            System.out.println(answer);
            answer = idx.deleteGivenFiles(answer, toDelete);
            for (String s : answer) {
                System.out.println(s);
            }
        }
    }

    /**
     * Classifies one query token and appends it to the matching list.
     *
     * @param string        the raw token
     * @param plusStrings   receives the term of a {@code +term} token (sign stripped)
     * @param minusStrings  receives the term of a {@code -term} token (sign stripped)
     * @param normalStrings receives every other token unchanged, including a bare
     *                      {@code +} or {@code -}
     */
    private void addItemToOneOfThreeArrayLists(String string, ArrayList<String> plusStrings, ArrayList<String> minusStrings, ArrayList<String> normalStrings) {
        Matcher plusMatcher = PLUS_TERM.matcher(string);
        if (plusMatcher.find()) {
            plusStrings.add(plusMatcher.group(1));
            return;
        }
        Matcher minusMatcher = MINUS_TERM.matcher(string);
        if (minusMatcher.find()) {
            minusStrings.add(minusMatcher.group(1));
            return;
        }
        normalStrings.add(string);
    }


}
1 change: 1 addition & 0 deletions src/main/resources/EnglishData/57110
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mother I have a 42 yr old male friend, misdiagnosed as havin osteopporosis for two years, who recently found out that hi illness is the rare Gaucher's disease.Gaucher's disease symptoms include: brittle bones (he lost 9 inches off his hieght); enlarged liver and spleen; interna bleeding; and fatigue (all the time). The problem (in Type 1) i attributed to a genetic mutation where there is a lack of th enzyme glucocerebroside in macrophages so the cells swell up This will eventually cause deathEnyzme replacement therapy has been successfully developed an approved by the FDA in the last few years so that those patient administered with this drug (called Ceredase) report a remarkabl improvement in their condition. Ceredase, which is manufacture by biotech biggy company--Genzyme--costs the patient $380,00 per year. Gaucher\'s disease has justifyably been called "the mos expensive disease in the world"NEED INFOI have researched Gaucher's disease at the library but am relyin on netlanders to provide me with any additional information**news, stories, report**people you know with this diseas**ideas, articles about Genzyme Corp, how to get a hold o enough money to buy some, programs available to help wit costs**Basically ANY HELP YOU CAN OFFEThanks so very muchDeborah
1 change: 1 addition & 0 deletions src/main/resources/EnglishData/58043
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
>This wouldn't happen to be the same thing as chiggers, would it>A truly awful parasitic affliction, as I understand it. Tiny bug>dig deeply into the skin, burying themselves. Yuck! They have thes>things in OklahomaClose. My mother comes from Gainesville Tex, right across the borderThey claim to be the chigger capitol of the world, and I believe themWhen I grew up in Fort Worth it was bad enough, but in Gainesvillin the summer an attack was guaranteedDoug McDonal

0 comments on commit bd7cc9a

Please sign in to comment.