-
Notifications
You must be signed in to change notification settings - Fork 3
/
FrequencyByDoc.java
57 lines (47 loc) · 1.98 KB
/
FrequencyByDoc.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/* This file is part of DocumentCluster, a program for clustering text
documents based on similarity. To use, specify the number of clusters
followed by the documents, which must be located in the data subdirectory.
Stopwords are eliminated by filtering the document contents against
stopwords.txt in the same directory. Words are stemmed using the Porter
Stemming algorithm. k-means clustering based on cosine similarity is used
for the clustering.
Copyright (C) 2013 Ezra Erb
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 3 as published
by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
I'd appreciate a note if you find this program useful or make
updates. Please contact me through LinkedIn or GitHub (my profile also has
a link to the code repository)
*/
import java.util.*;
/* This class lists a word frequency value combined with the index of the
document the value was calculated from. It mainly exists because Java has
no generic pair class that can handle primitive types */
/**
 * Immutable pairing of a word frequency value with the index of the
 * document that value belongs to.
 */
public final class FrequencyByDoc
{
    // Both fields are assigned exactly once in the constructor and never
    // modified afterwards; 'final' lets the compiler enforce that.
    private final double _frequency;
    private final int _document; // Index to document in corpus

    /**
     * Creates a frequency/document pair.
     *
     * @param frequency the word frequency value
     * @param document  index of the document in the corpus
     */
    public FrequencyByDoc(double frequency, int document)
    {
        _frequency = frequency;
        _document = document;
    }

    /**
     * @return the frequency value supplied at construction
     */
    public double getFrequency()
    {
        return _frequency;
    }

    /**
     * @return the document index supplied at construction
     */
    public int getDocument()
    {
        return _document;
    }

    /**
     * Formats this pair as {@code <document>:<frequency>}, for example
     * {@code 3:0.5}.
     *
     * @return the document index and the frequency separated by a colon
     */
    @Override
    public String toString()
    {
        // String concatenation converts the int and the double exactly as
        // String.valueOf would, so the output text is unchanged.
        return _document + ":" + _frequency;
    }
}