Skip to content

Commit

Permalink
SAMOA-43: Add TextReader
Browse files Browse the repository at this point in the history
  • Loading branch information
abifet committed Aug 21, 2015
1 parent d454deb commit 0a07f80
Showing 1 changed file with 205 additions and 0 deletions.
205 changes: 205 additions & 0 deletions samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
package org.apache.samoa.streams;

/*
* #%L
* SAMOA
* %%
* Copyright (C) 2014 - 2015 Apache Software Foundation
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/

import com.github.javacliparser.IntOption;
import org.apache.samoa.instances.*;
import org.apache.samoa.moa.core.InstanceExample;
import org.apache.samoa.moa.core.ObjectRepository;
import org.apache.samoa.moa.options.AbstractOptionHandler;
import org.apache.samoa.moa.streams.InstanceStream;
import org.apache.samoa.moa.tasks.TaskMonitor;

import java.util.ArrayList;
import java.util.Random;

/**
* Text generator that simulates sentiment analysis on tweets.
*/
public class TextGenerator extends AbstractOptionHandler implements InstanceStream {

private static final long serialVersionUID = 3028905554604259131L;

public IntOption numAttsOption = new IntOption("numAtts", 'a',
"The number of attributes to generate.", 1000, 0, Integer.MAX_VALUE);

public IntOption instanceRandomSeedOption = new IntOption(
"instanceRandomSeed", 'i',
"Seed for random generation of instances.", 1);

protected InstancesHeader streamHeader;

protected Random instanceRandom;

protected int[] wordTwitterGenerator;
protected double[] freqTwitterGenerator;
protected double[] sumFreqTwitterGenerator;
protected int[] classTwitterGenerator;

protected int sizeTable;
protected double probPositive = 0.1;
protected double probNegative = 0.1;
protected double zipfExponent = 1.5;
protected double lengthTweet = 15;

protected int countTweets = 0;

@Override
public InstancesHeader getHeader() {
return this.streamHeader;
}

@Override
public long estimatedRemainingInstances() {
return -1;
}

@Override
public boolean hasMoreInstances() {
return true;
}

@Override
public InstanceExample nextInstance() {
int[] votes;
double[] attVals;
attVals = new double[this.numAttsOption.getValue() + 1];

do {
int length = (int) (lengthTweet * (1.0 + this.instanceRandom.nextGaussian()));
if (length < 1) length = 1;
votes = new int[3];
for (int j = 0; j < length; j++) {
double rand = this.instanceRandom.nextDouble();
//binary search
int i = 0;
int min = 0;
int max = sizeTable - 1;
int mid;
do {
mid = (min + max) / 2;
if (rand > this.sumFreqTwitterGenerator[mid]) {
min = mid + 1;
} else {
max = mid - 1;
}
} while ((this.sumFreqTwitterGenerator[mid] != rand) && (min <= max));

attVals[this.wordTwitterGenerator[mid]] = 1;
votes[this.classTwitterGenerator[mid]]++;

}
} while (votes[1] == votes[2]);

Instance inst = new DenseInstance(1.0, attVals);
inst.setDataset(getHeader());
inst.setClassValue((votes[1] > votes[2]) ? 0 : 1);
this.countTweets++;
return new InstanceExample(inst);
}

@Override
public boolean isRestartable() {
return true;
}

@Override
public void restart() {

this.sizeTable = this.numAttsOption.getValue();

//Prepare table of words to generate tweets
this.wordTwitterGenerator = new int[sizeTable];
this.freqTwitterGenerator = new double[sizeTable];
this.sumFreqTwitterGenerator = new double[sizeTable];
this.classTwitterGenerator = new int[sizeTable];

this.countTweets = 0;

double sum = 0;
this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue());
for (int i = 0; i < this.sizeTable; i++) {
this.wordTwitterGenerator[i] = i + 1;
this.freqTwitterGenerator[i] = 1.0 / Math.pow(i + 1, zipfExponent);
sum += this.freqTwitterGenerator[i];
this.sumFreqTwitterGenerator[i] = sum;
double rand = this.instanceRandom.nextDouble();
this.classTwitterGenerator[i] = (rand < probPositive ? 1 : (rand < probNegative + probPositive ? 2 : 0));
}
for (int i = 0; i < this.sizeTable; i++) {
this.freqTwitterGenerator[i] /= sum;
this.sumFreqTwitterGenerator[i] /= sum;
}

}

@Override
protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
generateHeader();
restart();
}

@Override
public void getDescription(StringBuilder sb, int indent) {

}
private void generateHeader() {
ArrayList<Attribute> attributes = new ArrayList();
for (int i = 0; i < this.numAttsOption.getValue(); i++) {
attributes.add(new Attribute("att" + (i + 1)));
}
ArrayList<String> classLabels = new ArrayList();
for (int i = 0; i < 2; i++) {
classLabels.add("class" + (i + 1));
}
attributes.add(new Attribute("class", classLabels));
this.streamHeader = new InstancesHeader(new Instances(
getCLICreationString(InstanceStream.class), attributes, 0));
this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
}


public void changePolarity(int numberWords) {
for (int i = 0; i < numberWords; ) {
int randWord = this.instanceRandom.nextInt(this.sizeTable);
int polarity = this.classTwitterGenerator[randWord];
if (polarity == 1) {
this.classTwitterGenerator[i] = 2;
i++;
}
if (polarity == 2) {
this.classTwitterGenerator[i] = 1;
i++;
}
}
}

public void changeFreqWords(int numberWords) {
for (int i = 0; i < numberWords; i++) {
int randWordTo = this.instanceRandom.nextInt(this.sizeTable);
int randWordFrom = this.instanceRandom.nextInt(this.sizeTable);
this.wordTwitterGenerator[randWordTo] = randWordFrom;
this.wordTwitterGenerator[randWordFrom] = randWordTo;
}
}


}

0 comments on commit 0a07f80

Please sign in to comment.