Skip to content

Commit

Permalink
Merge pull request #21 from paulgoetze/release/0.5.0
Browse files Browse the repository at this point in the history
Release v0.5.0
  • Loading branch information
paulgoetze authored Jun 17, 2017
2 parents d83cc86 + 0ea14db commit 1e14a52
Show file tree
Hide file tree
Showing 38 changed files with 952 additions and 520 deletions.
30 changes: 30 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
AllCops:
TargetRubyVersion: 2.4
Exclude:
- 'bin/**/*'
- '*.gemspec'
- 'Gemfile'
- 'Gemfile.lock'

Style/Copyright:
Enabled: false

Style/Documentation:
Enabled: false

Metrics/LineLength:
Max: 80

Layout/MultilineMethodCallIndentation:
EnforcedStyle: indented

Style/FrozenStringLiteralComment:
Enabled: false

Metrics/ModuleLength:
Exclude:
- "**/*_spec.rb"

Metrics/BlockLength:
Exclude:
- "**/*_spec.rb"
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

[![Gem Version](https://badge.fury.io/rb/weka.svg)](http://badge.fury.io/rb/weka)
[![Travis Build](https://travis-ci.org/paulgoetze/weka-jruby.svg)](https://travis-ci.org/paulgoetze/weka-jruby)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/9634a6709ef545198e079a8daddff100)](https://www.codacy.com/app/paul-christoph-goetze/weka-jruby?utm_source=github.com&utm_medium=referral&utm_content=paulgoetze/weka-jruby&utm_campaign=Badge_Grade)

Machine Learning & Data Mining with JRuby based on the [Weka](http://www.cs.waikato.ac.nz/~ml/weka/index.html) Java library.

Expand Down Expand Up @@ -60,7 +61,7 @@ Here’s how to contribute:
Please try to add RSpec tests along with your new features. This will ensure that your code does not break existing functionality and that your feature is working as expected.

We use [Rubocop](https://github.com/bbatsov/rubocop) for code style recommendations.
Please make sure your contributions comply with the default config of Rubocop.
Please make sure your contributions comply with the project’s Rubocop config.

## Acknowledgement

Expand Down
28 changes: 23 additions & 5 deletions lib/weka/class_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,24 @@ def self.included(base)
end

module ClassMethods
def build_class(class_name, weka_module: nil, include_concerns: true)
def build_class(class_name, weka_module: nil, include_concerns: true, additional_includes: [])
java_import java_class_path(class_name, weka_module)
define_class(class_name, weka_module, include_concerns: include_concerns)
define_class(
class_name,
weka_module,
include_concerns: include_concerns,
additional_includes: additional_includes
)
end

def build_classes(*class_names, weka_module: nil, include_concerns: true)
def build_classes(*class_names, weka_module: nil, include_concerns: true, additional_includes: [])
class_names.each do |name|
build_class(name, weka_module: weka_module, include_concerns: include_concerns)
build_class(
name,
weka_module: weka_module,
include_concerns: include_concerns,
additional_includes: additional_includes
)
end
end

Expand Down Expand Up @@ -58,12 +68,13 @@ def toplevel_module?
name.scan('::').count == 1
end

def define_class(class_name, weka_module, include_concerns: true)
def define_class(class_name, weka_module, include_concerns: true, additional_includes: [])
module_eval <<-CLASS_DEFINITION, __FILE__, __LINE__ + 1
class #{class_name}
#{'include Concerns' if include_concerns}
#{include_serializable_for(class_name, weka_module)}
#{include_utils}
#{include_additionals(additional_includes)}
end
CLASS_DEFINITION
end
Expand All @@ -84,6 +95,13 @@ def utils_defined?
constantize(utils_super_modules).const_defined?(:Utils)
end

# Builds the source text of the `include` statements for the given modules,
# ready to be interpolated into a generated class body.
#
# modules - a single module/name, an Array of them, or nil.
#
# Returns one "include <name>" line per entry joined by newlines, or nil
# when there is nothing to include.
def include_additionals(modules)
  statements = Array(modules).map { |mod| "include #{mod}" }

  statements.join("\n") unless statements.empty?
end

# Resolves a '::'-separated constant path (e.g. "Weka::Classifiers") to the
# constant it names, looked up from the top-level namespace.
#
# Uses Module#const_get rather than evaluating a source string, so the
# argument is only ever treated as a constant path and never run as code.
#
# module_names - String (or Symbol) holding the constant path.
#
# Returns the resolved constant.
# Raises NameError if the path is malformed or the constant is not defined.
def constantize(module_names)
  Object.const_get(module_names.to_s)
end
Expand Down
20 changes: 18 additions & 2 deletions lib/weka/classifiers/evaluation.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
require 'weka/class_builder'

module Weka
module Classifiers
java_import 'weka.classifiers.Evaluation'

class Evaluation
include ClassBuilder

# Provide both spellings, f_measure and fmeasure, for consistency:
# JRuby's automatic method-name conversion turns 'fMeasure' into 'f_measure'
# but 'weightedFMeasure' into 'weighted_fmeasure'.
Expand All @@ -29,8 +33,20 @@ class Evaluation
alias average_cost avg_cost

alias cumulative_margin_distribution to_cumulative_margin_distribution_string
end

Java::WekaClassifiers::Evaluation.__persistent__ = true
# Mixin that exposes an including class's #get_curve under the shorter
# name #curve.
#
# The including class must already define #get_curve at include time;
# otherwise the alias cannot be created and NameError is raised.
module Curve
  class << self
    # Hook run when the module is included: adds the #curve alias to +base+.
    def included(base)
      # alias_method is private on older Rubies, hence the send.
      base.send(:alias_method, :curve, :get_curve)
    end
  end
end

build_classes :CostCurve,
:MarginCurve,
:ThresholdCurve,
weka_module: 'weka.classifiers.evaluation',
additional_includes: Curve
end
end
end
206 changes: 116 additions & 90 deletions lib/weka/classifiers/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,128 +5,154 @@ module Weka
module Classifiers
module Utils
def self.included(base)
base.class_eval do
java_import 'java.util.Random'
base.include Buildable if base.instance_methods.include?(:build_classifier)
base.include Classifiable if base.instance_methods.include?(:classify_instance)
base.include Updatable if base.instance_methods.include?(:update_classifier)
base.include Distributable if base.instance_methods.include?(:distribution_for_instance)
end

if instance_methods.include?(:build_classifier)
attr_reader :training_instances
module Checks
private

def train_with_instances(instances)
ensure_class_attribute_assigned!(instances)
def ensure_class_attribute_assigned!(instances)
return if instances.class_attribute_defined?

@training_instances = instances
build_classifier(instances)
error = 'Class attribute is not assigned for Instances.'
hint = 'You can assign a class attribute with #class_attribute=.'
message = "#{error} #{hint}"

self
end
raise UnassignedClassError, message
end

def cross_validate(folds: 3)
ensure_trained_with_instances!
def ensure_trained_with_instances!
return unless training_instances.nil?

evaluation = Evaluation.new(training_instances)
random = Java::JavaUtil::Random.new(1)
error = 'Classifier is not trained with Instances.'
hint = 'You can set the training instances with #train_with_instances.'
message = "#{error} #{hint}"

evaluation.cross_validate_model(self, training_instances, folds.to_i, random)
evaluation
end
raise UnassignedTrainingInstancesError, message
end
end

def evaluate(test_instances)
ensure_trained_with_instances!
ensure_class_attribute_assigned!(test_instances)
module Transformers
private

evaluation = Evaluation.new(training_instances)
evaluation.evaluate_model(self, test_instances)
evaluation
end
end
def classifiable_instance_from(instance_or_values)
attributes = training_instances.attributes
instances = Weka::Core::Instances.new(attributes: attributes)

if instance_methods.include?(:classify_instance)
def classify(instance_or_values)
ensure_trained_with_instances!
class_attribute = training_instances.class_attribute
class_index = training_instances.class_index
instances.insert_attribute_at(class_attribute, class_index)

instance = classifiable_instance_from(instance_or_values)
index = classify_instance(instance)
instances.class_index = training_instances.class_index
instances.add_instance(instance_or_values)

class_value_of_index(index)
end
end
instance = instances.first
instance.set_class_missing
instance
end
end

if instance_methods.include?(:update_classifier)
def add_training_instance(instance)
training_instances.add(instance)
update_classifier(instance)
module Buildable
java_import 'java.util.Random'
include Checks

self
end
attr_reader :training_instances

def add_training_data(data)
values = training_instances.internal_values_of(data)
instance = Weka::Core::DenseInstance.new(values)
add_training_instance(instance)
end
end
def train_with_instances(instances)
ensure_class_attribute_assigned!(instances)

if instance_methods.include?(:distribution_for_instance)
def distribution_for(instance_or_values)
ensure_trained_with_instances!
@training_instances = instances
build_classifier(instances)

instance = classifiable_instance_from(instance_or_values)
distributions = distribution_for_instance(instance)
self
end

class_distributions_from(distributions)
end
end
def cross_validate(folds: 3)
ensure_trained_with_instances!

private
evaluation = Evaluation.new(training_instances)
random = Java::JavaUtil::Random.new(1)

def ensure_class_attribute_assigned!(instances)
return if instances.class_attribute_defined?
evaluation.cross_validate_model(
self,
training_instances,
folds.to_i,
random
)

error = 'Class attribute is not assigned for Instances.'
hint = 'You can assign a class attribute with #class_attribute=.'
message = "#{error} #{hint}"
evaluation
end

raise UnassignedClassError, message
end
def evaluate(test_instances)
ensure_trained_with_instances!
ensure_class_attribute_assigned!(test_instances)

def ensure_trained_with_instances!
return unless training_instances.nil?
evaluation = Evaluation.new(training_instances)
evaluation.evaluate_model(self, test_instances)
evaluation
end
end

error = 'Classifier is not trained with Instances.'
hint = 'You can set the training instances with #train_with_instances.'
message = "#{error} #{hint}"
module Classifiable
include Checks
include Transformers

raise UnassignedTrainingInstancesError, message
end
def classify(instance_or_values)
ensure_trained_with_instances!

def classifiable_instance_from(instance_or_values)
attributes = training_instances.attributes
instances = Weka::Core::Instances.new(attributes: attributes)
instance = classifiable_instance_from(instance_or_values)
index = classify_instance(instance)

class_attribute = training_instances.class_attribute
class_index = training_instances.class_index
instances.insert_attribute_at(class_attribute, class_index)
class_value_of_index(index)
end

instances.class_index = training_instances.class_index
instances.add_instance(instance_or_values)
private

instance = instances.first
instance.set_class_missing
instance
end
def class_value_of_index(index)
training_instances.class_attribute.value(index)
end
end

def class_value_of_index(index)
training_instances.class_attribute.value(index)
end
module Updatable
def add_training_instance(instance)
training_instances.add(instance)
update_classifier(instance)

self
end

def add_training_data(data)
values = training_instances.internal_values_of(data)
instance = Weka::Core::DenseInstance.new(values)
add_training_instance(instance)
end
end

module Distributable
include Checks
include Transformers

def distribution_for(instance_or_values)
ensure_trained_with_instances!

instance = classifiable_instance_from(instance_or_values)
distributions = distribution_for_instance(instance)

class_distributions_from(distributions)
end

private

def class_distributions_from(distributions)
class_values = training_instances.class_attribute.values
def class_distributions_from(distributions)
class_values = training_instances.class_attribute.values

distributions.each_with_index.reduce({}) do |result, (distribution, index)|
class_value = class_values[index]
result[class_value] = distribution
result
end
distributions.each_with_object({}).with_index do |(distribution, result), index|
class_value = class_values[index]
result[class_value] = distribution
result
end
end
end
Expand Down
Loading

0 comments on commit 1e14a52

Please sign in to comment.