Design exploration #2
base: master
.gitignore
@@ -0,0 +1,7 @@
/target
**/*.rs.bk
Cargo.lock

# IDEs
.idea/
tags
Cargo.toml
@@ -0,0 +1,13 @@
[package]
name = "linfa"
version = "0.1.0"
authors = ["LukeMathWalker <[email protected]>"]
edition = "2018"

[dependencies]

[dev-dependencies]
ndarray = "0.12.1"
ndarray-rand = "0.9.0"
rand = "*"
derive_more = "0.13.0"
examples/running_mean.rs
@@ -0,0 +1,59 @@
extern crate linfa;
extern crate ndarray;
extern crate ndarray_rand;
extern crate rand;
#[macro_use]
extern crate derive_more;

use crate::standard_scaler::{Config, OnlineOptimizer, ScalingError, StandardScaler};
use linfa::{Fit, IncrementalFit, Transformer};
use ndarray::{stack, Array1, ArrayBase, Axis, Data, Ix1};
use ndarray_rand::RandomExt;
use rand::distributions::Uniform;

mod standard_scaler;

fn generate_batch(n_samples: usize) -> (Array1<f64>, Array1<f64>) {
    let distribution = Uniform::new(0., 10.);
    let x = Array1::random(n_samples, distribution);
    let y = Array1::random(n_samples, distribution);
    (x, y)
}

fn check<S>(scaler: &StandardScaler, x: &ArrayBase<S, Ix1>) -> Result<(), ScalingError>
where
    S: Data<Elem = f64>,
{
    let old_batch_mean = x.mean_axis(Axis(0)).into_scalar();
    let new_batch_mean = scaler.transform(&x)?.mean_axis(Axis(0)).into_scalar();
    let old_batch_std = x.std_axis(Axis(0), 1.).into_scalar();
    let new_batch_std = scaler.transform(&x)?.std_axis(Axis(0), 1.).into_scalar();
    println!(
        "The mean.\nBefore scaling: {:?}\nAfter scaling: {:?}\n",
        old_batch_mean, new_batch_mean
    );
    println!(
        "The std deviation.\nBefore scaling: {:?}\nAfter scaling: {:?}\n",
        old_batch_std, new_batch_std
    );
    Ok(())
}

/// Run it with: cargo run --example running_mean
fn main() -> Result<(), ScalingError> {
    let n_samples = 20;
    let (x, y) = generate_batch(n_samples);

    let mut optimizer = OnlineOptimizer::default();
    let standard_scaler = optimizer.fit(&x, &y, Config::default())?;
Review comment: Passing the config in at fit time might make it difficult to compose estimators. Say the estimator is a pipeline of estimators: we wouldn't want to pass all the config in a fit. Having two steps, a) building the pipeline and b) fitting it, is more natural IMO.

Reply: The two things are not mutually exclusive, I'd say. You could compose the configuration of all steps in the pipeline and then pass that in when you want to fit it; it shouldn't look very different.
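To make the reply concrete, here is a minimal sketch of what "compose the configuration, then fit" could look like under this design. `PipelineConfig` and `sketch` are hypothetical names reusing the example's helpers, not part of this PR:

```rust
// Hypothetical sketch: per-step configs are composed up front into a single
// struct, and only handed over at fit time, mirroring the single-estimator API.
struct PipelineConfig {
    scaler: Config,
    // ...configs for any other pipeline steps would live here
}

fn sketch() -> Result<(), ScalingError> {
    let (x, y) = generate_batch(20);
    // Step a) build the composed configuration...
    let pipeline_config = PipelineConfig {
        scaler: Config::default(),
    };
    // Step b) ...and pass it in when fitting.
    let mut optimizer = OnlineOptimizer::default();
    let scaler = optimizer.fit(&x, &y, pipeline_config.scaler)?;
    check(&scaler, &x)
}
```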
    check(&standard_scaler, &x)?;

    let (x2, y2) = generate_batch(n_samples);
    let standard_scaler = optimizer.incremental_fit(&x2, &y2, standard_scaler)?;
Review comment: I find this conceptually difficult to follow. If anything I would have expected `standard_scaler.incremental_fit(&x2, &y2, optimizer)`, not the other way around.

Reply: Yeah, that's a good point. It should be easy enough to flip it.
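For reference, a sketch of the flipped call the reviewer describes, with the transformer as the receiver; this is hypothetical, not what the PR implements:

```rust
// Hypothetical flipped API: "update this scaler using that optimizer".
impl StandardScaler {
    pub fn incremental_fit<S>(
        self,
        inputs: &ArrayBase<S, Ix1>,
        targets: &Array1<f64>,
        optimizer: &mut OnlineOptimizer,
    ) -> Result<StandardScaler, ScalingError>
    where
        S: Data<Elem = f64>,
    {
        // Delegate to the existing trait implementation.
        optimizer.incremental_fit(inputs, targets, self)
    }
}

// The call site would then read:
// let standard_scaler = standard_scaler.incremental_fit(&x2, &y2, &mut optimizer)?;
```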
    let whole_x = stack(Axis(0), &[x.view(), x2.view()]).expect("Failed to stack arrays");
    check(&standard_scaler, &whole_x)?;

    Ok(())
}
examples/standard_scaler/config.rs
@@ -0,0 +1,24 @@
use crate::standard_scaler::{Input, Output, StandardScaler};
use linfa::Blueprint;
use ndarray::Data;

pub struct Config {
    // Delta degrees of freedom.
    // With ddof = 1, you get the sample standard deviation
    // With ddof = 0, you get the population standard deviation
    pub ddof: f64,
}

/// Defaults to computing the sample standard deviation.
impl Default for Config {
    fn default() -> Self {
        Self { ddof: 1. }
    }
}

impl<S> Blueprint<Input<S>, Output> for Config
where
    S: Data<Elem = f64>,
{
    type Transformer = StandardScaler;
}
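As a quick illustration of what `ddof` does (a sketch, not part of the diff): the variance denominator is `n - ddof`, so for three samples the two settings differ visibly.

```rust
use ndarray::{array, Axis};

fn ddof_demo() {
    let x = array![2.0, 4.0, 6.0];
    // ddof = 0: population standard deviation, divides by n.
    let population = x.std_axis(Axis(0), 0.).into_scalar(); // ~1.633
    // ddof = 1: sample standard deviation, divides by n - 1.
    let sample = x.std_axis(Axis(0), 1.).into_scalar(); // 2.0
    assert!(population < sample);
}
```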
examples/standard_scaler/error.rs
@@ -0,0 +1,7 @@
use std::error::Error;

/// Fast-and-dirty error struct
#[derive(Debug, Eq, PartialEq, From, Display)]
pub struct ScalingError {}

impl Error for ScalingError {}
examples/standard_scaler/mod.rs
@@ -0,0 +1,15 @@
use ndarray::{Array1, ArrayBase, Ix1};

/// Short-hand notations
type Input<S> = ArrayBase<S, Ix1>;
type Output = Array1<f64>;

mod config;
mod error;
mod optimizer;
mod transformer;

pub use config::Config;
pub use error::ScalingError;
pub use optimizer::OnlineOptimizer;
pub use transformer::StandardScaler;
examples/standard_scaler/optimizer.rs
@@ -0,0 +1,93 @@
use crate::standard_scaler::{Config, Input, Output, ScalingError, StandardScaler};
use linfa::{Fit, IncrementalFit};
use ndarray::{Axis, Data};

/// It keeps track of the number of samples seen so far, to allow for
/// incremental computation of mean and standard deviation.
pub struct OnlineOptimizer {
    pub n_samples: u64,
}

/// Initialize n_samples to 0.
impl Default for OnlineOptimizer {
    fn default() -> Self {
        Self { n_samples: 0 }
    }
}

impl<S> Fit<Config, Input<S>, Output> for OnlineOptimizer
where
    S: Data<Elem = f64>,
{
    type Error = ScalingError;

    fn fit(
        &mut self,
        inputs: &Input<S>,
        _targets: &Output,
        blueprint: Config,
Review comment: Why not call this `params`?

Reply: Params is kind of an overloaded term: in this case, I'd say that we are passing hyperparameters (e.g. number of convolutional layers in a CNN), not parameters (e.g. the network weights).

Reply: Why do you prefer "model configuration" to "hyperparameter" for that purpose?
    ) -> Result<StandardScaler, Self::Error> {
        if inputs.len() == 0 {
            return Err(ScalingError {});
        }
        // Compute relevant quantities
        let mean = inputs.mean_axis(Axis(0)).into_scalar();
        let standard_deviation = inputs.std_axis(Axis(0), blueprint.ddof).into_scalar();
        // Initialize n_samples using the array length
        self.n_samples = inputs.len() as u64;
        // Return new, tuned scaler
        Ok(StandardScaler {
            ddof: blueprint.ddof,
            mean,
            standard_deviation,
        })
    }
}

impl<S> IncrementalFit<StandardScaler, Input<S>, Output> for OnlineOptimizer
where
    S: Data<Elem = f64>,
{
    type Error = ScalingError;

    fn incremental_fit(
        &mut self,
        inputs: &Input<S>,
        _targets: &Output,
        transformer: StandardScaler,
    ) -> Result<StandardScaler, Self::Error> {
        if inputs.len() == 0 {
            // Nothing to be done
            return Ok(transformer);
        }

        let ddof = transformer.ddof;

        // Compute relevant quantities for the new batch
        let batch_n_samples = inputs.len();
        let batch_mean = inputs.mean_axis(Axis(0)).into_scalar();
        let batch_std = inputs.std_axis(Axis(0), ddof).into_scalar();

        // Update: combine the statistics of the two batches using the pairwise
        // mean/variance combination (as in Chan et al.'s parallel algorithm)
        let mean_delta = batch_mean - transformer.mean;
        let new_n_samples = self.n_samples + (batch_n_samples as u64);
        let new_mean =
            transformer.mean + mean_delta * (batch_n_samples as f64) / (new_n_samples as f64);
        let new_std = ((transformer.standard_deviation.powi(2) * (self.n_samples as f64 - ddof)
            + batch_std.powi(2) * (batch_n_samples as f64 - ddof)
            + mean_delta.powi(2) * (self.n_samples as f64) * (batch_n_samples as f64)
                / (new_n_samples as f64))
            / (new_n_samples as f64 - ddof))
            .sqrt();

        // Update n_samples
        self.n_samples = new_n_samples;

        // Return tuned scaler
        Ok(StandardScaler {
            ddof,
            mean: new_mean,
            standard_deviation: new_std,
        })
    }
}
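The update above folds the new batch's statistics into the running ones, so fitting incrementally should agree exactly (up to float error) with a single fit on the concatenated data. A minimal test sketch of that property, assuming the example's module layout:

```rust
#[cfg(test)]
mod tests {
    use crate::standard_scaler::{Config, OnlineOptimizer, ScalingError};
    use linfa::{Fit, IncrementalFit};
    use ndarray::{array, stack, Axis};

    #[test]
    fn incremental_fit_matches_batch_fit() -> Result<(), ScalingError> {
        let a = array![1., 2., 3.];
        let b = array![4., 5., 6., 7.];
        let targets = array![0.]; // the scaler ignores targets

        // Fit on the first batch, then fold in the second one incrementally.
        let mut online = OnlineOptimizer::default();
        let scaler = online.fit(&a, &targets, Config::default())?;
        let scaler = online.incremental_fit(&b, &targets, scaler)?;

        // Fit in one go on the concatenation of both batches.
        let whole = stack(Axis(0), &[a.view(), b.view()]).expect("Failed to stack arrays");
        let mut batch = OnlineOptimizer::default();
        let reference = batch.fit(&whole, &targets, Config::default())?;

        assert!((scaler.mean - reference.mean).abs() < 1e-12);
        assert!((scaler.standard_deviation - reference.standard_deviation).abs() < 1e-12);
        Ok(())
    }
}
```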
examples/standard_scaler/transformer.rs
@@ -0,0 +1,29 @@
use crate::standard_scaler::{Input, Output, ScalingError};
use linfa::Transformer;
use ndarray::Data;

/// Given an input, it rescales it to have zero mean and unit variance.
///
/// We use 64-bit floats for simplicity.
pub struct StandardScaler {
    // Delta degrees of freedom.
    // With ddof = 1, you get the sample standard deviation
    // With ddof = 0, you get the population standard deviation
    pub ddof: f64,
    pub mean: f64,
    pub standard_deviation: f64,
}

impl<S> Transformer<Input<S>, Output> for StandardScaler
where
    S: Data<Elem = f64>,
{
    type Error = ScalingError;

    fn transform(&self, inputs: &Input<S>) -> Result<Output, Self::Error> {
        Ok((inputs - self.mean) / self.standard_deviation)
    }
}
src/lib.rs
@@ -0,0 +1,133 @@
use std::error;
use std::iter;

/// The basic `Transformer` trait.
///
/// It is training-agnostic: a transformer takes an input and returns an output.
///
/// There might be multiple ways to discover the best settings for every
/// particular algorithm (e.g. training a logistic regressor using
/// a pseudo-inverse matrix vs using gradient descent).
/// It doesn't matter: the end result, the transformer, is a set of parameters.
/// The way those parameters originated is an orthogonal concept.
///
/// In the same way, it has no notion of loss or "correct" predictions.
/// Those concepts are embedded elsewhere.
///
/// It's generic over input and output types:
/// - you can transform a fully in-memory dataset;
/// - you can transform a stream of data;
/// - you can return a class;
/// - you can return a probability distribution.
///
/// The mechanism for selecting the desired output, when not self-evident from the downstream
/// usage, should be the same as for the `::collect()` method.
pub trait Transformer<I, O> {
    type Error: error::Error;

    fn transform(&self, inputs: &I) -> Result<O, Self::Error>;
}

/// One step closer to the peak.
///
/// `Fit` is generic over a type `B` implementing the `Blueprint` trait: `B::Transformer` is used to
/// constrain what type of inputs and targets are acceptable.
///
/// `fit` takes an instance of `B` as one of its inputs, `blueprint`: it's consumed with move
/// semantics and a new transformer is returned.
///
/// Different types implementing `Fit` can work on the same `Blueprint` type!
///
/// It's a transition in the transformer state machine: from `Blueprint` to `Transformer`.
///
/// It's generic over input and output types:
/// - you can fit on a fully in-memory dataset;
/// - you can fit on a stream of data;
/// - you can use integer-encoded class membership as a target;
/// - you can use one-hot-encoded class membership as a target.
pub trait Fit<B, I, O>
where
    B: Blueprint<I, O>,
{
    type Error: error::Error;

    fn fit(&mut self, inputs: &I, targets: &O, blueprint: B)
        -> Result<B::Transformer, Self::Error>;
}

/// We are not done with that `Transformer` yet.
///
/// `IncrementalFit` is generic over a type `T` implementing the `Transformer` trait: `T` is used to
/// constrain what type of inputs and targets are acceptable.
///
/// `incremental_fit` takes an instance of `T` as one of its inputs, `transformer`: it's consumed with move
/// semantics and a new transformer is returned.
///
/// It's a transition in the transformer state machine: from `Transformer` to `Transformer`.
///
/// It's generic over input and output types:
/// - you can fit on a fully in-memory dataset;
/// - you can fit on a stream of data;
/// - you can use integer-encoded class membership as a target;
/// - you can use one-hot-encoded class membership as a target.
pub trait IncrementalFit<T, I, O>
where
    T: Transformer<I, O>,
{
    type Error: error::Error;

    fn incremental_fit(
        &mut self,
        inputs: &I,
        targets: &O,
        transformer: T,
    ) -> Result<T, Self::Error>;
}

/// Where `Transformer`s are forged.
///
/// `Blueprint` is a marker trait: it identifies what types can be used as starting points for
/// building `Transformer`s. It's the initial stage of the transformer state machine.
///
/// Every `Blueprint` is associated with a single `Transformer` type (is it wise to do so?).
///
/// For the same transformer type `T`, nothing prevents a user from providing more than one `Blueprint`:
/// multiple initialization strategies can sometimes be used to build the same transformer type.
///
/// Each of these strategies can take different (hyper)parameters, even though they return an
/// instance of the same transformer type in the end.
pub trait Blueprint<I, O> {
    type Transformer: Transformer<I, O>;
}

/// Where you need to go meta (hyperparameters!).
///
/// `BlueprintGenerator`s can be used to explore different combinations of hyperparameters
/// when you are working with a certain `Transformer` type.
///
/// `BlueprintGenerator::generate` returns, if successful, an `IntoIterator` type
/// yielding instances of blueprints.
pub trait BlueprintGenerator<B, I, O>
where
    B: Blueprint<I, O>,
{
    type Error: error::Error;
    type Output: IntoIterator<Item = B>;

    fn generate(&self) -> Result<Self::Output, Self::Error>;
}

/// Any `Blueprint` can be used as a `BlueprintGenerator`, as long as it's cloneable:
/// it returns an iterator with a single element, a clone of itself.
impl<B, I, O> BlueprintGenerator<B, I, O> for B
where
    B: Blueprint<I, O> + Clone,
{
    // Random error, didn't have time to get a proper one
    type Error = std::io::Error;
    type Output = iter::Once<B>;

    fn generate(&self) -> Result<Self::Output, Self::Error> {
        Ok(iter::once(self.clone()))
    }
}
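To see how `BlueprintGenerator` could drive a hyperparameter search under this design, here is a hypothetical generator for the scaler's `Config` (the `DdofGrid` type is illustrative, not part of the PR):

```rust
use crate::standard_scaler::{Config, Input, Output};
use linfa::BlueprintGenerator;
use ndarray::Data;

// Hypothetical generator: yields one Config blueprint per candidate ddof value.
#[derive(Clone)]
pub struct DdofGrid {
    pub ddofs: Vec<f64>,
}

impl<S> BlueprintGenerator<Config, Input<S>, Output> for DdofGrid
where
    S: Data<Elem = f64>,
{
    // Placeholder error type, mirroring the blanket impl above
    type Error = std::io::Error;
    type Output = Vec<Config>;

    fn generate(&self) -> Result<Self::Output, Self::Error> {
        Ok(self.ddofs.iter().map(|&ddof| Config { ddof }).collect())
    }
}
```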
Review comment: So if we have several models, this means we would need to use the full paths, e.g. `use crate::standard_scaler::{Config, OnlineOptimizer, StandardScaler};`, which might become somewhat difficult to manage? Also, purely from the user-experience and readability side (I understand this has other advantages), I find the builder pattern in rustlearn somewhat simpler, because one doesn't have to deal with the optimizer.
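For contrast, a rustlearn-style builder around the same scaler might look roughly like the following; `StandardScalerBuilder` is a hypothetical sketch that simply hides the optimizer, not a proposed API:

```rust
use crate::standard_scaler::{Config, OnlineOptimizer, ScalingError, StandardScaler};
use linfa::Fit;
use ndarray::Array1;

// Hypothetical builder-style sketch: hyperparameters are set through a
// chainable settings struct, and no optimizer appears at the call site.
pub struct StandardScalerBuilder {
    ddof: f64,
}

impl StandardScalerBuilder {
    pub fn new() -> Self {
        Self { ddof: 1. }
    }
    pub fn ddof(mut self, ddof: f64) -> Self {
        self.ddof = ddof;
        self
    }
    // Fitting consumes the builder and returns the fitted transformer directly;
    // the optimizer still exists, but is tucked away inside.
    pub fn fit(self, x: &Array1<f64>) -> Result<StandardScaler, ScalingError> {
        let mut optimizer = OnlineOptimizer::default();
        optimizer.fit(x, &Array1::zeros(0), Config { ddof: self.ddof })
    }
}
```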