Skip to content

Commit

Permalink
Run cljfmt fix on examples folder (#261)
Browse files (browse the repository at this point in the history)
Co-authored-by: Anthony Khong <[email protected]>
  • Loading branch information
agilecreativity and anthony-khong authored Oct 13, 2020
1 parent 0adf6ae commit f30c6d3
Show file tree
Hide file tree
Showing 15 changed files with 193 additions and 199 deletions.
22 changes: 11 additions & 11 deletions examples/classification.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(ns examples.classification
(:require
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))

;; Logistic Regression
(def training (g/read-libsvm! "test/resources/sample_libsvm_data.txt"))
Expand Down Expand Up @@ -52,15 +52,15 @@

(def pipeline
(ml/pipeline
label-indexer
feature-indexer
(ml/gbt-classifier {:label-col :indexed-label
:features-col :indexed-features
:max-iter 10
:feature-subset-strategy "auto"})
(ml/index-to-string {:input-col :prediction
:output-col :predicted-label
:labels (.labels label-indexer)})))
label-indexer
feature-indexer
(ml/gbt-classifier {:label-col :indexed-label
:features-col :indexed-features
:max-iter 10
:feature-subset-strategy "auto"})
(ml/index-to-string {:input-col :prediction
:output-col :predicted-label
:labels (.labels label-indexer)})))

(def model (ml/fit train-data pipeline))

Expand Down
27 changes: 13 additions & 14 deletions examples/clustering.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(ns examples.clustering
(:require
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))

;; K-Means
(def dataset
Expand Down Expand Up @@ -38,15 +38,14 @@
(ml/transform model)
(g/limit 2)
(g/collect-col :topicDistribution))
#_
((0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0)
(0.07537574948606664
0.07537608757379946
0.07537444990446238
0.0753760406513953
0.32162508215023483
0.07537597087758716
0.0753743398006919
0.0753743698147718
0.075374842014245
0.07537306772674557))
#_((0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0)
(0.07537574948606664
0.07537608757379946
0.07537444990446238
0.0753760406513953
0.32162508215023483
0.07537597087758716
0.0753743398006919
0.0753743698147718
0.075374842014245
0.07537306772674557))
38 changes: 19 additions & 19 deletions examples/customer_segmentation.clj
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@
(-> invoices
(g/remove (g/null? :description))
(ml/transform
(ml/tokeniser {:input-col :description
:output-col :descriptors}))
(ml/tokeniser {:input-col :description
:output-col :descriptors}))
(ml/transform
(ml/stop-words-remover {:input-col :descriptors
:output-col :cleaned-descriptors}))
(ml/stop-words-remover {:input-col :descriptors
:output-col :cleaned-descriptors}))
(g/with-column :descriptor (g/explode :cleaned-descriptors))
(g/with-column :descriptor (g/regexp-replace :descriptor
(g/lit "[^a-zA-Z'']")
Expand Down Expand Up @@ -84,9 +84,9 @@
(def log-spending
(-> descriptors
(g/remove (g/||
(g/null? :customer-id)
(g/< :price 0.01)
(g/< :quantity 1)))
(g/null? :customer-id)
(g/< :price 0.01)
(g/< :quantity 1)))
(g/group-by :customer-id :descriptor)
(g/agg {:log-spend (g/log1p (g/sum (g/* :price :quantity)))})
(g/order-by (g/desc :log-spend))))
Expand All @@ -104,24 +104,24 @@

(def nmf-pipeline
(ml/pipeline
(ml/string-indexer {:input-col :descriptor
:output-col :descriptor-id})
(ml/als {:max-iter 100
:reg-param 0.01
:rank 8
:nonnegative true
:user-col :customer-id
:item-col :descriptor-id
:rating-col :log-spend})))
(ml/string-indexer {:input-col :descriptor
:output-col :descriptor-id})
(ml/als {:max-iter 100
:reg-param 0.01
:rank 8
:nonnegative true
:user-col :customer-id
:item-col :descriptor-id
:rating-col :log-spend})))

(def nmf-pipeline-model
(ml/fit log-spending nmf-pipeline))

(def id->descriptor
(ml/index-to-string
{:input-col :id
:output-col :descriptor
:labels (ml/labels (first (ml/stages nmf-pipeline-model)))}))
{:input-col :id
:output-col :descriptor
:labels (ml/labels (first (ml/stages nmf-pipeline-model)))}))

(def nmf-model (last (ml/stages nmf-pipeline-model)))

Expand Down
4 changes: 2 additions & 2 deletions examples/dataframe_api.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(ns examples.dataframe-api
(:require
[zero-one.geni.core :as g]
[zero-one.geni.test-resources :refer [melbourne-df]]))
[zero-one.geni.core :as g]
[zero-one.geni.test-resources :refer [melbourne-df]]))

(def dataframe melbourne-df)

Expand Down
66 changes: 32 additions & 34 deletions examples/features.clj
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
(ns examples.features
(:require
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]
[zero-one.geni.test-resources :refer [libsvm-df]]))
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]
[zero-one.geni.test-resources :refer [libsvm-df]]))

;; Tokeniser, Hashing TF and IDF
(def sentence-data
(g/table->dataset
[[0.0 "Hi I heard about Spark"]
[0.0 "I wish Java could use case classes"]
[1.0 "Logistic regression models are neat"]]
[:label :sentence]))
[[0.0 "Hi I heard about Spark"]
[0.0 "I wish Java could use case classes"]
[1.0 "Logistic regression models are neat"]]
[:label :sentence]))

(def pipeline
(ml/pipeline
(ml/tokenizer {:input-col :sentence
:output-col :words})
(ml/hashing-tf {:num-features 20
:input-col :words
:output-col :raw-features})
(ml/idf {:input-col :raw-features
:output-col :features})))
(ml/tokenizer {:input-col :sentence
:output-col :words})
(ml/hashing-tf {:num-features 20
:input-col :words
:output-col :raw-features})
(ml/idf {:input-col :raw-features
:output-col :features})))

(def pipeline-model
(ml/fit sentence-data pipeline))
Expand All @@ -30,22 +30,21 @@
(g/collect-col :features))

;;=>
#_
((0.28768207245178085
0.6931471805599453
0.28768207245178085
0.5753641449035617)
(0.6931471805599453
0.6931471805599453
1.3862943611198906
0.28768207245178085
0.6931471805599453
0.28768207245178085)
(0.6931471805599453
0.6931471805599453
0.28768207245178085
0.6931471805599453
0.6931471805599453))
#_((0.28768207245178085
0.6931471805599453
0.28768207245178085
0.5753641449035617)
(0.6931471805599453
0.6931471805599453
1.3862943611198906
0.28768207245178085
0.6931471805599453
0.28768207245178085)
(0.6931471805599453
0.6931471805599453
0.28768207245178085
0.6931471805599453
0.6931471805599453))

;; PCA
(def dataframe
Expand All @@ -65,10 +64,9 @@
(g/collect-col :pca-features))

;;=>
#_
((1.6485728230883814 -4.0132827005162985 -1.0091435193998504)
(-4.645104331781533 -1.1167972663619048 -1.0091435193998501)
(-6.428880535676488 -5.337951427775359 -1.009143519399851))
#_((1.6485728230883814 -4.0132827005162985 -1.0091435193998504)
(-4.645104331781533 -1.1167972663619048 -1.0091435193998501)
(-6.428880535676488 -5.337951427775359 -1.009143519399851))

;; Standard Scaler
(def scaler
Expand Down
4 changes: 2 additions & 2 deletions examples/foreign_idioms.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(ns examples.foreign-idioms
(:require
[zero-one.geni.core :as g]
[zero-one.geni.test-resources :refer [melbourne-df]]))
[zero-one.geni.core :as g]
[zero-one.geni.test-resources :refer [melbourne-df]]))

(def dataframe melbourne-df)

Expand Down
12 changes: 6 additions & 6 deletions examples/frequent_pattern.clj
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
(ns examples.classification
(:require
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))

(def dataset
(-> (g/table->dataset
[['("1" "2" "5")]
['("1" "2" "3" "5")]
['("1" "2")]]
[:items])))
[['("1" "2" "5")]
['("1" "2" "3" "5")]
['("1" "2")]]
[:items])))

(def model
(ml/fit
Expand Down
73 changes: 36 additions & 37 deletions examples/model_selection.clj
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
(ns examples.classification
(:require
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))
[zero-one.geni.core :as g]
[zero-one.geni.ml :as ml]))

(def training
(g/table->dataset
[[0 "a b c d e spark" 1.0]
[1 "b d" 0.0]
[2 "spark f g h" 1.0]
[3 "hadoop mapreduce" 0.0]
[4 "b spark who" 1.0]
[5 "g d a y" 0.0]
[6 "spark fly" 1.0]
[7 "was mapreduce" 0.0]
[8 "e spark program" 1.0]
[9 "a e c l" 0.0]
[10 "spark compile" 1.0]
[11 "hadoop software" 0.0]]
[:id :text :label]))
[[0 "a b c d e spark" 1.0]
[1 "b d" 0.0]
[2 "spark f g h" 1.0]
[3 "hadoop mapreduce" 0.0]
[4 "b spark who" 1.0]
[5 "g d a y" 0.0]
[6 "spark fly" 1.0]
[7 "was mapreduce" 0.0]
[8 "e spark program" 1.0]
[9 "a e c l" 0.0]
[10 "spark compile" 1.0]
[11 "hadoop software" 0.0]]
[:id :text :label]))

(def hashing-tf
(ml/hashing-tf {:input-col :words :output-col :features}))
Expand All @@ -27,14 +27,14 @@

(def pipeline
(ml/pipeline
(ml/tokeniser {:input-col :text :output-col :words})
hashing-tf
logistic-reg))
(ml/tokeniser {:input-col :text :output-col :words})
hashing-tf
logistic-reg))

(def param-grid
(ml/param-grid
{hashing-tf {:num-features (mapv int [10 100 1000])}
logistic-reg {:reg-param [0.1 0.01]}}))
{hashing-tf {:num-features (mapv int [10 100 1000])}
logistic-reg {:reg-param [0.1 0.01]}}))

(def cross-validator
(ml/cross-validator {:estimator pipeline
Expand All @@ -59,20 +59,19 @@
g/collect)

;;=>
#_
({:id 4,
:text "spark i j k",
:probability (1.994512925146874E-4 0.9998005487074854),
:prediction 1.0}
{:id 5,
:text "l m n",
:probability (0.9326577483128498 0.06734225168715033),
:prediction 0.0}
{:id 6,
:text "mapreduce spark",
:probability (0.5561018462768792 0.4438981537231208),
:prediction 0.0}
{:id 7,
:text "apache hadoop",
:probability (0.9488612611950926 0.051138738804907484),
:prediction 0.0})
#_({:id 4,
:text "spark i j k",
:probability (1.994512925146874E-4 0.9998005487074854),
:prediction 1.0}
{:id 5,
:text "l m n",
:probability (0.9326577483128498 0.06734225168715033),
:prediction 0.0}
{:id 6,
:text "mapreduce spark",
:probability (0.5561018462768792 0.4438981537231208),
:prediction 0.0}
{:id 7,
:text "apache hadoop",
:probability (0.9488612611950926 0.051138738804907484),
:prediction 0.0})
Loading

0 comments on commit f30c6d3

Please sign in to comment.