From dda3bac2b6cf523ce1f2e08b152feef4f9cca17e Mon Sep 17 00:00:00 2001 From: Kirill Date: Mon, 7 Jun 2021 13:07:13 +0300 Subject: [PATCH] Add reduce sklearn config for blog (#73) --- configs/blogs/skl_conda_config.json | 427 ++++++++++++++++++++++++++++ 1 file changed, 427 insertions(+) create mode 100755 configs/blogs/skl_conda_config.json diff --git a/configs/blogs/skl_conda_config.json b/configs/blogs/skl_conda_config.json new file mode 100755 index 000000000..07557d2bf --- /dev/null +++ b/configs/blogs/skl_conda_config.json @@ -0,0 +1,427 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "kmeans", + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 1000, + "n_features": 20, + "training": { + "n_samples": 1000000 + } + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [1000], + "maxiter": [50], + "tol": [0.0] + }, + { + "algorithm": "kmeans", + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 5, + "n_features": 50, + "training": { + "n_samples": 10000000 + } + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [5], + "maxiter": [50], + "init": ["k-means++"], + "tol": [0.0] + }, + { + "algorithm": "kmeans", + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 20, + "n_features": 50, + "training": { + "n_samples": 3000000 + } + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [20], + "maxiter": [50], + "tol": [0.0] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 100, + "training": { + "n_samples": 1000000 + }, + "testing": { + "n_samples": 100000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 2000, + "training": { + "n_samples": 10000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 1000, + "training": { + "n_samples": 30000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 4000, + "training": { + "n_samples": 6000 + } + } + ], + "svd-solver": ["full"], + "n-components": [10] + }, + { + "algorithm": "df_clsf", + "dtype": ["float32"], + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": + { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": + { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + ], + "num-trees": [50], + "max-depth": [16], + "max-leaf-nodes": [131072], + "max-features": [0.2] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "training": { + "n_samples": 10000000 + } + } + ], + "alpha": [5] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "training": { + "n_samples": 10000000 + } + } + ] + }, + { + "algorithm": "log_reg", + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 20, + "training": { + "n_samples": 10000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 100, + "training": { + "n_samples": 2000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 5, + "n_features": 20, + "training": { + "n_samples": 10000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 5, + "n_features": 100, + "training": { + "n_samples": 2000000 + } + } + ], + "maxiter": [100], + "tol": [0] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "C": [500.0], + "kernel": ["rbf"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "C": [1.5e-3], + "kernel": ["linear"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "C": [100.0], + "kernel": ["linear"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "C": [50.0], + "kernel": ["rbf"] + }, + { + "algorithm": "dbscan", + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 50, + "n_features": 3, + "training": { + "n_samples": 500000 + } + }, + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 50, + "n_features": 10, + "training": { + "n_samples": 500000 + } + }, + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 100, + "n_features": 50, + "training": { + "n_samples": 500000 + } + } + ] + }, + { + "algorithm": "knn_clsf", + "dtype": ["float32"], + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 3, + "training": { + "n_samples": 100000 + }, + "testing": { + "n_samples": 100000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 10, + "training": { + "n_samples": 100000 + }, + "testing": { + "n_samples": 100000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 50, + "training": { + "n_samples": 20000 + }, + "testing": { + "n_samples": 20000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 10, + "n_features": 16, + "training": { + "n_samples": 250000 + }, + "testing": { + "n_samples": 250000 + } + } + ], + "method": ["brute"] + }, + { + "algorithm": "knn_clsf", + "dtype": ["float32"], + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 50, + "training": { + "n_samples": 20000 + }, + "testing": { + "n_samples": 20000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 10, + "n_features": 16, + "training": { + "n_samples": 250000 + }, + "testing": { + "n_samples": 250000 + } + } + ], + "method": ["kd_tree"] + }, + { + "algorithm": "train_test_split", + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 100, + "training": { + "n_samples": 1000000 + } + } + ], + "include-y": [""], + "train-size": [0.75], + "test-size": [0.25] + } + ] +}