-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
121 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
{ | ||
"author": "Xinping Song" | ||
"algorithm": "RF-SCM/Magpie v1.0" | ||
"algorithm_long": | ||
"bibtex_refs": ['@article{Dunn2020,\n' | ||
' doi = {10.1038/s41524-020-00406-3},\n' | ||
' url = {https://doi.org/10.1038/s41524-020-00406-3},\n' | ||
' year = {2020},\n' | ||
' month = sep,\n' | ||
' publisher = {Springer Science and Business Media {LLC}},\n' | ||
' volume = {6},\n' | ||
' number = {1},\n' | ||
' author = {Alexander Dunn and Qi Wang and Alex Ganose and Daniel Dopp and ' | ||
'Anubhav Jain},\n' | ||
' title = {Benchmarking materials property prediction methods: the Matbench ' | ||
'test set and Automatminer reference algorithm},\n' | ||
' journal = {npj Computational Materials}\n' | ||
'}', | ||
'@article{Breiman2001,\n' | ||
' doi = {10.1023/a:1010933404324},\n' | ||
' url = {https://doi.org/10.1023/a:1010933404324},\n' | ||
' year = {2001},\n' | ||
' publisher = {Springer Science and Business Media {LLC}},\n' | ||
' volume = {45},\n' | ||
' number = {1},\n' | ||
' pages = {5--32},\n' | ||
' author = {Leo Breiman},\n' | ||
' journal = {Machine Learning}\n' | ||
'}', | ||
'@article{Ward2016,\n' | ||
' doi = {10.1038/npjcompumats.2016.28},\n' | ||
' url = {https://doi.org/10.1038/npjcompumats.2016.28},\n' | ||
' year = {2016},\n' | ||
' month = aug,\n' | ||
' publisher = {Springer Science and Business Media {LLC}},\n' | ||
' volume = {2},\n' | ||
' number = {1},\n' | ||
' author = {Logan Ward and Ankit Agrawal and Alok Choudhary and Christopher ' | ||
'Wolverton},\n' | ||
' title = {A general-purpose machine learning framework for predicting ' | ||
'properties of inorganic materials},\n' | ||
' journal = {npj Computational Materials}\n' | ||
'}', | ||
'@article {QUA:QUA24917,author = {Faber, Felix and Lindmaa, Alexander and von ' | ||
'Lilienfeld, O. Anatole and Armiento, Rickard},title = {Crystal structure ' | ||
'representations for machine learning models of formation energies},journal = ' | ||
'{International Journal of Quantum Chemistry},volume = {115},number = ' | ||
'{16},issn = {1097-461X},url = {http://dx.doi.org/10.1002/qua.24917},doi = ' | ||
'{10.1002/qua.24917},pages = {1094--1101},keywords = {machine learning, ' | ||
'formation energies, representations, crystal structure, periodic ' | ||
'systems},year = {2015},}'] | ||
"notes": | ||
"requirements":{"python": ["scikit-learn==0.23.2", "numpy==1.22.4", "matbench==0.6.0"]} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
""" | ||
Code for training and recording the matbench_v0.1 random forest benchmark. | ||
The ML pipeline is placed within the Automatminer pipeline code infrastructure for convenience. | ||
All training and inference were done on a single 128-core HPC node. | ||
Reduce the number of jobs (n_jobs) to lower memory usage on consumer machines. | ||
""" | ||
|
||
if __name__ == '__main__':
    # Script entry point: fit a fixed random-forest MatPipe on every fold of
    # the matbench_jdft2d task, record the predictions with matbench, and
    # write the benchmark object to disk.
    from automatminer import MatPipe
    from automatminer.automl.adaptors import SinglePipelineAdaptor, TPOTAdaptor
    from automatminer.featurization import AutoFeaturizer
    from automatminer.preprocessing import DataCleaner, FeatureReducer
    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

    from matbench.bench import MatbenchBenchmark
    from multiprocessing import set_start_method

    # Force the "spawn" start method for any worker processes created below.
    set_start_method("spawn", force=True)

    # The learner is a single 500-estimator Random Forest model
    # (a regressor and a classifier are both supplied to the adaptor).
    learner = SinglePipelineAdaptor(
        regressor=RandomForestRegressor(n_estimators=500),
        classifier=RandomForestClassifier(n_estimators=500),
    )
    pipe_config = {
        "learner": learner,
        # An empty reducer list is passed, i.e. no reducers are configured.
        "reducer": FeatureReducer(reducers=[]),
        "cleaner": DataCleaner(
            feature_na_method="mean",
            max_na_frac=0.01,
            na_method_fit="drop",
            na_method_transform="mean",
        ),
        "autofeaturizer": AutoFeaturizer(n_jobs=8, preset="debug"),
    }

    pipe = MatPipe(**pipe_config)

    # Datasets are not loaded up front; the single task used is loaded below.
    mb = MatbenchBenchmark(autoload=False)

    # Only the matbench_jdft2d task is run here (not every task in mb.tasks).
    task = mb.matbench_jdft2d
    print(task)
    task.load()

    # NOTE(review): the loop nesting was reconstructed from a listing that had
    # lost its indentation; the numbered snapshot is assumed to be written
    # once per fold -- confirm against the original file.
    for i, fold in enumerate(task.folds):
        df_train = task.get_train_and_val_data(fold, as_type="df")

        # Fit the RF with matpipe
        pipe.fit(df_train, task.metadata.target)

        # Predict on the held-out fold; the target column is excluded.
        df_test = task.get_test_data(fold, include_target=False, as_type="df")
        predictions = pipe.predict(df_test)[f"{task.metadata.target} predicted"]

        # A single configuration is used
        params = {'note': 'single config; see benchmark user metadata'}

        task.record(fold, predictions, params=params)

        # Write a numbered snapshot of the benchmark after this fold.
        mb.to_file(f"results_{i}.json.gz")

    # Save your results
    mb.to_file("results.json.gz")
|
||
|
Binary file not shown.