Skip to content

Commit

Permalink
Update dependencies and configuration files
Browse files Browse the repository at this point in the history
  • Loading branch information
endomorphosis committed Apr 7, 2024
1 parent 8d3cf27 commit 3550895
Show file tree
Hide file tree
Showing 71 changed files with 28,435 additions and 565 deletions.
87 changes: 65 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,48 +1,91 @@
# Data Economy Hackathon
IPFS Huggingface Bridge

Author - Benjamin Barber @endomorphosis
for transformers.js visit:
https://github.com/endomorphosis/ipfs_transformers_js

QA / website - Kevin De Haan @coregod360
for huggingface datasets python library visit
https://github.com/endomorphosis/ipfs_datasets

CLEANUP / Windows compatibility / Breakfix 03/31/2024 - 04/07/2024
for orbitdbkit nodejs library visit
https://github.com/endomorphosis/orbitdb-benchmark/

Author - Benjamin Barber
QA - Kevin De Haan

# About

This is a model manager and wrapper for huggingface. It looks up an index of models from a collection of models, and will download a model from either https/s3/ipfs, depending on which source is the fastest.

# How to use
~~~shell
pip install .
~~~

to install

python3 setup.py

In your python script
run ``python3 example.py`` for examples of usage.

from transformers import AutoModelForSeq2SeqLM
this is designed to be a drop in replacement, which requires only 2 lines to be changed

from ipfs_transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_auto_download("google/t5_11b_trueteacher_and_anli")
In your python script
~~~shell
from transformers import AutoModel
from ipfs_transformers import AutoModel
model = AutoModel.from_auto_download("bge-small-en-v1.5")
~~~

or

from transformers import AutoModelForSeq2SeqLM
~~~shell
from transformers import AutoModel
from ipfs_transformers import AutoModel
model = AutoModel.from_ipfs("QmccfbkWLYs9K3yucc6b3eSt8s8fKcyRRt24e3CDaeRhM1")
~~~

or to use with S3 caching
~~~shell
from transformers import AutoModel
from ipfs_transformers import AutoModel
model = T5Model.from_auto_download(
model_name="google-bert/t5_11b_trueteacher_and_anli",
s3cfg={
"bucket": "cloud",
"endpoint": "https://storage.googleapis.com",
"secret_key": "",
"access_key": ""
}
)
~~~

# To scrape huggingface

with interactive prompt:

~~~shell
node scraper.js [source] [model name]
~~~

~~~shell
node scraper.js
~~~

from ipfs_transformers import AutoModelForSeq2SeqLM
import a model already defined:

model = AutoModelForSeq2SeqLM.from_ipfs("QmWJr4M1VN5KpJjqCsJsJg7PDmFoqQYs1BKpYxcdMY1qkh")
~~~shell
node scraper.js hf "modelname" (as defined in your .json files)
~~~

To scrape huggingface
import all models previously defined:

interactive prompt:
~~~shell
node scraper.js hf
~~~

node scraper.js
## TODO integrate orbitDB

import a model:
## TODO finish translating model manager to node.js and replace existing ipfs-cluster wrapper

node scraper.js hf "modelname" (as defined in your .json files)
## TODO finish translating model manager to browser js and replace existing ipfs-cluster wrapper

import all models
## TODO integrate transformers.js (browser implementation)

node scraper.js hf
## TODO integrate bacalhau dockerfile
21 changes: 21 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Demo of ipfs_transformers as a drop-in replacement for Hugging Face transformers.
# NOTE: the second import deliberately shadows the first — it mirrors the README's
# "only the import line changes" usage; the transformers import is otherwise unused.
from transformers import AutoModel
from ipfs_transformers import AutoModel

# Download by model name; per the README, the model is fetched from the fastest
# of https/s3/ipfs based on the model index.
model = AutoModel.from_auto_download("bge-small-en-v1.5")
print(dir(model))
# Download directly by IPFS content identifier (CID).
model = AutoModel.from_ipfs("QmccfbkWLYs9K3yucc6b3eSt8s8fKcyRRt24e3CDaeRhM1")
print(dir(model))


## OPTIONAL S3 Caching ##
# Uncomment to cache model files in an S3-compatible bucket (fill in credentials).
# NOTE(review): T5Model is not imported above — confirm the intended class name.

#model = T5Model.from_auto_download(
#	model_name="google-bert/t5_11b_trueteacher_and_anli",
#	s3cfg={
#		"bucket": "cloud",
#		"endpoint": "https://storage.googleapis.com",
#		"secret_key": "",
#		"access_key": "",
#	}
#)
#print(dir(model))
106 changes: 84 additions & 22 deletions huggingface_scraper/generate_manifest.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,21 @@ import path, { parse } from 'path'
import prompt_sync from 'prompt-sync'
import prompt_sync_history from 'prompt-sync-history'
import hf_embed_calc from './manifests/manifest_hf_embed.js'
import hf_embed_add from './manifests/manifest_hf_embed.js'
import llama_cpp_calc from './manifests/manifest_llama_cpp.js'
import llama_cpp_add from './manifests/manifest_llama_cpp.js'
import hf_lm_calc from './manifests/manifest_hf_lm.js'
import hf_lm_add from './manifests/manifest_hf_lm.js'
import hf_t5_calc from './manifests/manifest_hf_t5.js'
import hf_t5_add from './manifests/manifest_hf_t5.js'
import knn_calc from './manifests/manifest_knn.js'
import knn_add from './manifests/manifest_knn.js'
import api_calc from './manifests/manifest_api.js'
import api_add from './manifests/manifest_api.js'
import hf_faster_whisper_calc from './manifests/manifest_hf_faster_whisper.js'
import hf_faster_whisper_add from './manifests/manifest_hf_faster_whisper.js'
import hf_transformers_calc from './manifests/manifest_hf_transformers.js'
import hf_transformers_add from './manifests/manifest_hf_transformers.js'
import diffusion_calc from './manifests/manifest_diffusion.js'
import {complete, parse_templates, generate_test} from './utils.js'

Expand All @@ -19,18 +27,72 @@ export class Generate_Manifest{
this.hwRequirements = {}
}


generate_from_prompt(generate){
let generation = this.generate(generate)
let test_generation = false
if (test_generation){
let add_generation = this.add_generator_to_model_data(this, generation)
}
return generation
}

main(generate){
let self = this
let generation = self.generate(generate)
let generation = this.generate(generate)
let test_generation = false
if (test_generation){
let add_generation = this.add_generator_to_model_data(this, generation)
}
return generation
}

add_generator_to_model_data(generation){
if (generation.skill == undefined){
throw("skill is undefined")
}
else if (generation.skill == 'hf_transformers'){
results = hf_transformers_add(this)
}
else if (generation.skill == 'hf_embed'){
results = hf_embed_add(this)
}
else if (generation.skill == 'llama_cpp'){
results = llama_cpp_add(this)
}
else if (generation.skill == 'diffusion'){
results = diffusion_add(this)
}
else if (generation.skill == 'knn'){
results = knn_add(this)
}
else if (generation.skill == 'api'){
results = api_add(this)
}
else if (generation.skill == 'custom'){
results = custom_add(this)
}
else if (generation.skill == 'hf_faster_whisper'){
results = hf_faster_whisper_add(this)
}
else if (generation.skill == 'hf_lm'){
results = hf_lm_add(this)
}
else if (generation.skill == 'hf_t5'){
results = hf_t5_add(this)
}
else{
throw("skill is not defined")
}


return results
}

generate(generate){
let self = {}
let metadata = self.metadata
let hwRequirements = self.hwRequirements
let metadata = this.metadata
let hwRequirements = this.hwRequirements
let results
let model_types = ['hf_transformers', 'hf_embed', 'llama_cpp', 'diffusion', 'knn', 'api', 'whisper', 'custom']
let model_types = ['hf_transformers', 'hf_embed', 'llama_cpp', 'diffusion', 'knn', 'api', 'whisper', 'hf_lm', 'hf_t5']
// prompt the user for input
console.log("1. hf_transformers")
console.log("2. hf_embed")
Expand All @@ -39,7 +101,8 @@ export class Generate_Manifest{
console.log("5. knn")
console.log("6. api")
console.log("7. whisper")
console.log("8. custom")
console.log("8. hf_lm")
console.log("9. hf_t5")
// request console input
let prompt = prompt_sync(({
history: prompt_sync_history(),
Expand All @@ -60,63 +123,62 @@ export class Generate_Manifest{
}
}

self.skill = model_type
this.skill = model_type

if (model_type == 'hf_transformers'){
results = hf_transformers_calc(self)
results = hf_transformers_calc(this)
}

if (model_type == 'hf_faster_whisper'){
results = hf_faster_whisper_calc(self)
results = hf_faster_whisper_calc(this)
}

if (model_type == 'hf_lm'){
results = hf_lm_calc(self)
results = hf_lm_calc(this)
}

if (model_type == "hf_t5"){
results = hf_t5_calc(self)
results = hf_t5_calc(this)
}

if (model_type == 'hf_embed'){
results = hf_embed_calc(self)
results = hf_embed_calc(this)
}

if (model_type == 'llama_cpp'){
results = llama_cpp_calc(self)
results = llama_cpp_calc(this)
}

if (model_type == 'diffusion'){
results = diffusion_calc(self)
results = diffusion_calc(this)
}

if (model_type == 'knn'){
results = knn_calc(self)
results = knn_calc(this)
}

if (model_type == 'api'){
results = api_calc(self)
results = api_calc(this)
}

if (model_type == 'custom'){
results = custom_calc(self)
results = custom_calc(this)
}

try{
test = generate_test(results)
let test = generate_test(results)
}
catch(err){
console.log(err)
throw("Error in testing the manifest " + model_type)

// throw("Error in testing the manifest " + model_type)
}
finally{
return results
}

}

custom_calc(self){
custom_calc(){
throw("custom is not yet supported")
}

Expand Down
22 changes: 11 additions & 11 deletions huggingface_scraper/generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import generate_hf_embed from './modeldata/generate_hf_embed.json' assert { type
import generate_hf_faster_whisper from './modeldata/generate_hf_faster_whisper.json' assert { type: 'json' };
import generate_hf_lm from './modeldata/generate_hf_lm.json' assert { type: 'json' };
import generate_hf_t5 from './modeldata/generate_hf_t5.json' assert { type: 'json' };
import generate_hf_diffusion from './modeldata/generate_hf_diffusion.json' assert { type: 'json' };
import generate_hf_diffusion from './modeldata/generate_diffusion.json' assert { type: 'json' };
import generate_api from './modeldata/generate_api.json' assert { type: 'json' };
import generate_knn from './modeldata/generate_knn.json' assert { type: 'json' };

Expand Down Expand Up @@ -249,15 +249,15 @@ export class Generator{

main(){
this
this.llama_cpp = template_llama_cpp(self.local_model_path, self.collection_path)
this.hf_transformers = template_hf_transformers(self.local_model_path, self.collection_path)
this.hf_embed = template_hf_embed(self.local_model_path, self.collection_path)
this.hf_faster_whisper = template_hf_faster_whisper(self.local_model_path, self.collection_path)
this.hf_lm = template_hf_lm(self.local_model_path, self.collection_path)
this.hf_t5 = template_hf_t5(self.local_model_path, self.collection_path)
this.hf_diffusion = template_hf_diffusion(self.local_model_path, self.collection_path)
this.api = template_api(self.local_model_path, self.collection_path)
this.knn = template_knn(self.local_model_path, self.collection_path)
return self
this.llama_cpp = template_llama_cpp(this.local_model_path, this.collection_path)
this.hf_transformers = template_hf_transformers(this.local_model_path, this.collection_path)
this.hf_embed = template_hf_embed(this.local_model_path, this.collection_path)
this.hf_faster_whisper = template_hf_faster_whisper(this.local_model_path, this.collection_path)
this.hf_lm = template_hf_lm(this.local_model_path, this.collection_path)
this.hf_t5 = template_hf_t5(this.local_model_path, this.collection_path)
this.hf_diffusion = template_hf_diffusion(this.local_model_path, this.collection_path)
this.api = template_api(this.local_model_path, this.collection_path)
this.knn = template_knn(this.local_model_path, this.collection_path)
return this
}
}
Loading

0 comments on commit 3550895

Please sign in to comment.