Skip to content

Commit

Permalink
Update dependencies and configuration files
Browse files Browse the repository at this point in the history
  • Loading branch information
endomorphosis committed Apr 7, 2024
1 parent 8d3cf27 commit 3550895
Show file tree
Hide file tree
Showing 71 changed files with 28,435 additions and 565 deletions.
87 changes: 65 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,48 +1,91 @@
# Data Economy Hackathon
IPFS Huggingface Bridge

Author - Benjamin Barber @endomorphosis
for transformers.js visit:
https://github.com/endomorphosis/ipfs_transformers_js

QA / website - Kevin De Haan @coregod360
for huggingface datasets python library visit
https://github.com/endomorphosis/ipfs_datasets

CLEANUP / Windows compatibility / Breakfix 03/31/2024 - 04/07/2024
for orbitdbkit nodejs library visit
https://github.com/endomorphosis/orbitdb-benchmark/

Author - Benjamin Barber
QA - Kevin De Haan

# About

This is a model manager and wrapper for huggingface. It looks up an index of models from a collection of models, and will download a model from either https/s3/ipfs, depending on which source is the fastest.

# How to use
~~~shell
pip install .
~~~

to install

python3 setup.py

In your python script
run ``python3 example.py`` for examples of usage.

from transformers import AutoModelForSeq2SeqLM
this is designed to be a drop in replacement, which requires only 2 lines to be changed

from ipfs_transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_auto_download("google/t5_11b_trueteacher_and_anli")
In your python script
~~~shell
from transformers import AutoModel
from ipfs_transformers import AutoModel
model = AutoModel.from_auto_download("bge-small-en-v1.5")
~~~

or

from transformers import AutoModelForSeq2SeqLM
~~~shell
from transformers import AutoModel
from ipfs_transformers import AutoModel
model = AutoModel.from_ipfs("QmccfbkWLYs9K3yucc6b3eSt8s8fKcyRRt24e3CDaeRhM1")
~~~

or to use with S3 caching
~~~shell
from transformers import AutoModel
from ipfs_transformers import AutoModel
model = T5Model.from_auto_download(
model_name="google-bert/t5_11b_trueteacher_and_anli",
s3cfg={
"bucket": "cloud",
"endpoint": "https://storage.googleapis.com",
"secret_key": "",
"access_key": ""
}
)
~~~

# To scrape huggingface

with interactive prompt:

~~~shell
node scraper.js [source] [model name]
~~~

~~~shell
node scraper.js
~~~

from ipfs_transformers import AutoModelForSeq2SeqLM
import a model already defined:

model = AutoModelForSeq2SeqLM.from_ipfs("QmWJr4M1VN5KpJjqCsJsJg7PDmFoqQYs1BKpYxcdMY1qkh")
~~~shell
node scraper.js hf "modelname" (as defined in your .json files)
~~~

To scrape huggingface
import all models previously defined:

interactive prompt:
~~~shell
node scraper.js hf
~~~

node scraper.js
## TODO integrate orbitDB

import a model:
## TODO finish translating model manager to node.js and replace existing ipfs-cluster wrapper

node scraper.js hf "modelname" (as defined in your .json files)
## TODO finish translating model manager to browser js and replace existing ipfs-cluster wrapper

import all models
## TODO integrate transformers.js (browser implementation)

node scraper.js hf
## TODO integrate bacalhau dockerfile
21 changes: 21 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Demo of ipfs_transformers as a drop-in replacement for Hugging Face transformers.
# NOTE: the second import deliberately shadows the first — it mirrors the README's
# "only the import line changes" usage; the transformers import is otherwise unused.
from transformers import AutoModel
from ipfs_transformers import AutoModel

# Download by model name; per the README, the model is fetched from the fastest
# of https/s3/ipfs based on the model index.
model = AutoModel.from_auto_download("bge-small-en-v1.5")
print(dir(model))
# Download directly by IPFS content identifier (CID).
model = AutoModel.from_ipfs("QmccfbkWLYs9K3yucc6b3eSt8s8fKcyRRt24e3CDaeRhM1")
print(dir(model))


## OPTIONAL S3 Caching ##
# Uncomment to cache model files in an S3-compatible bucket (fill in credentials).
# NOTE(review): T5Model is not imported above — confirm the intended class name.

#model = T5Model.from_auto_download(
#	model_name="google-bert/t5_11b_trueteacher_and_anli",
#	s3cfg={
#		"bucket": "cloud",
#		"endpoint": "https://storage.googleapis.com",
#		"secret_key": "",
#		"access_key": "",
#	}
#)
#print(dir(model))
106 changes: 84 additions & 22 deletions huggingface_scraper/generate_manifest.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,21 @@ import path, { parse } from 'path'
import prompt_sync from 'prompt-sync'
import prompt_sync_history from 'prompt-sync-history'
import hf_embed_calc from './manifests/manifest_hf_embed.js'
import hf_embed_add from './manifests/manifest_hf_embed.js'
import llama_cpp_calc from './manifests/manifest_llama_cpp.js'
import llama_cpp_add from './manifests/manifest_llama_cpp.js'
import hf_lm_calc from './manifests/manifest_hf_lm.js'
import hf_lm_add from './manifests/manifest_hf_lm.js'
import hf_t5_calc from './manifests/manifest_hf_t5.js'
import hf_t5_add from './manifests/manifest_hf_t5.js'
import knn_calc from './manifests/manifest_knn.js'
import knn_add from './manifests/manifest_knn.js'
import api_calc from './manifests/manifest_api.js'
import api_add from './manifests/manifest_api.js'
import hf_faster_whisper_calc from './manifests/manifest_hf_faster_whisper.js'
import hf_faster_whisper_add from './manifests/manifest_hf_faster_whisper.js'
import hf_transformers_calc from './manifests/manifest_hf_transformers.js'
import hf_transformers_add from './manifests/manifest_hf_transformers.js'
import diffusion_calc from './manifests/manifest_diffusion.js'
import {complete, parse_templates, generate_test} from './utils.js'

Expand All @@ -19,18 +27,72 @@ export class Generate_Manifest{
this.hwRequirements = {}
}


generate_from_prompt(generate){
let generation = this.generate(generate)
let test_generation = false
if (test_generation){
let add_generation = this.add_generator_to_model_data(this, generation)
}
return generation
}

main(generate){
let self = this
let generation = self.generate(generate)
let generation = this.generate(generate)
let test_generation = false
if (test_generation){
let add_generation = this.add_generator_to_model_data(this, generation)
}
return generation
}

add_generator_to_model_data(generation){
if (generation.skill == undefined){
throw("skill is undefined")
}
else if (generation.skill == 'hf_transformers'){
results = hf_transformers_add(this)
}
else if (generation.skill == 'hf_embed'){
results = hf_embed_add(this)
}
else if (generation.skill == 'llama_cpp'){
results = llama_cpp_add(this)
}
else if (generation.skill == 'diffusion'){
results = diffusion_add(this)
}
else if (generation.skill == 'knn'){
results = knn_add(this)
}
else if (generation.skill == 'api'){
results = api_add(this)
}
else if (generation.skill == 'custom'){
results = custom_add(this)
}
else if (generation.skill == 'hf_faster_whisper'){
results = hf_faster_whisper_add(this)
}
else if (generation.skill == 'hf_lm'){
results = hf_lm_add(this)
}
else if (generation.skill == 'hf_t5'){
results = hf_t5_add(this)
}
else{
throw("skill is not defined")
}


return results
}

generate(generate){
let self = {}
let metadata = self.metadata
let hwRequirements = self.hwRequirements
let metadata = this.metadata
let hwRequirements = this.hwRequirements
let results
let model_types = ['hf_transformers', 'hf_embed', 'llama_cpp', 'diffusion', 'knn', 'api', 'whisper', 'custom']
let model_types = ['hf_transformers', 'hf_embed', 'llama_cpp', 'diffusion', 'knn', 'api', 'whisper', 'hf_lm', 'hf_t5']
// prompt the user for input
console.log("1. hf_transformers")
console.log("2. hf_embed")
Expand All @@ -39,7 +101,8 @@ export class Generate_Manifest{
console.log("5. knn")
console.log("6. api")
console.log("7. whisper")
console.log("8. custom")
console.log("8. hf_lm")
console.log("9. hf_t5")
// request console input
let prompt = prompt_sync(({
history: prompt_sync_history(),
Expand All @@ -60,63 +123,62 @@ export class Generate_Manifest{
}
}

self.skill = model_type
this.skill = model_type

if (model_type == 'hf_transformers'){
results = hf_transformers_calc(self)
results = hf_transformers_calc(this)
}

if (model_type == 'hf_faster_whisper'){
results = hf_faster_whisper_calc(self)
results = hf_faster_whisper_calc(this)
}

if (model_type == 'hf_lm'){
results = hf_lm_calc(self)
results = hf_lm_calc(this)
}

if (model_type == "hf_t5"){
results = hf_t5_calc(self)
results = hf_t5_calc(this)
}

if (model_type == 'hf_embed'){
results = hf_embed_calc(self)
results = hf_embed_calc(this)
}

if (model_type == 'llama_cpp'){
results = llama_cpp_calc(self)
results = llama_cpp_calc(this)
}

if (model_type == 'diffusion'){
results = diffusion_calc(self)
results = diffusion_calc(this)
}

if (model_type == 'knn'){
results = knn_calc(self)
results = knn_calc(this)
}

if (model_type == 'api'){
results = api_calc(self)
results = api_calc(this)
}

if (model_type == 'custom'){
results = custom_calc(self)
results = custom_calc(this)
}

try{
test = generate_test(results)
let test = generate_test(results)
}
catch(err){
console.log(err)
throw("Error in testing the manifest " + model_type)

// throw("Error in testing the manifest " + model_type)
}
finally{
return results
}

}

custom_calc(self){
custom_calc(){
throw("custom is not yet supported")
}

Expand Down
22 changes: 11 additions & 11 deletions huggingface_scraper/generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import generate_hf_embed from './modeldata/generate_hf_embed.json' assert { type
import generate_hf_faster_whisper from './modeldata/generate_hf_faster_whisper.json' assert { type: 'json' };
import generate_hf_lm from './modeldata/generate_hf_lm.json' assert { type: 'json' };
import generate_hf_t5 from './modeldata/generate_hf_t5.json' assert { type: 'json' };
import generate_hf_diffusion from './modeldata/generate_hf_diffusion.json' assert { type: 'json' };
import generate_hf_diffusion from './modeldata/generate_diffusion.json' assert { type: 'json' };
import generate_api from './modeldata/generate_api.json' assert { type: 'json' };
import generate_knn from './modeldata/generate_knn.json' assert { type: 'json' };

Expand Down Expand Up @@ -249,15 +249,15 @@ export class Generator{

main(){
this
this.llama_cpp = template_llama_cpp(self.local_model_path, self.collection_path)
this.hf_transformers = template_hf_transformers(self.local_model_path, self.collection_path)
this.hf_embed = template_hf_embed(self.local_model_path, self.collection_path)
this.hf_faster_whisper = template_hf_faster_whisper(self.local_model_path, self.collection_path)
this.hf_lm = template_hf_lm(self.local_model_path, self.collection_path)
this.hf_t5 = template_hf_t5(self.local_model_path, self.collection_path)
this.hf_diffusion = template_hf_diffusion(self.local_model_path, self.collection_path)
this.api = template_api(self.local_model_path, self.collection_path)
this.knn = template_knn(self.local_model_path, self.collection_path)
return self
this.llama_cpp = template_llama_cpp(this.local_model_path, this.collection_path)
this.hf_transformers = template_hf_transformers(this.local_model_path, this.collection_path)
this.hf_embed = template_hf_embed(this.local_model_path, this.collection_path)
this.hf_faster_whisper = template_hf_faster_whisper(this.local_model_path, this.collection_path)
this.hf_lm = template_hf_lm(this.local_model_path, this.collection_path)
this.hf_t5 = template_hf_t5(this.local_model_path, this.collection_path)
this.hf_diffusion = template_hf_diffusion(this.local_model_path, this.collection_path)
this.api = template_api(this.local_model_path, this.collection_path)
this.knn = template_knn(this.local_model_path, this.collection_path)
return this
}
}
Loading

0 comments on commit 3550895

Please sign in to comment.