Skip to content

Commit

Permalink
Added huggingface tagger (without LFS)
Browse files Browse the repository at this point in the history
  • Loading branch information
PrinsINT committed Jun 12, 2024
1 parent 34e28f0 commit 8d28522
Show file tree
Hide file tree
Showing 90 changed files with 491,428 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "pie/base/pie"]
path = pie/base/pie
url = https://github.com/INL/int-pie
[submodule "huggingface/base/huggingface"]
path = huggingface/base/huggingface
url = https://github.com/INL/int-huggingface-tagger
12 changes: 12 additions & 0 deletions buildall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,15 @@ docker build -t instituutnederlandsetaal/taggers-dockerized-pie-tdn-bab:$VERSION
docker build -t instituutnederlandsetaal/taggers-dockerized-pie-tdn-clvn:$VERSION_LABEL pie/TDN-CLVN
docker build -t instituutnederlandsetaal/taggers-dockerized-pie-tdn-cour:$VERSION_LABEL pie/TDN-COUR
docker build -t instituutnederlandsetaal/taggers-dockerized-pie-tdn-dbnldq:$VERSION_LABEL pie/TDN-DBNLDQ

# Huggingface
# Commented out for now, because building these images requires Git LFS. May be enabled in the future.
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-base:$VERSION_LABEL huggingface/base
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-1400-1600:$VERSION_LABEL huggingface/TDN-1400-1600
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-1600-1900:$VERSION_LABEL huggingface/TDN-1600-1900
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-all:$VERSION_LABEL huggingface/TDN-ALL
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-all-enhanced:$VERSION_LABEL huggingface/TDN-ALL-ENHANCED
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-bab:$VERSION_LABEL huggingface/TDN-BAB
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-clvn:$VERSION_LABEL huggingface/TDN-CLVN
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-cour:$VERSION_LABEL huggingface/TDN-COUR
# docker build -t instituutnederlandsetaal/taggers-dockerized-hug-tdn-dbnldq:$VERSION_LABEL huggingface/TDN-DBNLDQ
12 changes: 12 additions & 0 deletions buildandpushall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,15 @@ docker push instituutnederlandsetaal/taggers-dockerized-pie-tdn-bab:$VERSION_LAB
docker push instituutnederlandsetaal/taggers-dockerized-pie-tdn-clvn:$VERSION_LABEL
docker push instituutnederlandsetaal/taggers-dockerized-pie-tdn-cour:$VERSION_LABEL
docker push instituutnederlandsetaal/taggers-dockerized-pie-tdn-dbnldq:$VERSION_LABEL

# Huggingface
# Commented out for now, because building these images requires Git LFS. May be enabled in the future.
# docker push instituutnederlandsetaal/taggers-dockerized-hug-base:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-1400-1600:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-1600-1900:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-all:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-all-enhanced:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-bab:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-clvn:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-cour:$VERSION_LABEL
# docker push instituutnederlandsetaal/taggers-dockerized-hug-tdn-dbnldq:$VERSION_LABEL
81 changes: 81 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,87 @@ services:
ports:
- 8106:8080

# huggingface (port: 811x)
# Hugging Face tagger built from the huggingface/TDN-1400-1600 directory.
# Container port 8080 is published on host port 8110.
hug-tdn-1400-1600:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-1400-1600:${APP_VERSION}
  build:
    context: huggingface/TDN-1400-1600
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8110:8080"

# Hugging Face tagger built from the huggingface/TDN-1600-1900 directory.
# Container port 8080 is published on host port 8111.
hug-tdn-1600-1900:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-1600-1900:${APP_VERSION}
  build:
    context: huggingface/TDN-1600-1900
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8111:8080"

# Hugging Face tagger built from the huggingface/TDN-ALL directory.
# Container port 8080 is published on host port 8112.
hug-tdn-all:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-all:${APP_VERSION}
  build:
    context: huggingface/TDN-ALL
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8112:8080"

# Hugging Face tagger built from the huggingface/TDN-BAB directory.
# Container port 8080 is published on host port 8113.
hug-tdn-bab:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-bab:${APP_VERSION}
  build:
    context: huggingface/TDN-BAB
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8113:8080"

# Hugging Face tagger built from the huggingface/TDN-CLVN directory.
# Container port 8080 is published on host port 8114.
hug-tdn-clvn:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-clvn:${APP_VERSION}
  build:
    context: huggingface/TDN-CLVN
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8114:8080"

# Hugging Face tagger built from the huggingface/TDN-COUR directory.
# Container port 8080 is published on host port 8115.
hug-tdn-cour:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-cour:${APP_VERSION}
  build:
    context: huggingface/TDN-COUR
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8115:8080"

# Hugging Face tagger built from the huggingface/TDN-DBNLDQ directory.
# Container port 8080 is published on host port 8116.
hug-tdn-dbnldq:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-dbnldq:${APP_VERSION}
  build:
    context: huggingface/TDN-DBNLDQ
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8116:8080"

# Hugging Face tagger built from the huggingface/TDN-ALL-ENHANCED directory.
# Container port 8080 is published on host port 8117.
hug-tdn-all-enhanced:
  image: instituutnederlandsetaal/taggers-dockerized-hug-tdn-all-enhanced:${APP_VERSION}
  build:
    context: huggingface/TDN-ALL-ENHANCED
  environment:
    # Forwarded unchanged from the Compose environment.
    - CALLBACK_SERVER=${CALLBACK_SERVER}
  restart: unless-stopped
  ports:
    # Quoted so the HOST:CONTAINER pair is always read as a string.
    - "8117:8080"

networks:
default:
name: taggers-network
Expand Down
3 changes: 3 additions & 0 deletions huggingface/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Huggingface tagger
`base/` provides the base Docker image from which the model-specific Hugging Face images derive.
The base image provides the required runtime (Python, packages, `process.py`, etc.). A Hugging Face model image simply has to derive from the base image and copy in a `model.config` plus the files that `model.config` refers to.
4 changes: 4 additions & 0 deletions huggingface/TDN-1400-1600/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Tag of the base image to build on; falls back to "dev" when no build arg is given.
ARG tag=dev
FROM instituutnederlandsetaal/taggers-dockerized-hug-base:${tag}

# Copy the entire build context (this model directory) into the image.
COPY --link . ./
7 changes: 7 additions & 0 deletions huggingface/TDN-1400-1600/added_tokens.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"[CLS]": 2,
"[MASK]": 4,
"[PAD]": 0,
"[SEP]": 3,
"[UNK]": 1
}
Loading

0 comments on commit 8d28522

Please sign in to comment.