Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
epwalsh committed Apr 10, 2024
1 parent 08a7b11 commit ae9c3b4
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions configs/mcli/mitchish70.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: olmo-70b
- image: mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04
- #image: public.ecr.aws/z0f8p3z5/olmo:pytorch2.2.1_cu121-python3.11-ubuntu20.04
+ # image: mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04
+ image: public.ecr.aws/z0f8p3z5/olmo:pytorch2.2.1_cu121-python3.11-ubuntu20.04
scheduling:
priority: auto
# preemptible: true # means it can be retried
@@ -71,7 +71,6 @@ compute:
- inst-ht0xx-r15z3-workers
- inst-entnk-r15z3-workers
- inst-hvw6t-r15z3-workers
- - inst-3to96-r15z3-workers
- inst-4ki3x-r15z3-workers
- inst-aixwt-r15z3-workers
- inst-pbivr-r15z3-workers
@@ -125,13 +124,15 @@ compute:
- inst-97xv1-r15z3-workers
- inst-vaqst-r15z3-workers
- inst-i6mnk-r15z3-workers
+ - inst-xtbwa-r15z3-workers
# Bad nodes:
# - inst-zgb86-r15z3-workers
# - inst-hdlqg-r15z3-workers
# - inst-6jp2q-r15z3-workers
# - inst-bw20d-r15z3-workers
# - inst-4zdz3-r15z3-workers
# - inst-zlnho-r15z3-workers
+ # - inst-3to96-r15z3-workers
integrations:
- integration_type: git_repo
git_repo: allenai/OLMo
@@ -204,3 +205,5 @@ command: |-
# --device_train_microbatch_size=2 \
# gpus: 896
# --global_train_batch_size=1792 \
+ # gpus: 600 # (75 nodes)
+ # --global_train_batch_size=1800 \

0 comments on commit ae9c3b4

Please sign in to comment.