-
Notifications
You must be signed in to change notification settings - Fork 80
/
hf_download.py
151 lines (132 loc) · 4.25 KB
/
hf_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
@File :hf_download.py
@Description :Download Hugging Face models and datasets, by default from a mirror site.
@Author :Xiaojian Yuan
"""
import argparse
import os
import shlex
import subprocess
import sys
# Make sure huggingface_hub is importable; install it on the fly if it is not.
try:
    import huggingface_hub  # noqa: F401  (imported only to probe availability)
except ImportError:
    print("Install huggingface_hub.")
    # Run pip through the interpreter that executes this script so the package
    # is installed into the same environment; a bare `pip` found on PATH may
    # belong to a different Python installation.
    # check=False keeps the install best-effort: a failed install does not
    # abort the script here.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-U", "huggingface_hub"],
        check=False,
    )
def _str_to_bool(value: str) -> bool:
    """Parse a command-line boolean such as ``True``, ``false``, ``1`` or ``0``.

    Used as the argparse ``type`` of the on/off switches so that text typed on
    the command line is only compared against known words and is never
    evaluated as Python code.

    Args:
        value: Raw string taken from the command line.

    Returns:
        The boolean the string stands for.

    Raises:
        argparse.ArgumentTypeError: If *value* is not a recognised boolean
            word; argparse reports this as a normal usage error.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "on", "1"):
        return True
    # "none" is accepted as false so that `--flag None` stays falsy.
    if normalized in ("false", "f", "no", "n", "off", "0", "none"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean (True/False), got %r" % value
    )


# Command-line interface: one repository (model or dataset) per invocation,
# plus optional filters, target directory and transport switches.
parser = argparse.ArgumentParser(description="HuggingFace Download Accelerator Script.")
parser.add_argument(
    "--model",
    "-M",
    default=None,
    type=str,
    help="model name in huggingface, e.g., baichuan-inc/Baichuan2-7B-Chat",
)
parser.add_argument(
    "--token",
    "-T",
    default=None,
    type=str,
    help="hugging face access token for download meta-llama/Llama-2-7b-hf, e.g., hf_***** ",
)
parser.add_argument(
    "--include",
    default=None,
    type=str,
    help="Specify the file to be downloaded",
)
parser.add_argument(
    "--exclude",
    default=None,
    type=str,
    help="Files you don't want to download",
)
parser.add_argument(
    "--dataset",
    "-D",
    default=None,
    type=str,
    help="dataset name in huggingface, e.g., zh-plus/tiny-imagenet",
)
parser.add_argument(
    "--save_dir",
    "-S",
    default=None,
    type=str,
    help="path to be saved after downloading.",
)
# Both switches default to True and take an explicit value, e.g.
# `--use_mirror False`.
parser.add_argument(
    "--use_hf_transfer",
    default=True,
    type=_str_to_bool,
    help="Use hf-transfer, default: True",
)
parser.add_argument(
    "--use_mirror",
    default=True,
    type=_str_to_bool,
    help="Download from mirror, default: True",
)
# Read the options from the command line (argparse exits on usage errors).
args = parser.parse_args()

if args.use_hf_transfer:
    # hf_transfer is only needed when the switch is on, so it is installed on
    # demand rather than up front.
    try:
        import hf_transfer  # noqa: F401  (imported only to probe availability)
    except ImportError:
        print("Install hf_transfer.")
        # Use the running interpreter's pip so the package is installed into
        # the environment that executes this script; the official index is
        # forced with -i, as before. check=False keeps this best-effort.
        subprocess.run(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                "-U",
                "hf-transfer",
                "-i",
                "https://pypi.org/simple",
            ],
            check=False,
        )
    # Enable hf-transfer for this process and, through the inherited
    # environment, for the download command started later.
    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
    print("export HF_HUB_ENABLE_HF_TRANSFER=", os.getenv("HF_HUB_ENABLE_HF_TRANSFER"))
# Exactly one of --model / --dataset must be given. Both failure cases exit
# with a non-zero status so that calling shells and scripts can detect the
# usage error (a bare sys.exit() would report success).
if args.model is None and args.dataset is None:
    print(
        "Specify the name of the model or dataset, e.g., --model baichuan-inc/Baichuan2-7B-Chat"
    )
    sys.exit(1)
elif args.model is not None and args.dataset is not None:
    print("Only one model or dataset can be downloaded at a time.")
    sys.exit(1)
# When the mirror switch is on, point HF_ENDPOINT at the mirror site and echo
# the resulting value (https://hf-mirror.com) for the user.
if args.use_mirror:
    os.environ.update(HF_ENDPOINT="https://hf-mirror.com")
    print("export HF_ENDPOINT=", os.environ.get("HF_ENDPOINT"))
# Optional command-line fragments for the download command; each one is an
# empty string when the corresponding argument was not supplied.
token_option = "" if args.token is None else "--token %s" % args.token
include_option = "" if args.include is None else "--include %s" % args.include
exclude_option = "" if args.exclude is None else "--exclude %s" % args.exclude
def _local_dir(save_dir, repo_id, repo_type):
    """Return the folder under *save_dir* that receives the download.

    The folder is named ``<repo_type>s--<owner>--<name>`` for an id of the
    form ``owner/name`` and ``<repo_type>s--<name>`` for an id without a
    slash, e.g. ``models--baichuan-inc--Baichuan2-7B-Chat``.
    """
    id_parts = repo_id.split("/")
    # Only the first two components take part in the folder name.
    folder = "--".join(["%ss" % repo_type] + id_parts[:2])
    return os.path.join(save_dir, folder)


def _build_download_command(repo_id, repo_type, option_fragments, save_dir):
    """Build the ``huggingface-cli download`` argument list.

    Args:
        repo_id: Repository to download, e.g. ``zh-plus/tiny-imagenet``.
        repo_type: ``"model"`` or ``"dataset"``.
        option_fragments: Pre-formatted option strings such as
            ``"--include *.json *.bin"``; empty strings are allowed.
        save_dir: Base directory for ``--local-dir``, or ``None`` to omit it.

    Returns:
        A list of arguments suitable for ``subprocess.run`` without a shell.

    Raises:
        ValueError: If a fragment contains unbalanced quotes (raised by
            ``shlex.split``).
    """
    command = ["huggingface-cli", "download"]
    for fragment in option_fragments:
        # Split with shell-like rules so several space-separated patterns stay
        # separate arguments, but never hand the text to an actual shell.
        command.extend(shlex.split(fragment))
    command += ["--local-dir-use-symlinks", "False", "--resume-download"]
    if repo_type == "dataset":
        command += ["--repo-type", "dataset"]
    command.append(repo_id)
    if save_dir is not None:
        command += ["--local-dir", _local_dir(save_dir, repo_id, repo_type)]
    return command


# Work out which kind of repository was requested; models take precedence,
# matching the if/elif order this script has always used.
if args.model is not None:
    repo_id, repo_type = args.model, "model"
elif args.dataset is not None:
    repo_id, repo_type = args.dataset, "dataset"
else:
    repo_id, repo_type = None, None

if repo_id is not None:
    download_command = _build_download_command(
        repo_id,
        repo_type,
        (token_option, include_option, exclude_option),
        args.save_dir,
    )
    # Run the command as an argument list: no shell is involved, so glob
    # characters in --include/--exclude reach huggingface-cli unexpanded and
    # shell metacharacters in any argument are not interpreted.
    try:
        exit_code = subprocess.run(download_command, check=False).returncode
    except FileNotFoundError:
        print("huggingface-cli was not found on PATH; is huggingface_hub installed?")
        exit_code = 1
    # Propagate a failed download to the caller instead of exiting with 0.
    if exit_code != 0:
        sys.exit(exit_code)