generated from Knowledge-Graph-Hub/kg-example
-
Notifications
You must be signed in to change notification settings - Fork 4
/
run.py
100 lines (74 loc) · 2.53 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""Run script."""
import click
from kg_phenio import download as kg_download
from kg_phenio import transform as kg_transform
from kg_phenio.merge_utils.merge_kg import load_and_merge
from kg_phenio.transform import DATA_SOURCES
from kg_phenio.normalize import normalize
@click.group()
def cli() -> None:
    """Run the kg_phenio pipeline steps: download, transform and merge.

    The group does no work itself; it only collects the subcommands attached
    to it with ``@cli.command()``.
    """
@cli.command()
@click.option(
    "yaml_file",
    "-y",
    type=click.Path(exists=True),
    default="download.yaml",
    required=True,
)
@click.option(
    "output_dir",
    "-o",
    default="data/raw",
    required=True,
)
@click.option(
    "ignore_cache",
    "-i",
    help="ignore cache and download files even if they exist [false]",
    default=False,
    is_flag=True,
)
def download(*args, **kwargs) -> None:
    """Download the data files listed in a YAML file into a data directory.

    The values collected by click are forwarded unchanged to ``kg_download``.

    Args:
        yaml_file: YAML file containing the list of datasets to download
            (default: download.yaml).
        output_dir: Directory to download the data to (default: data/raw).
        ignore_cache: If specified, ignore existing files and download again.

    Returns:
        None.
    """
    kg_download(*args, **kwargs)
@cli.command()
@click.option("yaml", "-y", default="transform.yaml", type=click.Path(exists=True))
@click.option("input_dir", "-i", default="data/raw", type=click.Path(exists=True))
@click.option("output_dir", "-o", default="data/transformed")
@click.option(
    "sources",
    "-s",
    default=None,
    multiple=True,
    # click.Choice expects a sequence of choices; hand it a concrete list
    # rather than a live view onto the DATA_SOURCES mapping.
    type=click.Choice(list(DATA_SOURCES.keys())),
)
def transform(*args, **kwargs) -> None:
    """Transform each source into nodes and edges.

    Calls the scripts in kg_phenio/transform/[source name]/ by forwarding the
    values collected by click unchanged to ``kg_transform``.

    Args:
        yaml: A string pointing to a KGX compatible config YAML
            (default: transform.yaml).
        input_dir: Directory to import data from (default: data/raw).
        output_dir: Directory to output data to (default: data/transformed).
        sources: Sources to transform; ``-s`` may be given several times and
            each value must be one of the keys of ``DATA_SOURCES``.

    Returns:
        None.
    """
    kg_transform(*args, **kwargs)
@cli.command()
@click.option(
    "yaml",
    "-y",
    type=click.Path(exists=True),
    default="merge.yaml",
)
@click.option(
    "processes",
    "-p",
    type=int,
    default=1,
)
def merge(yaml: str, processes: int) -> None:
    """Load subgraphs with KGX into a merged graph, then call ``normalize()``.

    Args:
        yaml: A string pointing to a KGX compatible config YAML
            (default: merge.yaml).
        processes: Number of processes to use (default: 1).

    Returns:
        None.
    """
    # Merge first; normalize() takes no arguments and runs once merging returns.
    load_and_merge(yaml, processes)
    normalize()
if __name__ == "__main__":
    # Hand control to the click group only when run as a script, so that
    # importing this module does not start the CLI.
    cli()