Skip to content

Commit

Permalink
Updated the RDF and report scripts (generate-rdf.py and generate-report.py) to read the correct input files
Browse files: browse the repository at this point in the history
  • Loading branch information
AO33 committed Nov 14, 2024
1 parent f89ca7a commit 748e788
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 15 deletions.
22 changes: 11 additions & 11 deletions scripts/generate-rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@
from kgx.cli.cli_utils import transform as kgx_transform
from loguru import logger

logger.info(f"Creating rdf output: output/geneontology.org_macroMolecularMachine_associations.nt.gz ...")

src_files = []
src_nodes = f"output/geneontology.org_macroMolecularMachine_associations_nodes.tsv"
src_edges = f"output/geneontology.org_macroMolecularMachine_associations_edges.tsv"
outfile = "output/go_annotation_associations.nt.gz"
src_nodes = "output/go_annotation_nodes.tsv"
src_edges = "output/go_annotation_edges.tsv"

logger.info("Creating rdf output: {}...".format(outfile))

if Path(src_nodes).is_file():
src_files.append(src_nodes)
if Path(src_edges).is_file():
src_files.append(src_edges)

kgx_transform(
inputs=src_files,
input_format="tsv",
stream=True,
output=f"output/geneontology.org_macroMolecularMachine_associations.nt.gz",
output_format="nt",
output_compression="gz",
)
kgx_transform(inputs=src_files,
input_format="tsv",
stream=True,
output=outfile,
output_format="nt",
output_compression="gz")
14 changes: 10 additions & 4 deletions scripts/generate-report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

import duckdb

nodes_file = "output/geneontology.org_macroMolecularMachine_associations_nodes.tsv"
edges_file = "output/geneontology.org_macroMolecularMachine_associations_edges.tsv"
nodes_file = "output/go_annotation_nodes.tsv"
nodes_report = "output/go_annotation_nodes_report.tsv"

edges_file = "output/go_annotation_edges.tsv"
edges_report = "output/go_annotation_edges_report.tsv"


# Nodes
Expand All @@ -14,7 +17,9 @@
GROUP BY all
ORDER BY all
"""
duckdb.sql(f"copy ({query}) to 'output/geneontology.org_macroMolecularMachine_associations_nodes_report.tsv' (header, delimiter '\t')")

ncommand = "copy ({}) to '{}' (header, delimiter '\t')".format(query, nodes_report)
duckdb.sql(ncommand)

# Edges
if Path(edges_file).exists():
Expand All @@ -25,4 +30,5 @@
GROUP BY all
ORDER BY all
"""
duckdb.sql(f"copy ({query}) to 'output/geneontology.org_macroMolecularMachine_associations_edges_report.tsv' (header, delimiter '\t')")
ecommand = "copy ({}) to '{}' (header, delimiter '\t')".format(query, edges_report)
duckdb.sql(ecommand)

0 comments on commit 748e788

Please sign in to comment.