Skip to content

Commit

Permalink
add util to copy published data using the entity structure
Browse files Browse the repository at this point in the history
  • Loading branch information
jarosenb committed Mar 13, 2024
1 parent 5e487c2 commit d66aa40
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def transform_pub_entities(project_id: str, version: Optional[int] = None):
entity_listing = get_entities_from_publication(project_id, version=version)
base_pub_meta = IndexedPublication.from_id(project_id, revision=version).to_dict()
pub_graph = construct_publication_graph(project_id, version)
path_mappings = []

for _, node_data in pub_graph.nodes.items():
node_entity = next(
Expand All @@ -317,7 +318,10 @@ def transform_pub_entities(project_id: str, version: Optional[int] = None):
if not node_entity:
continue
data_path = str(Path(node_data["basePath"]) / "data")
new_entity_value = transform_entity(node_entity, base_pub_meta, data_path)
new_entity_value, path_mapping = transform_entity(
node_entity, base_pub_meta, data_path
)
path_mappings.append(path_mapping)
node_data["value"] = new_entity_value

project_users = construct_users(entity_listing[0])
Expand Down Expand Up @@ -345,7 +349,7 @@ def transform_pub_entities(project_id: str, version: Optional[int] = None):
pub_graph.nodes[pub]["publicationDate"] = str(base_pub_meta["created"])
pub_graph.nodes[pub]["status"] = "published"

return pub_graph
return pub_graph, path_mappings


def combine_pub_versions(project_id: str) -> nx.DiGraph:
Expand All @@ -358,7 +362,7 @@ def combine_pub_versions(project_id: str) -> nx.DiGraph:

versions = range(2, latest_version + 1)
for version in versions:
version_graph = transform_pub_entities(project_id, version)
version_graph, _ = transform_pub_entities(project_id, version)
version_pubs = version_graph.successors("NODE_ROOT")
pub_graph: nx.DiGraph = nx.compose(pub_graph, version_graph)
for node_id in version_pubs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def ingest_publications():
for pub in all_pubs:
# print(pub["projectId"])
try:
pub_graph = transform_pub_entities(pub["projectId"])
pub_graph, _ = transform_pub_entities(pub["projectId"])
pub_base = next(
(
pub_graph.nodes[node_id]["value"]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Operations to format published data in accordance with the project structure"""

import shutil
import os
from pathlib import Path
from designsafe.apps.api.projects_v2.migration_utils.graph_constructor import (
transform_pub_entities,
)


def format_publication_data(
    project_id,
    version=None,
    v1_pubs_root="/corral-repl/tacc/NHERI/published",
    v2_pubs_root="/corral-repl/tacc/NHERI/published-v2",
    *,
    copy_function=shutil.copy2,
):
    """
    Copy a publication's data into a layout that follows the project structure.

    The publication graph and the per-entity path mappings come from
    ``transform_pub_entities``. Each mapping is iterated as
    ``{v1_path: v2_path}``; sources are resolved under
    ``<v1_pubs_root>/<project_id>`` and destinations under ``<v2_pubs_root>``.
    For projects of type "other" the whole v1 project directory is copied to
    ``<v2_pubs_root>/<basePath>/data`` and the mappings are not consulted.

    By default files are really copied (``shutil.copy2``), so the data is
    duplicated on disk. Pass ``copy_function=os.link`` to create hard links
    instead; that requires both roots to be on the same filesystem and, for
    single-file mappings, fails if the destination already exists.

    Args:
        project_id: Project identifier; also used as the v1 directory name.
        version: Publication version, forwarded to ``transform_pub_entities``.
        v1_pubs_root: Root directory of the existing published data.
        v2_pubs_root: Root directory the restructured data is written to.
        copy_function: Callable ``(src, dst)`` used to transfer each file.

    Raises:
        ValueError: If the graph has no node named "designsafe.project".
    """
    pub_graph, path_mappings = transform_pub_entities(project_id, version)

    # Use a default so a missing project node produces a clear error instead
    # of a bare StopIteration; .get() tolerates nodes without a "name" key.
    base_project = next(
        (
            node
            for node in pub_graph
            if pub_graph.nodes[node].get("name") == "designsafe.project"
        ),
        None,
    )
    if base_project is None:
        raise ValueError(
            f"No designsafe.project node found in publication graph for {project_id}"
        )
    prj_value = pub_graph.nodes[base_project]

    # NOTE(review): the source directory is built from project_id alone;
    # `version` only affects the graph/mappings. Confirm that versioned
    # publications are not stored under a different v1 directory.
    v1_project_root = Path(v1_pubs_root) / project_id
    v2_root = Path(v2_pubs_root)

    if prj_value["value"]["projectType"] == "other":
        v2_full_path = v2_root / prj_value["basePath"].lstrip("/") / "data"
        os.makedirs(v2_full_path.parent, exist_ok=True)
        shutil.copytree(
            v1_project_root,
            v2_full_path,
            dirs_exist_ok=True,
            copy_function=copy_function,
        )
        return

    for mapping in path_mappings:
        for v1_path, v2_path in mapping.items():
            # Strip leading slashes so the joins stay below the roots
            # (joining an absolute path would discard the root).
            v1_full_path = v1_project_root / v1_path.lstrip("/")
            v2_full_path = v2_root / v2_path.lstrip("/")
            os.makedirs(v2_full_path.parent, exist_ok=True)
            if v1_full_path.is_dir():
                shutil.copytree(
                    v1_full_path,
                    v2_full_path,
                    dirs_exist_ok=True,
                    copy_function=copy_function,
                )
            else:
                copy_function(v1_full_path, v2_full_path)


def format_publication_data_symlink(
    project_id,
    version=None,
    v1_pubs_root="/corral-repl/tacc/NHERI/published",
    v2_pubs_root="/corral-repl/tacc/NHERI/published-v2",
):
    """
    Lay out publication data in accordance with the project structure using
    symbolic links.

    The path mappings come from ``transform_pub_entities``; each mapping is
    iterated as ``{v1_path: v2_path}``. For every pair a symlink is created at
    ``<v2_pubs_root>/<v2_path>`` pointing to
    ``<v1_pubs_root>/<project_id>/<v1_path>``, so no file data is duplicated.
    Missing parent directories of the link are created.

    The function can be re-run: a link that already exists and points at the
    intended target is left alone. Any other pre-existing entry at the
    destination still raises ``FileExistsError`` from ``os.symlink``.

    Args:
        project_id: Project identifier; also used as the v1 directory name.
        version: Publication version, forwarded to ``transform_pub_entities``.
        v1_pubs_root: Root directory of the existing published data.
        v2_pubs_root: Root directory in which the links are created.
    """
    _, path_mappings = transform_pub_entities(project_id, version)

    v1_project_root = Path(v1_pubs_root) / project_id
    v2_root = Path(v2_pubs_root)

    for mapping in path_mappings:
        for v1_path, v2_path in mapping.items():
            # Strip leading slashes so the joins stay below the roots
            # (joining an absolute path would discard the root).
            v1_full_path = v1_project_root / v1_path.lstrip("/")
            v2_full_path = v2_root / v2_path.lstrip("/")
            os.makedirs(v2_full_path.parent, exist_ok=True)

            # Skip links created by a previous run; without this a second
            # invocation would abort with FileExistsError on the first link.
            if (
                v2_full_path.is_symlink()
                and Path(os.readlink(v2_full_path)) == v1_full_path
            ):
                continue
            os.symlink(v1_full_path, v2_full_path)
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Utilities to convert published entities to a consistent schema."""

from pathlib import Path
from typing import TypedDict
from django.conf import settings
Expand Down Expand Up @@ -224,7 +225,7 @@ def update_file_objs(
updated_file_objs.append(
{**file_obj, "path": path_mapping[file_obj["path"]], "system": system_id}
)
return updated_file_objs
return updated_file_objs, path_mapping


def transform_entity(entity: dict, base_pub_meta: dict, base_path: str):
Expand Down Expand Up @@ -252,9 +253,13 @@ def transform_entity(entity: dict, base_pub_meta: dict, base_path: str):
entity["value"]["fileObjs"] = file_objs
if entity["value"].get("fileTags", False):
entity["value"]["fileTags"] = update_file_tag_paths(entity, base_path)
entity["value"]["fileObjs"] = update_file_objs(
new_file_objs, path_mapping = update_file_objs(
entity, base_path, system_id=settings.PUBLISHED_SYSTEM
)

entity["value"]["fileObjs"] = new_file_objs
else:
path_mapping = {}

validated_model = model.model_validate(entity["value"])
return validated_model.model_dump()
return validated_model.model_dump(), path_mapping

0 comments on commit d66aa40

Please sign in to comment.