Skip to content

Commit

Permalink
Use get_ensemble in csv-export
Browse files Browse the repository at this point in the history
Add experiment name to output.
  • Loading branch information
dafeda committed Aug 22, 2024
1 parent 9add0ee commit eb40cb8
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 27 deletions.
31 changes: 23 additions & 8 deletions src/ert/gui/tools/export/export_panel.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
from typing import TYPE_CHECKING, Optional

from qtpy.QtWidgets import QCheckBox, QWidget
Expand All @@ -25,33 +26,38 @@ def __init__(
) -> None:
description = "The CSV export requires some information before it starts:"
super().__init__("export", description, parent)

self.storage = storage # Store the storage reference
subs_list = ert_config.substitution_list
default_csv_output_path = subs_list.get("<CSV_OUTPUT_PATH>", "output.csv")
self.output_path_model = PathModel(default_csv_output_path)
output_path_chooser = PathChooser(self.output_path_model)

design_matrix_default = subs_list.get("<DESIGN_MATRIX_PATH>", "")
self.design_matrix_path_model = PathModel(
design_matrix_default, is_required=False, must_exist=True
)
design_matrix_path_chooser = PathChooser(self.design_matrix_path_model)

self.list_edit = ListEditBox(
[ensemble.name for ensemble in storage.ensembles if ensemble.has_data()]
)
# Create a dictionary of ensemble names to their IDs and experiment names
self.ensemble_dict = {
ensemble.name: {
"ensemble_id": str(ensemble.id),
"experiment_name": ensemble.experiment.name,
}
for ensemble in storage.ensembles
if ensemble.has_data()
}

self.list_edit = ListEditBox(list(self.ensemble_dict.keys()))

self.drop_const_columns_check = QCheckBox()
self.drop_const_columns_check.setChecked(False)
self.drop_const_columns_check.setToolTip(
"If checked, exclude columns whose value is the same for every entry"
)

self.addLabeledOption("Output file path", output_path_chooser)
self.addLabeledOption("Design matrix path", design_matrix_path_chooser)
self.addLabeledOption("List of ensembles to export", self.list_edit)
self.addLabeledOption("Drop constant columns", self.drop_const_columns_check)

self.addButtons()

@property
Expand All @@ -60,7 +66,16 @@ def output_path(self) -> Optional[str]:

@property
def ensemble_list(self) -> str:
    """Return the selected ensembles serialized as a JSON object string.

    The keys of the JSON object are ensemble IDs (as strings); each value
    is an object with the ensemble's ``"name"`` and ``"experiment_name"``.
    Only names currently present in ``self.list_edit`` are considered, and
    any name that has no entry in ``self.ensemble_dict`` is left out.

    Returns:
        A JSON string, ``"{}"`` when nothing is selected.
    """
    selected = {}
    for name in self.list_edit.getItems():
        info = self.ensemble_dict.get(name)
        if info is None:
            # The list widget may contain a name that is not a known
            # ensemble; such entries are skipped rather than raising.
            continue
        selected[info["ensemble_id"]] = {
            "name": name,
            "experiment_name": info["experiment_name"],
        }
    return json.dumps(selected)

@property
def design_matrix_path(self) -> Optional[str]:
Expand Down
40 changes: 21 additions & 19 deletions src/ert/resources/workflows/jobs/internal-gui/scripts/csv_export.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
import os
from uuid import UUID

import pandas

Expand All @@ -22,9 +24,9 @@ class CSVExportJob(ErtScript):
Optional arguments:
ensemble_list: a comma separated list of ensembles to export (no spaces allowed)
if no list is provided the current ensemble is exported
a single * can be used to export all ensembles
ensemble_dict: a JSON string encoding a dictionary that maps ensemble IDs (UUIDs) to a dictionary holding the ensemble's "name" and "experiment_name"
if no dictionary is provided, the current ensemble is exported
a single "*" can be used to export all ensembles
design_matrix: a path to a file containing the design matrix
Expand Down Expand Up @@ -53,37 +55,34 @@ def run(
workflow_args,
):
output_file = workflow_args[0]
ensemble_list = None if len(workflow_args) < 2 else workflow_args[1]
ensemble_dict_str = None if len(workflow_args) < 2 else workflow_args[1]
design_matrix_path = None if len(workflow_args) < 3 else workflow_args[2]
_ = True if len(workflow_args) < 4 else workflow_args[3]
drop_const_cols = False if len(workflow_args) < 5 else workflow_args[4]
ensembles = []
facade = LibresFacade(ert_config)

ensembles = ensemble_list.split(",")

if ensemble_list is None or len(ensembles) == 0:
ensembles = "default"
ensemble_dict = json.loads(ensemble_dict_str)

if design_matrix_path is not None:
if not os.path.exists(design_matrix_path):
raise UserWarning("The design matrix file does not exists!")

raise UserWarning("The design matrix file does not exist!")
if not os.path.isfile(design_matrix_path):
raise UserWarning("The design matrix is not a file!")

data = pandas.DataFrame()

for ensemble in ensembles:
ensemble = ensemble.strip()

for ensemble_id, ensemble_info in ensemble_dict.items():
try:
ensemble = self.storage.get_ensemble_by_name(ensemble)
ensemble = storage.get_ensemble(UUID(ensemble_id))
except KeyError as exc:
raise UserWarning(f"The ensemble '{ensemble}' does not exist!") from exc
raise UserWarning(
f"The ensemble with ID '{ensemble_id}' does not exist!"
) from exc

if not ensemble.has_data():
raise UserWarning(f"The ensemble '{ensemble}' does not have any data!")
raise UserWarning(
f"The ensemble '{ensemble.name}' does not have any data!"
)

ensemble_data = ensemble.load_all_gen_kw_data()

Expand All @@ -105,13 +104,16 @@ def run(

ensemble_data["Iteration"] = ensemble.iteration
ensemble_data["Ensemble"] = ensemble.name
ensemble_data["Experiment"] = ensemble_info["experiment_name"]
ensemble_data.set_index(
["Ensemble", "Iteration"], append=True, inplace=True
["Ensemble", "Experiment", "Iteration"], append=True, inplace=True
)

data = pandas.concat([data, ensemble_data])

data = data.reorder_levels(["Realization", "Iteration", "Date", "Ensemble"])
data = data.reorder_levels(
["Realization", "Iteration", "Date", "Experiment", "Ensemble"]
)
if drop_const_cols:
data = data.loc[:, (data != data.iloc[0]).any()]

Expand Down

0 comments on commit eb40cb8

Please sign in to comment.