Skip to content

Commit

Permalink
Don't return duplicate data from export tool
Browse files Browse the repository at this point in the history
This happens when multiple experiments have ensembles with the same name.
  • Loading branch information
dafeda committed Aug 30, 2024
1 parent 4116c09 commit 882805a
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 95 deletions.
36 changes: 21 additions & 15 deletions src/ert/gui/ertwidgets/listeditbox.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Iterable, List, Optional
from typing import Dict, Iterable, Optional
from uuid import UUID

from qtpy.QtCore import QSize, Qt
from qtpy.QtGui import QIcon, QKeyEvent
Expand Down Expand Up @@ -86,13 +87,14 @@ class ListEditBox(QWidget):
NO_ITEMS_SPECIFIED_MSG = "The list must contain at least one item or * (for all)."
DEFAULT_MSG = "A list of comma separated ensemble names or * for all."

def __init__(self, possible_items: List[str]) -> None:
def __init__(self, possible_items: Dict[UUID, str]) -> None:
QWidget.__init__(self)

self._editing = True
self._possible_items = possible_items
self._possible_items_dict = possible_items
self._possible_items = list(possible_items.values())

self._list_edit_line = AutoCompleteLineEdit(possible_items, self)
self._list_edit_line = AutoCompleteLineEdit(self._possible_items, self)
self._list_edit_line.setMinimumWidth(350)

layout = QHBoxLayout()
Expand Down Expand Up @@ -127,41 +129,45 @@ def getListText(self) -> str:
text = "".join(text.split())
return text

def getItems(self) -> List[str]:
def getItems(self) -> Dict[UUID, str]:
text = self.getListText()
items = text.split(",")

if len(items) == 1 and items[0] == "*":
items = self._possible_items
return self._possible_items_dict

return [item for item in items if len(item) > 0]
result = {}
for item in items:
item = item.strip()
for uuid, name in self._possible_items_dict.items():
if name == item:
result[uuid] = name
break

return result

def validateList(self) -> None:
"""Called whenever the list is modified"""
palette = self._list_edit_line.palette()

items = self.getItems()

valid = True
message = ""

if len(items) == 0:
valid = False
message = ListEditBox.NO_ITEMS_SPECIFIED_MSG
else:
for item in items:
if item not in self._possible_items:
for _, name in items.items():
if name not in self._possible_items_dict.values():
valid = False
message = ListEditBox.ITEM_DOES_NOT_EXIST_MSG % item
message = ListEditBox.ITEM_DOES_NOT_EXIST_MSG % name
break

validity_type = ValidationSupport.WARNING

color = ValidationSupport.ERROR_COLOR if not valid else self._valid_color

self._validation_support.setValidationMessage(message, validity_type)
self._list_edit_line.setToolTip(message)
palette.setColor(self._list_edit_line.backgroundRole(), color)

self._list_edit_line.setPalette(palette)

if valid:
Expand Down
22 changes: 18 additions & 4 deletions src/ert/gui/tools/export/export_panel.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
from typing import TYPE_CHECKING, Optional

from qtpy.QtWidgets import QCheckBox, QWidget
Expand All @@ -23,6 +24,7 @@ def __init__(
storage: LocalStorage,
parent: Optional[QWidget] = None,
) -> None:
self.storage = storage
description = "The CSV export requires some information before it starts:"
super().__init__("export", description, parent)

Expand All @@ -37,9 +39,12 @@ def __init__(
)
design_matrix_path_chooser = PathChooser(self.design_matrix_path_model)

self.list_edit = ListEditBox(
[ensemble.name for ensemble in storage.ensembles if ensemble.has_data()]
)
ensemble_with_data_dict = {
ensemble.id: ensemble.name
for ensemble in storage.ensembles
if ensemble.has_data()
}
self.list_edit = ListEditBox(ensemble_with_data_dict)

self.drop_const_columns_check = QCheckBox()
self.drop_const_columns_check.setChecked(False)
Expand All @@ -60,7 +65,16 @@ def output_path(self) -> Optional[str]:

@property
def ensemble_list(self) -> str:
return ",".join(self.list_edit.getItems())
ensembles = {
str(ensemble.id): {
"ensemble_name": ensemble.name,
"experiment_name": ensemble.experiment.name,
}
for ensemble in self.storage.ensembles
if ensemble.name in self.list_edit.getItems().values()
}

return json.dumps(ensembles)

@property
def design_matrix_path(self) -> Optional[str]:
Expand Down
88 changes: 47 additions & 41 deletions src/ert/resources/workflows/jobs/internal-gui/scripts/csv_export.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os

import pandas
Expand All @@ -22,9 +23,9 @@ class CSVExportJob(ErtScript):
Optional arguments:
ensemble_list: a comma separated list of ensembles to export (no spaces allowed)
if no list is provided the current ensemble is exported
a single * can be used to export all ensembles
ensemble_list: a JSON string representation of a dictionary where keys are
UUID strings and values are ensemble names.
A single * can be used to export all ensembles
design_matrix: a path to a file containing the design matrix
Expand Down Expand Up @@ -57,59 +58,64 @@ def run(
design_matrix_path = None if len(workflow_args) < 3 else workflow_args[2]
_ = True if len(workflow_args) < 4 else workflow_args[3]
drop_const_cols = False if len(workflow_args) < 5 else workflow_args[4]
ensembles = []
facade = LibresFacade(ert_config)

ensembles = ensemble_list.split(",")
ensemble_dict = json.loads(ensemble_list) if ensemble_list else {}

if ensemble_list is None or len(ensembles) == 0:
ensembles = "default"
# Use the keys (UUIDs as strings) to get ensembles
ensembles = []
for ensemble_id in ensemble_dict:
ensemble = self.storage.get_ensemble(ensemble_id)
ensembles.append(ensemble)

if design_matrix_path is not None:
if not os.path.exists(design_matrix_path):
raise UserWarning("The design matrix file does not exists!")
raise UserWarning("The design matrix file does not exist!")

if not os.path.isfile(design_matrix_path):
raise UserWarning("The design matrix is not a file!")

data = pandas.DataFrame()

for ensemble in ensembles:
ensemble = ensemble.strip()

try:
ensemble = self.storage.get_ensemble_by_name(ensemble)
except KeyError as exc:
raise UserWarning(f"The ensemble '{ensemble}' does not exist!") from exc

if not ensemble.has_data():
raise UserWarning(f"The ensemble '{ensemble}' does not have any data!")

ensemble_data = ensemble.load_all_gen_kw_data()
if not ensemble.has_data():
raise UserWarning(
f"The ensemble '{ensemble.name}' does not have any data!"
)

ensemble_data = ensemble.load_all_gen_kw_data()

if design_matrix_path is not None:
design_matrix_data = loadDesignMatrix(design_matrix_path)
if not design_matrix_data.empty:
ensemble_data = ensemble_data.join(
design_matrix_data, how="outer"
)

misfit_data = facade.load_all_misfit_data(ensemble)
if not misfit_data.empty:
ensemble_data = ensemble_data.join(misfit_data, how="outer")

summary_data = ensemble.load_all_summary_data()
if not summary_data.empty:
ensemble_data = ensemble_data.join(summary_data, how="outer")
else:
ensemble_data["Date"] = None
ensemble_data.set_index(["Date"], append=True, inplace=True)

ensemble_data["Iteration"] = ensemble.iteration
ensemble_data["Ensemble"] = ensemble.name
ensemble_data.set_index(
["Ensemble", "Iteration"], append=True, inplace=True
)

data = pandas.concat([data, ensemble_data])

if design_matrix_path is not None:
design_matrix_data = loadDesignMatrix(design_matrix_path)
if not design_matrix_data.empty:
ensemble_data = ensemble_data.join(design_matrix_data, how="outer")

misfit_data = facade.load_all_misfit_data(ensemble)
if not misfit_data.empty:
ensemble_data = ensemble_data.join(misfit_data, how="outer")

summary_data = ensemble.load_all_summary_data()
if not summary_data.empty:
ensemble_data = ensemble_data.join(summary_data, how="outer")
else:
ensemble_data["Date"] = None
ensemble_data.set_index(["Date"], append=True, inplace=True)

ensemble_data["Iteration"] = ensemble.iteration
ensemble_data["Ensemble"] = ensemble.name
ensemble_data.set_index(
["Ensemble", "Iteration"], append=True, inplace=True
)

data = pandas.concat([data, ensemble_data])
except KeyError as exc:
raise UserWarning(
f"The ensemble '{ensemble.name}' does not exist!"
) from exc

data = data.reorder_levels(["Realization", "Iteration", "Date", "Ensemble"])
if drop_const_cols:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import contextlib
import json
import os

import numpy
Expand Down Expand Up @@ -77,37 +78,27 @@ def run(
storage,
workflow_args,
):
"""The run method will export the RFT's for all wells and all ensembles.
"""The run method will export the RFT's for all wells and all ensembles."""

The successful operation of this method hinges on two naming
conventions:
1. All the GEN_DATA RFT observations have key RFT_$WELL
2. The trajectory files are in $trajectory_path/$WELL.txt
or $trajectory_path/$WELL_R.txt
"""
output_file = workflow_args[0]
trajectory_path = workflow_args[1]
ensemble_list = None if len(workflow_args) < 3 else workflow_args[2]
drop_const_cols = False if len(workflow_args) < 4 else bool(workflow_args[3])

wells = set()

ensemble_names = []
if ensemble_list is not None:
ensemble_names = ensemble_list.split(",")
# Parse the ensemble_list from JSON string to dictionary
ensemble_dict = json.loads(ensemble_list) if ensemble_list else {}

if len(ensemble_names) == 0:
if not ensemble_dict:
raise UserWarning("No ensembles given to load from")

data = []
for ensemble_name in ensemble_names:
ensemble_name = ensemble_name.strip()
ensemble_data = []
for ensemble_id, ensemble_info in ensemble_dict.items():
ensemble_name = ensemble_info["ensemble_name"]

try:
ensemble = storage.get_ensemble_by_name(ensemble_name)
ensemble = storage.get_ensemble(ensemble_id)
except KeyError as exc:
raise UserWarning(
f"The ensemble '{ensemble_name}' does not exist!"
Expand All @@ -130,6 +121,7 @@ def run(
" GENERAL_OBSERVATIONS starting with RFT_*"
)

ensemble_data = []
for obs_key in obs_keys:
well = obs_key.replace("RFT_", "")
wells.add(well)
Expand All @@ -155,8 +147,6 @@ def run(
columns=realizations,
)

realizations = ensemble.get_realization_list_with_responses()

# Trajectory
trajectory_file = os.path.join(trajectory_path, f"{well}.txt")
if not os.path.isfile(trajectory_file):
Expand Down Expand Up @@ -223,8 +213,12 @@ def getArguments(self, parent, storage):
trajectory_chooser = PathChooser(trajectory_model)
trajectory_chooser.setObjectName("trajectory_chooser")

all_ensemble_list = [ensemble.name for ensemble in storage.ensembles]
list_edit = ListEditBox(all_ensemble_list)
ensemble_with_data_dict = {
ensemble.id: ensemble.name
for ensemble in storage.ensembles
if ensemble.has_data()
}
list_edit = ListEditBox(ensemble_with_data_dict)
list_edit.setObjectName("list_of_ensembles")

drop_const_columns_check = QCheckBox()
Expand All @@ -244,12 +238,21 @@ def getArguments(self, parent, storage):
success = dialog.showAndTell()

if success:
ensemble_list = ",".join(list_edit.getItems())
ensemble_list = {
str(ensemble.id): {
"ensemble_name": ensemble.name,
"experiment_name": ensemble.experiment.name,
}
for ensemble in storage.ensembles
if ensemble.name in list_edit.getItems().values()
}
with contextlib.suppress(ValueError):
return [
output_path_model.getPath(),
trajectory_model.getPath(),
ensemble_list,
json.dumps(
ensemble_list
), # Return the ensemble list as a JSON string
drop_const_columns_check.isChecked(),
]

Expand Down
Loading

0 comments on commit 882805a

Please sign in to comment.