From 882805a1bef8a176320d6e265219efc3185a8b86 Mon Sep 17 00:00:00 2001 From: Feda Curic Date: Fri, 23 Aug 2024 08:38:07 +0200 Subject: [PATCH] Don't return duplicate data from export tool Happens when there are multiple experiments that have ensembles with same names. --- src/ert/gui/ertwidgets/listeditbox.py | 36 ++++---- src/ert/gui/tools/export/export_panel.py | 22 ++++- .../jobs/internal-gui/scripts/csv_export.py | 88 ++++++++++--------- .../scripts/gen_data_rft_export.py | 49 ++++++----- tests/unit_tests/gui/test_csv_export.py | 68 +++++++++++--- 5 files changed, 168 insertions(+), 95 deletions(-) diff --git a/src/ert/gui/ertwidgets/listeditbox.py b/src/ert/gui/ertwidgets/listeditbox.py index 64ef9cca3f0..92aeadc1dc2 100644 --- a/src/ert/gui/ertwidgets/listeditbox.py +++ b/src/ert/gui/ertwidgets/listeditbox.py @@ -1,4 +1,5 @@ -from typing import Iterable, List, Optional +from typing import Dict, Iterable, Optional +from uuid import UUID from qtpy.QtCore import QSize, Qt from qtpy.QtGui import QIcon, QKeyEvent @@ -86,13 +87,14 @@ class ListEditBox(QWidget): NO_ITEMS_SPECIFIED_MSG = "The list must contain at least one item or * (for all)." DEFAULT_MSG = "A list of comma separated ensemble names or * for all." - def __init__(self, possible_items: List[str]) -> None: + def __init__(self, possible_items: Dict[UUID, str]) -> None: QWidget.__init__(self) self._editing = True - self._possible_items = possible_items + self._possible_items_dict = possible_items + self._possible_items = list(possible_items.values()) - self._list_edit_line = AutoCompleteLineEdit(possible_items, self) + self._list_edit_line = AutoCompleteLineEdit(self._possible_items, self) self._list_edit_line.setMinimumWidth(350) layout = QHBoxLayout() @@ -127,21 +129,27 @@ def getListText(self) -> str: text = "".join(text.split()) return text - def getItems(self) -> List[str]: + def getItems(self) -> Dict[UUID, str]: text = self.getListText() items = text.split(",") if len(items) == 1 and items[0] == "*": - items = self._possible_items + return self._possible_items_dict - return [item for item in items if len(item) > 0] + result = {} + for item in items: + item = item.strip() + for uuid, name in self._possible_items_dict.items(): + if name == item: + result[uuid] = name + break + + return result def validateList(self) -> None: """Called whenever the list is modified""" palette = self._list_edit_line.palette() - items = self.getItems() - valid = True message = "" @@ -149,19 +157,17 @@ def validateList(self) -> None: valid = False message = ListEditBox.NO_ITEMS_SPECIFIED_MSG else: - for item in items: - if item not in self._possible_items: + for _, name in items.items(): + if name not in self._possible_items_dict.values(): valid = False - message = ListEditBox.ITEM_DOES_NOT_EXIST_MSG % item + message = ListEditBox.ITEM_DOES_NOT_EXIST_MSG % name + break validity_type = ValidationSupport.WARNING - color = ValidationSupport.ERROR_COLOR if not valid else self._valid_color - self._validation_support.setValidationMessage(message, validity_type) self._list_edit_line.setToolTip(message) palette.setColor(self._list_edit_line.backgroundRole(), color) - self._list_edit_line.setPalette(palette) if valid: diff --git a/src/ert/gui/tools/export/export_panel.py b/src/ert/gui/tools/export/export_panel.py index f4241441ed8..03c1a1c33df 100644 --- a/src/ert/gui/tools/export/export_panel.py +++ b/src/ert/gui/tools/export/export_panel.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from typing import TYPE_CHECKING, Optional from qtpy.QtWidgets import QCheckBox, QWidget @@ -23,6 +24,7 @@ def __init__( storage: LocalStorage, parent: Optional[QWidget] = None, ) -> None: + self.storage = storage description = "The CSV export requires some information before it starts:" super().__init__("export", description, parent) @@ -37,9 +39,12 @@ def __init__( ) design_matrix_path_chooser = PathChooser(self.design_matrix_path_model) - self.list_edit = ListEditBox( - [ensemble.name for ensemble in storage.ensembles if ensemble.has_data()] - ) + ensemble_with_data_dict = { + ensemble.id: ensemble.name + for ensemble in storage.ensembles + if ensemble.has_data() + } + self.list_edit = ListEditBox(ensemble_with_data_dict) self.drop_const_columns_check = QCheckBox() self.drop_const_columns_check.setChecked(False) @@ -60,7 +65,16 @@ def output_path(self) -> Optional[str]: @property def ensemble_list(self) -> str: - return ",".join(self.list_edit.getItems()) + ensembles = { + str(ensemble.id): { + "ensemble_name": ensemble.name, + "experiment_name": ensemble.experiment.name, + } + for ensemble in self.storage.ensembles + if ensemble.name in self.list_edit.getItems().values() + } + + return json.dumps(ensembles) @property def design_matrix_path(self) -> Optional[str]: diff --git a/src/ert/resources/workflows/jobs/internal-gui/scripts/csv_export.py b/src/ert/resources/workflows/jobs/internal-gui/scripts/csv_export.py index 829b9665ab4..18d5b340f8c 100644 --- a/src/ert/resources/workflows/jobs/internal-gui/scripts/csv_export.py +++ b/src/ert/resources/workflows/jobs/internal-gui/scripts/csv_export.py @@ -1,3 +1,4 @@ +import json import os import pandas @@ -22,9 +23,9 @@ class CSVExportJob(ErtScript): Optional arguments: - ensemble_list: a comma separated list of ensembles to export (no spaces allowed) - if no list is provided the current ensemble is exported - a single * can be used to export all ensembles + ensemble_list: a JSON string representation of a dictionary where keys are + UUID strings and values are ensemble names. + A single * can be used to export all ensembles design_matrix: a path to a file containing the design matrix @@ -57,17 +58,19 @@ def run( design_matrix_path = None if len(workflow_args) < 3 else workflow_args[2] _ = True if len(workflow_args) < 4 else workflow_args[3] drop_const_cols = False if len(workflow_args) < 5 else workflow_args[4] - ensembles = [] facade = LibresFacade(ert_config) - ensembles = ensemble_list.split(",") + ensemble_dict = json.loads(ensemble_list) if ensemble_list else {} - if ensemble_list is None or len(ensembles) == 0: - ensembles = "default" + # Use the keys (UUIDs as strings) to get ensembles + ensembles = [] + for ensemble_id in ensemble_dict: + ensemble = self.storage.get_ensemble(ensemble_id) + ensembles.append(ensemble) if design_matrix_path is not None: if not os.path.exists(design_matrix_path): - raise UserWarning("The design matrix file does not exists!") + raise UserWarning("The design matrix file does not exist!") if not os.path.isfile(design_matrix_path): raise UserWarning("The design matrix is not a file!") @@ -75,41 +78,44 @@ def run( data = pandas.DataFrame() for ensemble in ensembles: - ensemble = ensemble.strip() - try: - ensemble = self.storage.get_ensemble_by_name(ensemble) - except KeyError as exc: - raise UserWarning(f"The ensemble '{ensemble}' does not exist!") from exc - - if not ensemble.has_data(): - raise UserWarning(f"The ensemble '{ensemble}' does not have any data!") - - ensemble_data = ensemble.load_all_gen_kw_data() + if not ensemble.has_data(): + raise UserWarning( + f"The ensemble '{ensemble.name}' does not have any data!" + ) + + ensemble_data = ensemble.load_all_gen_kw_data() + + if design_matrix_path is not None: + design_matrix_data = loadDesignMatrix(design_matrix_path) + if not design_matrix_data.empty: + ensemble_data = ensemble_data.join( + design_matrix_data, how="outer" + ) + + misfit_data = facade.load_all_misfit_data(ensemble) + if not misfit_data.empty: + ensemble_data = ensemble_data.join(misfit_data, how="outer") + + summary_data = ensemble.load_all_summary_data() + if not summary_data.empty: + ensemble_data = ensemble_data.join(summary_data, how="outer") + else: + ensemble_data["Date"] = None + ensemble_data.set_index(["Date"], append=True, inplace=True) + + ensemble_data["Iteration"] = ensemble.iteration + ensemble_data["Ensemble"] = ensemble.name + ensemble_data.set_index( + ["Ensemble", "Iteration"], append=True, inplace=True + ) + + data = pandas.concat([data, ensemble_data]) - if design_matrix_path is not None: - design_matrix_data = loadDesignMatrix(design_matrix_path) - if not design_matrix_data.empty: - ensemble_data = ensemble_data.join(design_matrix_data, how="outer") - - misfit_data = facade.load_all_misfit_data(ensemble) - if not misfit_data.empty: - ensemble_data = ensemble_data.join(misfit_data, how="outer") - - summary_data = ensemble.load_all_summary_data() - if not summary_data.empty: - ensemble_data = ensemble_data.join(summary_data, how="outer") - else: - ensemble_data["Date"] = None - ensemble_data.set_index(["Date"], append=True, inplace=True) - - ensemble_data["Iteration"] = ensemble.iteration - ensemble_data["Ensemble"] = ensemble.name - ensemble_data.set_index( - ["Ensemble", "Iteration"], append=True, inplace=True - ) - - data = pandas.concat([data, ensemble_data]) + except KeyError as exc: + raise UserWarning( + f"The ensemble '{ensemble.name}' does not exist!" + ) from exc data = data.reorder_levels(["Realization", "Iteration", "Date", "Ensemble"]) if drop_const_cols: diff --git a/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py b/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py index c04c426252a..9ac47046252 100644 --- a/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py +++ b/src/ert/resources/workflows/jobs/internal-gui/scripts/gen_data_rft_export.py @@ -1,4 +1,5 @@ import contextlib +import json import os import numpy @@ -77,16 +78,8 @@ def run( storage, workflow_args, ): - """The run method will export the RFT's for all wells and all ensembles. + """The run method will export the RFT's for all wells and all ensembles.""" - The successful operation of this method hinges on two naming - conventions: - - 1. All the GEN_DATA RFT observations have key RFT_$WELL - 2. The trajectory files are in $trajectory_path/$WELL.txt - or $trajectory_path/$WELL_R.txt - - """ output_file = workflow_args[0] trajectory_path = workflow_args[1] ensemble_list = None if len(workflow_args) < 3 else workflow_args[2] @@ -94,20 +87,18 @@ def run( wells = set() - ensemble_names = [] - if ensemble_list is not None: - ensemble_names = ensemble_list.split(",") + # Parse the ensemble_list from JSON string to dictionary + ensemble_dict = json.loads(ensemble_list) if ensemble_list else {} - if len(ensemble_names) == 0: + if not ensemble_dict: raise UserWarning("No ensembles given to load from") data = [] - for ensemble_name in ensemble_names: - ensemble_name = ensemble_name.strip() - ensemble_data = [] + for ensemble_id, ensemble_info in ensemble_dict.items(): + ensemble_name = ensemble_info["ensemble_name"] try: - ensemble = storage.get_ensemble_by_name(ensemble_name) + ensemble = storage.get_ensemble(ensemble_id) except KeyError as exc: raise UserWarning( f"The ensemble '{ensemble_name}' does not exist!" @@ -130,6 +121,7 @@ def run( " GENERAL_OBSERVATIONS starting with RFT_*" ) + ensemble_data = [] for obs_key in obs_keys: well = obs_key.replace("RFT_", "") wells.add(well) @@ -155,8 +147,6 @@ def run( columns=realizations, ) - realizations = ensemble.get_realization_list_with_responses() - # Trajectory trajectory_file = os.path.join(trajectory_path, f"{well}.txt") if not os.path.isfile(trajectory_file): @@ -223,8 +213,12 @@ def getArguments(self, parent, storage): trajectory_chooser = PathChooser(trajectory_model) trajectory_chooser.setObjectName("trajectory_chooser") - all_ensemble_list = [ensemble.name for ensemble in storage.ensembles] - list_edit = ListEditBox(all_ensemble_list) + ensemble_with_data_dict = { + ensemble.id: ensemble.name + for ensemble in storage.ensembles + if ensemble.has_data() + } + list_edit = ListEditBox(ensemble_with_data_dict) list_edit.setObjectName("list_of_ensembles") drop_const_columns_check = QCheckBox() @@ -244,12 +238,21 @@ def getArguments(self, parent, storage): success = dialog.showAndTell() if success: - ensemble_list = ",".join(list_edit.getItems()) + ensemble_list = { + str(ensemble.id): { + "ensemble_name": ensemble.name, + "experiment_name": ensemble.experiment.name, + } + for ensemble in storage.ensembles + if ensemble.name in list_edit.getItems().values() + } with contextlib.suppress(ValueError): return [ output_path_model.getPath(), trajectory_model.getPath(), - ensemble_list, + json.dumps( + ensemble_list + ), # Return the ensemble list as a JSON string drop_const_columns_check.isChecked(), ] diff --git a/tests/unit_tests/gui/test_csv_export.py b/tests/unit_tests/gui/test_csv_export.py index 2b391feda7c..70597d819f4 100644 --- a/tests/unit_tests/gui/test_csv_export.py +++ b/tests/unit_tests/gui/test_csv_export.py @@ -1,27 +1,25 @@ +import contextlib import os +import shutil from pathlib import Path +import pandas as pd import pytest from qtpy.QtCore import Qt, QTimer -from qtpy.QtWidgets import ( - QMessageBox, -) +from qtpy.QtWidgets import QComboBox, QMessageBox, QWidget from ert.gui.ertwidgets.listeditbox import ListEditBox from ert.gui.ertwidgets.pathchooser import PathChooser +from ert.gui.simulation.experiment_panel import EnsembleExperimentPanel, ExperimentPanel +from ert.gui.simulation.run_dialog import RunDialog from ert.gui.tools.export.export_panel import ExportDialog from ert.libres_facade import LibresFacade +from ert.run_models import EnsembleExperiment -from .conftest import ( - get_child, - wait_for_child, -) +from .conftest import get_child, wait_for_child -@pytest.mark.parametrize("ensemble_select", ["default_0", "*"]) -def test_csv_export(esmda_has_run, qtbot, ensemble_select): - gui = esmda_has_run - +def export_data(gui, qtbot, ensemble_select, export_path="output.csv"): file_name = None def handle_export_dialog(): @@ -53,9 +51,13 @@ def handle_finished_box(): QTimer.singleShot(3000, handle_finished_box) gui.tools["Export data"].trigger() - assert file_name == "output.csv" + assert file_name == export_path qtbot.waitUntil(lambda: os.path.exists(file_name)) + return file_name + + +def verify_exported_content(file_name, gui, ensemble_select): file_content = Path(file_name).read_text(encoding="utf-8") ensemble_names = [ensemble_select] if ensemble_select == "*": @@ -72,3 +74,45 @@ def handle_finished_box(): f",{name},{gen_kw_data.iloc[i]['COEFFS:a']},{gen_kw_data.iloc[i]['COEFFS:b']},{gen_kw_data.iloc[i]['COEFFS:c']},{misfit_data.iloc[i]['MISFIT:POLY_OBS']},{misfit_data.iloc[i]['MISFIT:TOTAL']}" in file_content ) + + +@pytest.mark.parametrize("ensemble_select", ["default_0", "*"]) +def test_csv_export(esmda_has_run, qtbot, ensemble_select): + gui = esmda_has_run + + file_name = export_data(gui, qtbot, ensemble_select) + verify_exported_content(file_name, gui, ensemble_select) + + +def run_experiment_and_export(gui, qtbot): + experiment_panel = get_child(gui, ExperimentPanel) + simulation_mode_combo = get_child(experiment_panel, QComboBox) + simulation_mode_combo.setCurrentText(EnsembleExperiment.name()) + ensemble_experiment_panel = get_child(experiment_panel, EnsembleExperimentPanel) + ensemble_experiment_panel._ensemble_name_field.setText("iter-0") + + # Avoids run path dialog + with contextlib.suppress(FileNotFoundError): + shutil.rmtree("poly_out") + + run_experiment = get_child(experiment_panel, QWidget, name="run_experiment") + qtbot.mouseClick(run_experiment, Qt.LeftButton) + + run_dialog = wait_for_child(gui, qtbot, RunDialog) + qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=100000) + qtbot.waitUntil(lambda: run_dialog._tab_widget.currentWidget() is not None) + qtbot.mouseClick(run_dialog.done_button, Qt.LeftButton) + + +def test_that_export_tool_does_not_produce_duplicate_data( + ensemble_experiment_has_run_no_failure, qtbot +): + gui = ensemble_experiment_has_run_no_failure + + run_experiment_and_export(gui, qtbot) + + file_name = export_data(gui, qtbot, "*") + + df = pd.read_csv(file_name) + # Make sure data is not duplicated. + assert df.iloc[0]["COEFFS:a"] != df.iloc[20]["COEFFS:a"]