From 6d53507bc0aff8a663124d112d927945c0dd8003 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Mon, 10 Apr 2023 13:10:03 +0900
Subject: [PATCH 01/27] Add dataframe support for extended equality

---
 test/extended_equality.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/extended_equality.py b/test/extended_equality.py
index 3cfbfba3ad..0448d8c4f8 100644
--- a/test/extended_equality.py
+++ b/test/extended_equality.py
@@ -21,12 +21,14 @@
 from typing import Any, List, Union

 import numpy as np
+import pandas as pd
 import uncertainties
 from lmfit import Model
 from multimethod import multimethod
 from qiskit_experiments.curve_analysis.curve_data import CurveFitResult
 from qiskit_experiments.data_processing import DataAction, DataProcessor
 from qiskit_experiments.database_service.utils import (
+    ThreadSafeDataFrame,
     ThreadSafeList,
     ThreadSafeOrderedDict,
 )
@@ -272,6 +274,15 @@ def _check_configurable_classes(
     return is_equivalent(data1.config(), data2.config(), **kwargs)


+@_is_equivalent_dispatcher.register
+def _check_dataframes(
+    data1: Union[pd.DataFrame, ThreadSafeDataFrame],
+    data2: Union[pd.DataFrame, ThreadSafeDataFrame],
+):
+    """Check equality of data frame which may involve Qiskit Experiments class value."""
+    return is_equivalent(data1.to_dict(orient="index"), data2.to_dict(orient="index"))
+
+
 @_is_equivalent_dispatcher.register
 def _check_experiment_data(
     data1: ExperimentData,

From a24d7b4d861e1686cd784a56ee0f0cd9a33e62ee Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Mon, 10 Apr 2023 13:10:56 +0900
Subject: [PATCH 02/27] Replace ExperimentData._analysis_results with dataframe

This change decouples AnalysisResult from ExperimentData. Since
AnalysisResult not only defines the data model but also provides the API
for the IBM experiment service, this coupling limits the capability of
experiment data analysis. ExperimentData.analysis_results still returns
AnalysisResult objects for backward compatibility, but they are no longer
the stored objects themselves; each returned AnalysisResult is newly
generated from the dataframe.
---
 .../database_service/device_component.py      |   7 +-
 qiskit_experiments/database_service/utils.py  | 238 +++++++++-
 qiskit_experiments/framework/base_analysis.py |  72 ++-
 .../framework/composite/composite_analysis.py |  40 +-
 .../framework/experiment_data.py              | 431 ++++++++++++++----
 test/framework/test_framework.py              |   4 +-
 6 files changed, 651 insertions(+), 141 deletions(-)

diff --git a/qiskit_experiments/database_service/device_component.py b/qiskit_experiments/database_service/device_component.py
index 2d0bbdca59..a85baff4c9 100644
--- a/qiskit_experiments/database_service/device_component.py
+++ b/qiskit_experiments/database_service/device_component.py
@@ -81,9 +81,10 @@ def to_component(string: str) -> DeviceComponent:
     Raises:
         ValueError: If input string is not a valid device component.
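+
+    For example (an illustrative sketch of the mapping implemented below):
+
+    .. code-block:: python
+
+        to_component("Q1")  # Qubit(1)
+        to_component("R0")  # Resonator(0)
+        to_component("X5")  # UnknownComponent("X5")
+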
""" + if isinstance(string, DeviceComponent): + return string if string.startswith("Q"): return Qubit(int(string[1:])) - elif string.startswith("R"): + if string.startswith("R"): return Resonator(int(string[1:])) - else: - return UnknownComponent(string) + return UnknownComponent(string) diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py index 2a388a6acb..772f481905 100644 --- a/qiskit_experiments/database_service/utils.py +++ b/qiskit_experiments/database_service/utils.py @@ -19,12 +19,15 @@ from abc import ABC, abstractmethod from collections import OrderedDict from datetime import datetime, timezone -from typing import Callable, Tuple, Dict, Any, Union, Type, Optional +from typing import Callable, Tuple, List, Dict, Any, Union, Type, Optional import json +import uuid +import pandas as pd import dateutil.parser import pkg_resources from dateutil import tz + from qiskit.version import __version__ as terra_version from qiskit_ibm_experiment import ( @@ -276,3 +279,236 @@ def append(self, value): """Append to the list.""" with self._lock: self._container.append(value) + + +class ThreadSafeDataFrame(ThreadSafeContainer): + """Thread safe data frame. + + This class wraps pandas dataframe with predefined column labels, + which is specified by the class method `_default_columns`. + Subclass can override this method to provide default labels specific to its data structure. + + This object is expected to be used internally in the ExperimentData. + """ + + def __init__(self, init_values=None): + """ThreadSafeContainer constructor.""" + self._columns = self._default_columns() + self._extra = [] + super().__init__(init_values) + + @classmethod + def _default_columns(cls) -> List[str]: + return [] + + def _init_container(self, init_values: Optional[Union[Dict, pd.DataFrame]] = None): + """Initialize the container.""" + if init_values is None: + return pd.DataFrame(columns=self.get_columns()) + if isinstance(init_values, pd.DataFrame): + input_columns = list(init_values.columns) + if input_columns != self.get_columns(): + raise ValueError( + f"Input data frame contains unexpected columns {input_columns}. " + f"{self.__class__.__name__} defines {self.get_columns()} as default columns." + ) + return init_values + if isinstance(init_values, dict): + return pd.DataFrame.from_dict( + data=init_values, + orient="index", + columns=self.get_columns(), + ) + raise TypeError(f"Initial value of {type(init_values)} is not valid data type.") + + def get_columns(self) -> List[str]: + """Return current column names. + + Returns: + List of column names. + """ + return self._columns.copy() + + def add_columns(self, *new_columns: str, default_value: Any = None): + """Add new columns to the table. + + This operation mutates the current container. + + Args: + new_columns: Name of columns to add. + default_value: Default value to fill added columns. 
+ """ + # Order sensitive + new_columns = [c for c in new_columns if c not in self.get_columns()] + self._extra.extend(new_columns) + + # Update current table + with self._lock: + for new_column in new_columns: + self._container.insert(len(self._container.columns), new_column, default_value) + self._columns.extend(new_columns) + + def clear(self): + """Remove all elements from this container.""" + with self._lock: + self._container = self._init_container() + self._columns = self._default_columns() + self._extra = [] + + def container( + self, + collapse_extra: bool = True, + ) -> pd.DataFrame: + """Return bare pandas dataframe. + + Args: + collapse_extra: Set True to show only default columns. + + Returns: + Bare pandas dataframe. This object is no longer thread safe. + """ + with self._lock: + container = self._container + + if collapse_extra: + return container[self._default_columns()] + return container + + def add_entry(self, **kwargs): + """Add new entry to the dataframe. + + Args: + kwargs: Description of new entry to register. + """ + columns = self.get_columns() + missing = kwargs.keys() - set(columns) + if missing: + self.add_columns(*sorted(missing)) + + template = dict.fromkeys(self.get_columns()) + template.update(kwargs) + + if not template["result_id"]: + template["result_id"] = uuid.uuid4().hex + name = self._unique_table_index(template["result_id"]) + with self._lock: + self._container.loc[name] = list(template.values()) + + def _unique_table_index(self, index_name: str): + """Generate unique index name with 8 characters.""" + if not isinstance(index_name, str): + index_name = str(index_name) + truncated = index_name[:8] + with self.lock: + while truncated in self._container.index: + truncated = uuid.uuid4().hex[:8] + return truncated + + def _repr_html_(self) -> Union[str, None]: + """Return HTML representation of this dataframe.""" + with self._lock: + # Remove underscored columns. + return self._container._repr_html_() + + def __getattr__(self, item): + lock = object.__getattribute__(self, "_lock") + + with lock: + # Lock when access to container's member. + container = object.__getattribute__(self, "_container") + if hasattr(container, item): + return getattr(container, item) + raise AttributeError(f"'ThreadSafeDataFrame' object has no attribute '{item}'") + + def __json_encode__(self) -> Dict[str, Any]: + return { + "class": "ThreadSafeDataFrame", + "data": self._container.to_dict(orient="index"), + "columns": self._columns, + "extra": self._extra, + } + + @classmethod + def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame": + if not value.get("class", None) == "ThreadSafeDataFrame": + raise ValueError("JSON decoded value for ThreadSafeDataFrame is not valid class type.") + + instance = object.__new__(AnalysisResultTable) + # Need to update self._columns first to set extra columns in the dataframe container. 
+ instance._columns = value.get("columns", cls._default_columns()) + instance._extra = value.get("extra", []) + instance._lock = threading.RLock() + instance._container = instance._init_container(init_values=value.get("data", {})) + return instance + + +class AnalysisResultTable(ThreadSafeDataFrame): + """Thread safe dataframe to store the analysis results.""" + + @classmethod + def _default_columns(cls) -> List[str]: + return [ + "name", + "value", + "quality", + "components", + "experiment", + "experiment_id", + "result_id", + "tags", + "backend", + "run_time", + "created_time", + ] + + @classmethod + def _tier1(cls) -> List[str]: + """The data group that the analysis class produces.""" + return [ + "name", + "value", + "components", + "quality", + ] + + @classmethod + def _tier2(cls) -> List[str]: + """The data group of metadata that the experiment class provides.""" + return [ + "experiment", + "backend", + "run_time", + ] + + @classmethod + def _tier3(cls) -> List[str]: + """The data group which is used to communicate with the experiment service.""" + return [ + "experiment_id", + "result_id", + "tags", + "created_time", + ] + + def filter_columns(self, verbosity: int) -> List[str]: + """Return column names at given verbosity level. + + Extra columns are always added. + + Args: + verbosity: Level of verbosity of returned data table (1, 2, 3): + + * 1 (minimum): Return data from the analysis. + * 2 (normal): With supplemental data experiment. + * 3 (finest): With extra data to communicate with experiment service. + + Return: + Valid column names. + """ + if verbosity == 1: + return self._tier1() + self._extra + if verbosity == 2: + return self._tier1() + self._tier2() + self._extra + if verbosity == 3: + return self._tier1() + self._tier2() + self._tier3() + self._extra + raise ValueError(f"verbosity {verbosity} is not defined. 
Choose value from 1, 2, 3.") diff --git a/qiskit_experiments/framework/base_analysis.py b/qiskit_experiments/framework/base_analysis.py index 53101a0856..07b3f07642 100644 --- a/qiskit_experiments/framework/base_analysis.py +++ b/qiskit_experiments/framework/base_analysis.py @@ -15,6 +15,7 @@ from abc import ABC, abstractmethod import copy from collections import OrderedDict +from datetime import datetime, timezone from typing import List, Tuple, Union, Dict from qiskit_experiments.database_service.device_component import Qubit @@ -23,7 +24,6 @@ from qiskit_experiments.framework.experiment_data import ExperimentData from qiskit_experiments.framework.configs import AnalysisConfig from qiskit_experiments.framework.analysis_result_data import AnalysisResultData -from qiskit_experiments.framework.analysis_result import AnalysisResult class BaseAnalysis(ABC, StoreInitArgs): @@ -153,8 +153,6 @@ def run( if not replace_results and _requires_copy(experiment_data): experiment_data = experiment_data.copy() - experiment_components = self._get_experiment_components(experiment_data) - # Set Analysis options if not options: analysis = self @@ -162,21 +160,41 @@ def run( analysis = self.copy() analysis.set_options(**options) - def run_analysis(expdata): + def run_analysis(expdata: ExperimentData): # Clearing previous analysis data experiment_data._clear_results() - # making new analysis + experiment_components = self._get_experiment_components(experiment_data) + + # Making new analysis results, figures = analysis._run_analysis(expdata) - # Add components - analysis_results = [ - analysis._format_analysis_result( - result, expdata.experiment_id, experiment_components - ) - for result in results - ] - # Update experiment data with analysis results - if analysis_results: - expdata.add_analysis_results(analysis_results) + + if results: + for result in results: + supplementary = result.extra + if result.chisq is not None: + supplementary["chisq"] = result.chisq + if "experiment" not in supplementary: + supplementary["experiment"] = expdata.experiment_type + if "experiment_id" not in supplementary: + supplementary["experiment_id"] = expdata.experiment_id + if "backend" not in supplementary: + supplementary["backend"] = expdata.backend_name + if "run_time" not in supplementary: + # TODO add job RUNNING time + supplementary["run_time"] = None + if "created_time" not in supplementary: + supplementary["created_time"] = datetime.now(timezone.utc) + # Bypass generation of AnalysisResult, i.e. calling add_analysis_results. + # AnalysisResult is a data container with experiment service API. + # Since analysis is a local operation in the client, + # we should directly populate analysis result dataframe. 
+ expdata.add_analysis_results( + name=result.name, + value=result.value, + quality=result.quality, + components=result.device_components or experiment_components, + **supplementary, + ) if figures: expdata.add_figures(figures, figure_names=self.options.figure_names) @@ -195,30 +213,6 @@ def _get_experiment_components(self, experiment_data: ExperimentData): return experiment_components - def _format_analysis_result(self, data, experiment_id, experiment_components=None): - """Format run analysis result to DbAnalysisResult""" - device_components = [] - if data.device_components: - device_components = data.device_components - elif experiment_components: - device_components = experiment_components - - if isinstance(data, AnalysisResult): - # Update device components and experiment id - data.device_components = device_components - data.experiment_id = experiment_id - return data - - return AnalysisResult( - name=data.name, - value=data.value, - device_components=device_components, - experiment_id=experiment_id, - chisq=data.chisq, - quality=data.quality, - extra=data.extra, - ) - @abstractmethod def _run_analysis( self, diff --git a/qiskit_experiments/framework/composite/composite_analysis.py b/qiskit_experiments/framework/composite/composite_analysis.py index 3e030ea993..039585e4b4 100644 --- a/qiskit_experiments/framework/composite/composite_analysis.py +++ b/qiskit_experiments/framework/composite/composite_analysis.py @@ -148,7 +148,7 @@ def _component_experiment_data(self, experiment_data: ExperimentData) -> List[Ex """Return a list of marginalized experiment data for component experiments. Args: - experiment_data: a composite experiment experiment data container. + experiment_data: a composite experiment data container. Returns: The list of analysis-ready marginalized experiment data for each @@ -355,15 +355,37 @@ def _combine_results( """ analysis_results = [] figures = [] - for i, sub_expdata in enumerate(component_experiment_data): + for sub_expdata in component_experiment_data: figures += sub_expdata._figures.values() - for result in sub_expdata.analysis_results(): - # Add metadata to distinguish the component experiment - # the result was generated from - result.extra["component_experiment"] = { - "experiment_type": sub_expdata.experiment_type, - "component_index": i, + + # Convert Dataframe Series back into AnalysisResultData + # This is due to limitation that _run_analysis must return List[AnalysisResultData], + # and some composite analysis such as TphiAnalysis overrides this method to + # return extra quantity computed from sub analysis results. + # This produces unnecessary data conversion. + # The _run_analysis mechanism seems just complicating the entire logic. + # Since it's impossible to deprecate the usage of this protected method, + # we should implement new CompositeAnalysis class with much more efficient + # internal logic. Note that the child data structure is no longer necessary + # because dataframe offers more efficient data filtering mechanisms. 
+ analysis_table = sub_expdata.analysis_results(verbosity=3, dataframe=True) + for _, series in analysis_table.iterrows(): + data_dict = series.to_dict() + primary_info = { + "name": data_dict.pop("name"), + "value": data_dict.pop("value"), + "quality": data_dict.pop("quality"), + "device_components": data_dict.pop("components"), } - analysis_results.append(result) + chisq = data_dict.pop("chisq", np.nan) + if chisq: + primary_info["chisq"] = chisq + data_dict["experiment"] = sub_expdata.experiment_type + if "experiment_id" in data_dict: + # Use experiment ID of parent experiment data. + # Sub experiment data is merged and discarded. + del data_dict["experiment_id"] + analysis_result = AnalysisResultData(**primary_info, extra=data_dict) + analysis_results.append(analysis_result) return analysis_results, figures diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index bbd11cd6c7..0e2299fee5 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -21,7 +21,7 @@ from datetime import datetime, timezone from concurrent import futures from threading import Event -from functools import wraps +from functools import wraps, singledispatch from collections import deque import contextlib import copy @@ -33,6 +33,7 @@ import json import traceback import numpy as np +import pandas as pd from dateutil import tz from matplotlib import pyplot from matplotlib.figure import Figure as MatplotlibFigure @@ -40,16 +41,23 @@ from qiskit.providers.jobstatus import JobStatus, JOB_FINAL_STATES from qiskit.exceptions import QiskitError from qiskit.providers import Job, Backend, Provider +from qiskit.utils.deprecation import deprecate_arg -from qiskit_ibm_experiment import IBMExperimentService -from qiskit_ibm_experiment import ExperimentData as ExperimentDataclass +from qiskit_ibm_experiment import ( + IBMExperimentService, + ExperimentData as ExperimentDataclass, + AnalysisResultData as AnalysisResultDataclass, + ResultQuality, +) from qiskit_experiments.framework.json import ExperimentEncoder, ExperimentDecoder from qiskit_experiments.database_service.utils import ( qiskit_version, plot_to_svg_bytes, ThreadSafeOrderedDict, ThreadSafeList, + AnalysisResultTable, ) +from qiskit_experiments.database_service.device_component import to_component, DeviceComponent from qiskit_experiments.framework.analysis_result import AnalysisResult from qiskit_experiments.framework import BackendData from qiskit_experiments.database_service.exceptions import ( @@ -338,7 +346,7 @@ def __init__( # data storage self._result_data = ThreadSafeList() self._figures = ThreadSafeOrderedDict(self._db_data.figure_names) - self._analysis_results = ThreadSafeOrderedDict() + self._analysis_results = AnalysisResultTable() self._deleted_figures = deque() self._deleted_analysis_results = deque() @@ -660,9 +668,8 @@ def hgp(self, new_hgp: str) -> None: def _clear_results(self): """Delete all currently stored analysis results and figures""" # Schedule existing analysis results for deletion next save call - for key in self._analysis_results.keys(): - self._deleted_analysis_results.append(key) - self._analysis_results = ThreadSafeOrderedDict() + self._deleted_analysis_results.extend(list(self._analysis_results["result_id"])) + self._analysis_results.clear() # Schedule existing figures for deletion next save call for key in self._figures.keys(): self._deleted_figures.append(key) @@ -727,10 +734,6 @@ def auto_save(self, save_val: bool) -> 
None: if save_val is True: self.save(save_children=False) self._auto_save = save_val - for res in self._analysis_results.values(): - # Setting private variable directly to avoid duplicate save. This - # can be removed when we start tracking changes. - res._auto_save = save_val for data in self.child_data(): data.auto_save = save_val @@ -1302,28 +1305,106 @@ def figure( return num_bytes return figure_data + @deprecate_arg( + name="results", + since="0.6", + additional_msg="Use keyword arguments rather than creating AnalysisResult object.", + package_name="qiskit-experiments", + pending=True, + ) @do_auto_save def add_analysis_results( self, - results: Union[AnalysisResult, List[AnalysisResult]], + results: Optional[Union[AnalysisResult, List[AnalysisResult]]] = None, + *, + name: Optional[str] = None, + value: Optional[Any] = None, + quality: Optional[str] = None, + components: Optional[List[DeviceComponent]] = None, + experiment: Optional[str] = None, + experiment_id: Optional[str] = None, + result_id: Optional[str] = None, + tags: Optional[List[str]] = None, + backend: Optional[str] = None, + run_time: Optional[datetime] = None, + created_time: Optional[datetime] = None, + **extra_values, ) -> None: """Save the analysis result. Args: results: Analysis results to be saved. + name: Name of the result entry. + value: Analyzed quantity. + quality: Quality of the data. + components: Associated device components. + experiment: String identifier of experiment. + experiment_id: Experiment ID associated with this analysis. + result_id: ID of this analysis entry. If not set a random UUID is generated. + tags: List of arbitrary tags. + backend: Name of associated backend. + run_time: The date time when the experiment started to run on the device. + created_time: The date time when this analysis is performed. + extra_values: Arbitrary keyword arguments for supplementary information. + New dataframe columns are created in the analysis result table with added keys. """ - if not isinstance(results, list): - results = [results] - - for result in results: - self._analysis_results[result.result_id] = result - - with contextlib.suppress(ExperimentDataError): - result.service = self.service - result.auto_save = self.auto_save - - if self.auto_save and self._service: - result.save() + if results is not None: + # TODO deprecate this path + if not isinstance(results, list): + results = [results] + for result in results: + extra_values = result.extra.copy() + if result.chisq is not None: + # Move chisq to extra. + # This is not global outcome, e.g. QPT doesn't provide chisq. 
+ extra_values["chisq"] = result.chisq + experiment = extra_values.pop("experiment", self.experiment_type) + backend = extra_values.pop("backend", self.backend_name) + run_time = extra_values.pop("run_time", None) + created_time = extra_values.pop("created_time", None) + self._analysis_results.add_entry( + name=result.name, + value=result.value, + quality=result.quality, + components=result.device_components, + experiment=experiment, + experiment_id=result.experiment_id, + result_id=result.result_id, + tags=result.tags, + backend=backend, + run_time=run_time, + created_time=created_time, + **extra_values, + ) + if self.auto_save: + result.save() + else: + experiment = experiment or self.experiment_type + experiment_id = experiment_id or self.experiment_id + tags = tags or [] + backend = backend or self.backend_name + + self._analysis_results.add_entry( + name=name, + value=value, + quality=quality, + components=components, + experiment=experiment, + experiment_id=experiment_id, + result_id=result_id, + tags=tags or [], + backend=backend, + run_time=run_time, # TODO add job RUNNING time + created_time=created_time, + **extra_values, + ) + if self.auto_save: + service_result = _series_to_service_result( + series=self._analysis_results.iloc[-1], + service=self._service, + auto_save=False, + ) + service_result.save() @do_auto_save def delete_analysis_result( @@ -1339,24 +1420,29 @@ def delete_analysis_result( Analysis result ID. Raises: - ExperimentEntryNotFound: If analysis result not found. + ExperimentEntryNotFound: If analysis result not found or multiple entries are found. """ + # Retrieve from DB if needed. + to_delete = self.analysis_results( + index=result_key, + block=False, + verbosity=3, + dataframe=True, + ) + if not isinstance(to_delete, pd.Series): + raise ExperimentEntryNotFound( + f"Multiple entries are found with result_key = {result_key}. " + "Try another key that can uniquely determine entry to delete." + ) - if isinstance(result_key, int): - result_key = self._analysis_results.keys()[result_key] - else: - # Retrieve from DB if needed. - result_key = self.analysis_results(result_key, block=False).result_id - - del self._analysis_results[result_key] - self._deleted_analysis_results.append(result_key) - + self._analysis_results.drop(to_delete.name, inplace=True) if self._service and self.auto_save: with service_exception_to_warning(): - self.service.delete_analysis_result(result_id=result_key) - self._deleted_analysis_results.remove(result_key) + self.service.delete_analysis_result(result_id=to_delete.result_id) + else: + self._deleted_analysis_results.append(to_delete.result_id) - return result_key + return to_delete.result_id def _retrieve_analysis_results(self, refresh: bool = False): """Retrieve service analysis results. @@ -1366,24 +1452,48 @@ def _retrieve_analysis_results(self, refresh: bool = False): an experiment service is available. """ # Get job results if missing experiment data. - if self.service and (not self._analysis_results or refresh): + if self.service and (len(self._analysis_results) == 0 or refresh): retrieved_results = self.service.analysis_results( experiment_id=self.experiment_id, limit=None, json_decoder=self._json_decoder ) for result in retrieved_results: - result_id = result.result_id - - self._analysis_results[result_id] = AnalysisResult(service=self.service) - self._analysis_results[result_id].set_data(result) - self._analysis_results[result_id]._created_in_db = True + # Canonicalize IBM specific data structure. 
+ # TODO define proper data schema on frontend and delegate this to service. + cano_quality = AnalysisResult.RESULT_QUALITY_TO_TEXT.get(result.quality, "unknown") + cano_components = [to_component(c) for c in result.device_components] + extra = result.result_data["_extra"] + if result.chisq is not None: + extra["chisq"] = result.chisq + self._analysis_results.add_entry( + name=result.result_type, + value=result.result_data["_value"], + quality=cano_quality, + components=cano_components, + experiment_id=result.experiment_id, + result_id=result.result_id, + tags=result.tags, + backend=result.backend_name, + created_time=result.creation_datetime, + **extra, + ) + @deprecate_arg( + name="dataframe", + deprecation_description="Setting ``dataframe`` to False in analysis_results", + since="0.6", + package_name="qiskit-experiments", + pending=True, + predicate=lambda dataframe: not dataframe, + ) def analysis_results( self, index: Optional[Union[int, slice, str]] = None, refresh: bool = False, block: bool = True, timeout: Optional[float] = None, - ) -> Union[AnalysisResult, List[AnalysisResult]]: + verbosity: int = 2, + dataframe: bool = False, + ) -> Union[AnalysisResult, List[AnalysisResult], pd.DataFrame, pd.Series]: """Return analysis results associated with this experiment. Args: @@ -1394,16 +1504,23 @@ def analysis_results( * int: Specific index of the analysis results. * slice: A list slice of indexes. * str: ID or name of the analysis result. + refresh: Retrieve the latest analysis results from the server, if an experiment service is available. block: If True block for any analysis callbacks to finish running. timeout: max time in seconds to wait for analysis callbacks to finish running. + verbosity: Level of verbosity of returned data table (1, 2, 3): + + * 1 (minimum): Return data from the analysis. + * 2 (normal): With supplemental data about experiment. + * 3 (finest): With extra data to communicate with experiment service. + + dataframe: Set True to return analysis results in the dataframe format. Returns: Analysis results for this experiment. Raises: - TypeError: If the input `index` has an invalid type. ExperimentEntryNotFound: If the entry cannot be found. 
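+
+        For example (an illustrative sketch; ``"T1"`` stands for any stored result name):
+
+        .. code-block:: python
+
+            # Full result table with the default columns
+            df = expdata.analysis_results(dataframe=True)
+
+            # A single entry selected by name, returned as a pandas Series
+            t1_result = expdata.analysis_results("T1", dataframe=True)
+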
""" if block: @@ -1411,42 +1528,41 @@ def analysis_results( self._analysis_futures.values(), name="analysis", timeout=timeout ) self._retrieve_analysis_results(refresh=refresh) - if index is None: - return self._analysis_results.values() - - def _make_not_found_message(index: Union[int, slice, str]) -> str: - """Helper to make error message for index not found""" - msg = [f"Analysis result {index} not found."] - errors = self.errors() - if errors: - msg.append(f"Errors: {errors}") - return "\n".join(msg) - - if isinstance(index, int): - if index >= len(self._analysis_results.values()): - raise ExperimentEntryNotFound(_make_not_found_message(index)) - return self._analysis_results.values()[index] - if isinstance(index, slice): - results = self._analysis_results.values()[index] - if not results: - raise ExperimentEntryNotFound(_make_not_found_message(index)) - return results - if isinstance(index, str): - # Check by result ID - if index in self._analysis_results: - return self._analysis_results[index] - # Check by name - filtered = [ - result for result in self._analysis_results.values() if result.name == index - ] - if not filtered: - raise ExperimentEntryNotFound(_make_not_found_message(index)) - if len(filtered) == 1: - return filtered[0] - else: - return filtered - raise TypeError(f"Invalid index type {type(index)}.") + out = self._analysis_results.container(collapse_extra=False) + + if index is not None: + out = _filter_analysis_results(index, out) + if out is None: + msg = [f"Analysis result {index} not found."] + errors = self.errors() + if errors: + msg.append(f"Errors: {errors}") + raise ExperimentEntryNotFound("\n".join(msg)) + + if dataframe: + valid_columns = self._analysis_results.filter_columns(verbosity) + out = out[valid_columns] + if len(out) == 1 and index is not None: + # For backward compatibility. + # One can directly access attributes with Series. e.g. out.value + return out.iloc[0] + return out + + # Convert back into List[AnalysisResult] which is payload for IBM experiment service. + # This will be removed in future version. + service_results = [] + for _, series in out.iterrows(): + service_results.append( + _series_to_service_result( + series=series, + service=self._service, + auto_save=self._auto_save, + ) + ) + if len(service_results) == 1 and index is not None: + return service_results[0] + return service_results # Save and load from the database @@ -1575,8 +1691,15 @@ def save( return analysis_results_to_create = [] - for result in self._analysis_results.values(): - analysis_results_to_create.append(result._db_data) + for _, series in self._analysis_results.container(collapse_extra=False).iterrows(): + # TODO We should support saving entire dataframe + # Calling API per entry takes huge amount of time. + legacy_result = _series_to_service_result( + series=series, + service=self._service, + auto_save=False, + ) + analysis_results_to_create.append(legacy_result._db_data) try: self.service.create_analysis_results( data=analysis_results_to_create, @@ -2181,9 +2304,7 @@ def copy(self, copy_results: bool = True) -> "ExperimentData": # Copy results and figures. 
# This requires analysis callbacks to finish self._wait_for_futures(self._analysis_futures.values(), name="analysis") - with self._analysis_results.lock: - new_instance._analysis_results = ThreadSafeOrderedDict() - new_instance.add_analysis_results([result.copy() for result in self.analysis_results()]) + new_instance._analysis_results = self._analysis_results.copy_object() with self._figures.lock: new_instance._figures = ThreadSafeOrderedDict() new_instance.add_figures(self._figures.values()) @@ -2215,8 +2336,6 @@ def _set_service(self, service: IBMExperimentService, replace: bool = None) -> N if self._service and not replace: raise ExperimentDataError("An experiment service is already being used.") self._service = service - for result in self._analysis_results.values(): - result.service = service with contextlib.suppress(Exception): self.auto_save = self._service.options.get("auto_save", False) for data in self.child_data(): @@ -2485,3 +2604,141 @@ def __getstate__(self): def __json_encode__(self): return self.__getstate__() + + +def _series_to_service_result( + series: pd.Series, + service: IBMExperimentService, + auto_save: bool, + source: Optional[Dict[str, Any]] = None, +) -> AnalysisResult: + """Helper function to convert dataframe to AnalysisResult payload for IBM experiment service. + + .. note:: + + Now :class:`.AnalysisResult` is only used to save data in the experiment service. + All local operations must be done with :class:`.AnalysisResultTable` dataframe. + ExperimentData._analysis_results are totally decoupled from + the model of IBM experiment service until this function is implicitly called. + + Args: + series: Pandas dataframe Series (a row of dataframe). + service: Experiment service. + auto_save: Do auto save when entry value changes. + + Returns: + Legacy AnalysisResult payload. + """ + # TODO This must be done on experiment service rather than by client. + + data_dict = series.to_dict() + + # Format values + value = data_dict.pop("value") + result_id = data_dict.pop("result_id") + experiment_id = data_dict.pop("experiment_id") + result_type = data_dict.pop("name") + chisq = data_dict.pop("chisq", None) + components = list(map(to_component, data_dict.pop("components"))) + quality_raw = data_dict.pop("quality") or "unknown" + try: + quality = ResultQuality(quality_raw.upper()) + except ValueError: + quality = "unknown" + tags = data_dict.pop("tags") + backend_name = data_dict.pop("backend") + creation_datetime = data_dict.pop("created_time") + + result_data = AnalysisResult.format_result_data( + value=value, + extra=data_dict, + chisq=chisq, + source=source, + ) + + ibm_payload = AnalysisResultDataclass( + result_id=result_id, + experiment_id=experiment_id, + result_type=result_type, + result_data=result_data, + device_components=components, + quality=quality, + tags=tags, + backend_name=backend_name, + creation_datetime=creation_datetime, + chisq=chisq, + ) + + service_result = AnalysisResult() + service_result.set_data(ibm_payload) + + with contextlib.suppress(ExperimentDataError): + service_result.service = service + service_result.auto_save = auto_save + + return service_result + + +def _filter_analysis_results( + search_key: Union[int, slice, str], + data: pd.DataFrame, +) -> pd.DataFrame: + """Helper function to search result data for given key. + + Args: + search_key: Key to search for. + data: Full result dataframe. + + Returns: + Truncated dataframe. 
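+
+    For example (illustrative): an ``int`` key selects a single row by position,
+    a ``slice`` selects a range of rows, and a ``str`` key matches either the
+    table index (the short result ID) or the ``name`` column.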
+ """ + out = _search_data(search_key, data) + if isinstance(out, pd.Series): + return pd.DataFrame([out]) + return out + + +@singledispatch +def _search_data(search_key, data): + if search_key is None: + return data + raise TypeError( + f"Invalid search key {search_key}. " f"This must be either int, slice or str type." + ) + + +@_search_data.register +def _search_with_int( + search_key: int, + data: pd.DataFrame, +): + if search_key >= len(data): + return None + return data.iloc[search_key] + + +@_search_data.register +def _search_with_slice( + search_key: slice, + data: pd.DataFrame, +): + out = data[search_key] + if len(out) == 0: + return None + return out + + +@_search_data.register +def _search_with_str( + search_key: str, + data: pd.DataFrame, +): + if search_key in data.index: + # This key is table entry hash + return data.loc[search_key] + + # This key is name of entry + out = data[data["name"] == search_key] + if len(out) == 0: + return None + return out diff --git a/test/framework/test_framework.py b/test/framework/test_framework.py index 265630371e..4c122f05ec 100644 --- a/test/framework/test_framework.py +++ b/test/framework/test_framework.py @@ -125,8 +125,8 @@ def test_analysis_replace_results_true(self): expdata2 = analysis.run(expdata1, replace_results=True, seed=12345) self.assertExperimentDone(expdata2) - self.assertEqual(expdata1, expdata2) - self.assertEqual(expdata1.analysis_results(), expdata2.analysis_results()) + self.assertEqualExtended(expdata1, expdata2) + self.assertEqualExtended(expdata1.analysis_results(), expdata2.analysis_results()) self.assertEqual(result_ids, list(expdata2._deleted_analysis_results)) def test_analysis_replace_results_false(self): From 65269e9995f6fbe1329613cca8d480792512e434 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Tue, 11 Apr 2023 02:14:08 +0900 Subject: [PATCH 03/27] Upgrade database_service unittests These test cases use MagicMock for convenience, rather than using a mocked AnalysisResult. However, now added result entry is internally converted into dataframe format and the input object is discarded after add_analysis_results call. So it's no longer possible to track method call or evaluate identity of input object. Since user will never store MagicMock to experiment data, this change to unittest should not introduce any edge case and should not decrease the practical test coverage. --- .../test_db_experiment_data.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/test/database_service/test_db_experiment_data.py b/test/database_service/test_db_experiment_data.py index 508e73ac72..56bb9efe15 100644 --- a/test/database_service/test_db_experiment_data.py +++ b/test/database_service/test_db_experiment_data.py @@ -450,28 +450,43 @@ def test_add_get_analysis_result(self): """Test adding and getting analysis results.""" exp_data = ExperimentData(experiment_type="qiskit_test") results = [] - for idx in range(5): + result_ids = list(map(str, range(5))) + for idx in result_ids: res = mock.MagicMock() res.result_id = idx results.append(res) exp_data.add_analysis_results(res) - self.assertEqual(results, exp_data.analysis_results()) - self.assertEqual(results[1], exp_data.analysis_results(1)) - self.assertEqual(results[2:4], exp_data.analysis_results(slice(2, 4))) - self.assertEqual(results[4], exp_data.analysis_results(results[4].result_id)) + # We cannot compare results with exp_data.analysis_results() + # This test is too hacky since it tris to compare MagicMock with AnalysisResult. 
+        self.assertEqual(
+            [res.result_id for res in exp_data.analysis_results()],
+            result_ids,
+        )
+        self.assertEqual(
+            exp_data.analysis_results(1).result_id,
+            result_ids[1],
+        )
+        self.assertEqual(
+            [res.result_id for res in exp_data.analysis_results(slice(2, 4))],
+            result_ids[2:4],
+        )

     def test_add_get_analysis_results(self):
         """Test adding and getting a list of analysis results."""
         exp_data = ExperimentData(experiment_type="qiskit_test")
         results = []
-        for idx in range(5):
+        result_ids = list(map(str, range(5)))
+        for idx in result_ids:
             res = mock.MagicMock()
             res.result_id = idx
             results.append(res)
         exp_data.add_analysis_results(results)
+        get_result_ids = [res.result_id for res in exp_data.analysis_results()]

-        self.assertEqual(results, exp_data.analysis_results())
+        # We cannot compare results with exp_data.analysis_results() directly,
+        # since that would try to compare a MagicMock with an AnalysisResult.
+        self.assertEqual(get_result_ids, result_ids)

     def test_delete_analysis_result(self):
         """Test deleting analysis result."""

From 323a0d9a45dbf37684fdb5183652cae7d61d3b96 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Fri, 12 May 2023 15:46:06 +0900
Subject: [PATCH 04/27] Docs update

Co-authored-by: Helena Zhang
---
 qiskit_experiments/framework/experiment_data.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py
index 0e2299fee5..3eefce6a72 100644
--- a/qiskit_experiments/framework/experiment_data.py
+++ b/qiskit_experiments/framework/experiment_data.py
@@ -1308,7 +1308,7 @@ def figure(
     @deprecate_arg(
         name="results",
         since="0.6",
-        additional_msg="Use keyword arguments rather than creating AnalysisResult object.",
+        additional_msg="Use keyword arguments rather than creating an AnalysisResult object.",
         package_name="qiskit-experiments",
         pending=True,
     )
@@ -1338,8 +1338,8 @@ def add_analysis_results(
             value: Analyzed quantity.
             quality: Quality of the data.
             components: Associated device components.
-            experiment: String identifier of experiment.
-            experiment_id: Experiment ID associated with this analysis.
+            experiment: String identifier of the associated experiment.
+            experiment_id: ID of the associated experiment.
             result_id: ID of this analysis entry. If not set a random UUID is generated.
             tags: List of arbitrary tags.
             backend: Name of associated backend.

From 7b5f901c67c4f3a9e2cb4126cc781cccb15d4fb2 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Wed, 14 Jun 2023 01:21:45 +0900 Subject: [PATCH 05/27] Replace verbosity with explicit column names --- qiskit_experiments/database_service/utils.py | 93 +++++++++---------- .../framework/experiment_data.py | 15 +-- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py index 772f481905..a4f20c7297 100644 --- a/qiskit_experiments/database_service/utils.py +++ b/qiskit_experiments/database_service/utils.py @@ -16,6 +16,7 @@ import logging import threading import traceback +import warnings from abc import ABC, abstractmethod from collections import OrderedDict from datetime import datetime, timezone @@ -449,10 +450,10 @@ class AnalysisResultTable(ThreadSafeDataFrame): def _default_columns(cls) -> List[str]: return [ "name", + "experiment", + "components", "value", "quality", - "components", - "experiment", "experiment_id", "result_id", "tags", @@ -461,54 +462,50 @@ def _default_columns(cls) -> List[str]: "created_time", ] - @classmethod - def _tier1(cls) -> List[str]: - """The data group that the analysis class produces.""" - return [ - "name", - "value", - "components", - "quality", - ] - - @classmethod - def _tier2(cls) -> List[str]: - """The data group of metadata that the experiment class provides.""" - return [ - "experiment", - "backend", - "run_time", - ] - - @classmethod - def _tier3(cls) -> List[str]: - """The data group which is used to communicate with the experiment service.""" - return [ - "experiment_id", - "result_id", - "tags", - "created_time", - ] - - def filter_columns(self, verbosity: int) -> List[str]: - """Return column names at given verbosity level. - - Extra columns are always added. + def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: + """Filter columns names available in this table. Args: - verbosity: Level of verbosity of returned data table (1, 2, 3): + columns: Specifying a set of columns to return. You can pass a list of each + column name to return, otherwise builtin column groups are available. - * 1 (minimum): Return data from the analysis. - * 2 (normal): With supplemental data experiment. - * 3 (finest): With extra data to communicate with experiment service. + * "all": Return all columns, including metadata to communicate + with experiment service, such as entry IDs. + * "default": Return columns including analysis result with supplementary + information about experiment. + * "minimal": Return only analysis subroutine returns. - Return: - Valid column names. """ - if verbosity == 1: - return self._tier1() + self._extra - if verbosity == 2: - return self._tier1() + self._tier2() + self._extra - if verbosity == 3: - return self._tier1() + self._tier2() + self._tier3() + self._extra - raise ValueError(f"verbosity {verbosity} is not defined. 
Choose value from 1, 2, 3.") + if columns == "all": + return self._columns + if columns == "default": + return [ + "name", + "experiment", + "components", + "value", + "quality", + "backend", + "run_time", + ] + self._extra + if columns == "minimal": + return [ + "name", + "components", + "value", + "quality", + ] + self._extra + if not isinstance(columns, str): + out = [] + for column in columns: + if column in self._columns: + out.append(column) + else: + warnings.warn( + f"Specified column name {column} does not exist in this table.", + UserWarning + ) + return out + raise ValueError( + f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." + ) diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index 3eefce6a72..995890da97 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -1491,7 +1491,7 @@ def analysis_results( refresh: bool = False, block: bool = True, timeout: Optional[float] = None, - verbosity: int = 2, + columns: Union[str, List[str]] = "default", dataframe: bool = False, ) -> Union[AnalysisResult, List[AnalysisResult], pd.DataFrame, pd.Series]: """Return analysis results associated with this experiment. @@ -1509,11 +1509,14 @@ def analysis_results( an experiment service is available. block: If True block for any analysis callbacks to finish running. timeout: max time in seconds to wait for analysis callbacks to finish running. - verbosity: Level of verbosity of returned data table (1, 2, 3): + columns: Specifying a set of columns to return. You can pass a list of each + column name to return, otherwise builtin column groups are available. - * 1 (minimum): Return data from the analysis. - * 2 (normal): With supplemental data about experiment. - * 3 (finest): With extra data to communicate with experiment service. + * "all": Return all columns, including metadata to communicate + with experiment service, such as entry IDs. + * "default": Return columns including analysis result with supplementary + information about experiment. + * "minimal": Return only analysis subroutine returns. dataframe: Set True to return analysis results in the dataframe format. @@ -1541,7 +1544,7 @@ def analysis_results( raise ExperimentEntryNotFound("\n".join(msg)) if dataframe: - valid_columns = self._analysis_results.filter_columns(verbosity) + valid_columns = self._analysis_results.filter_columns(columns) out = out[valid_columns] if len(out) == 1 and index is not None: # For backward compatibility. From be515f73c49c714332d016291f26d98e69097f2c Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Wed, 14 Jun 2023 18:36:38 +0900 Subject: [PATCH 06/27] Add test for dataframe classes --- qiskit_experiments/database_service/utils.py | 74 +++++-- .../framework/experiment_data.py | 4 +- test/extended_equality.py | 1 + test/framework/test_data_table.py | 185 ++++++++++++++++++ 4 files changed, 245 insertions(+), 19 deletions(-) create mode 100644 test/framework/test_data_table.py diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py index a4f20c7297..3a2ece8961 100644 --- a/qiskit_experiments/database_service/utils.py +++ b/qiskit_experiments/database_service/utils.py @@ -375,12 +375,24 @@ def container( return container[self._default_columns()] return container - def add_entry(self, **kwargs): + def add_entry( + self, + index: str, + **kwargs, + ): """Add new entry to the dataframe. 
Args: + index: Name of this entry. Must be unique in this table. kwargs: Description of new entry to register. + + Raises: + ValueError: When index is not unique in this table. """ + with self.lock: + if index in self._container.index: + raise ValueError(f"Table index {index} already exists in the table.") + columns = self.get_columns() missing = kwargs.keys() - set(columns) if missing: @@ -389,21 +401,8 @@ def add_entry(self, **kwargs): template = dict.fromkeys(self.get_columns()) template.update(kwargs) - if not template["result_id"]: - template["result_id"] = uuid.uuid4().hex - name = self._unique_table_index(template["result_id"]) with self._lock: - self._container.loc[name] = list(template.values()) - - def _unique_table_index(self, index_name: str): - """Generate unique index name with 8 characters.""" - if not isinstance(index_name, str): - index_name = str(index_name) - truncated = index_name[:8] - with self.lock: - while truncated in self._container.index: - truncated = uuid.uuid4().hex[:8] - return truncated + self._container.loc[index] = list(template.values()) def _repr_html_(self) -> Union[str, None]: """Return HTML representation of this dataframe.""" @@ -475,6 +474,8 @@ def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: information about experiment. * "minimal": Return only analysis subroutine returns. + Raises: + ValueError: When column is given in string which doesn't match with any builtin group. """ if columns == "all": return self._columns @@ -502,10 +503,49 @@ def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: out.append(column) else: warnings.warn( - f"Specified column name {column} does not exist in this table.", - UserWarning + f"Specified column name {column} does not exist in this table.", UserWarning ) return out raise ValueError( f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." ) + + # pylint: disable=arguments-renamed + def add_entry( + self, + result_id: Optional[str] = None, + **kwargs, + ): + """Add new entry to the table. + + Args: + result_id: Result ID. Automatically generated when not provided. + This must be valid hexadecimal UUID string. + kwargs: Description of new entry to register. + """ + if result_id: + try: + result_id = uuid.UUID(result_id, version=4).hex + except ValueError as ex: + raise ValueError(f"{result_id} is not a valid hexadecimal UUID string.") from ex + else: + result_id = self._unique_table_index() + + super().add_entry( + index=result_id[:8], + result_id=result_id, + **kwargs, + ) + + def _unique_table_index(self): + """Generate unique UUID which is unique in the table with first 8 characters.""" + with self.lock: + n = 0 + while n < 1000: + tmp_id = uuid.uuid4().hex + if tmp_id[:8] not in self._container.index: + return tmp_id + raise RuntimeError( + "Unique result_id string cannot be prepared for this table within 1000 trials. " + "Reduce number of entries, or manually provide a unique result_id." 
+ ) diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index 995890da97..eb705642eb 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -1385,13 +1385,13 @@ def add_analysis_results( backend = backend or self.backend_name self._analysis_results.add_entry( + result_id=result_id, name=name, value=value, quality=quality, components=components, experiment=experiment, experiment_id=experiment_id, - result_id=result_id, tags=tags or [], backend=backend, run_time=run_time, # TODO add job RUNNING time @@ -1426,7 +1426,7 @@ def delete_analysis_result( to_delete = self.analysis_results( index=result_key, block=False, - verbosity=3, + columns="all", dataframe=True, ) if not isinstance(to_delete, pd.Series): diff --git a/test/extended_equality.py b/test/extended_equality.py index 0448d8c4f8..021174baa2 100644 --- a/test/extended_equality.py +++ b/test/extended_equality.py @@ -278,6 +278,7 @@ def _check_configurable_classes( def _check_dataframes( data1: Union[pd.DataFrame, ThreadSafeDataFrame], data2: Union[pd.DataFrame, ThreadSafeDataFrame], + **kwargs, ): """Check equality of data frame which may involve Qiskit Experiments class value.""" return is_equivalent(data1.to_dict(orient="index"), data2.to_dict(orient="index")) diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py new file mode 100644 index 0000000000..ade552ce03 --- /dev/null +++ b/test/framework/test_data_table.py @@ -0,0 +1,185 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Test case for data table.""" + +from test.base import QiskitExperimentsTestCase + +import numpy as np +import pandas as pd +from qiskit.tools import parallel_map + +from qiskit_experiments.database_service.utils import ThreadSafeDataFrame, AnalysisResultTable + + +class TestBaseTable(QiskitExperimentsTestCase): + """Test case for data frame base class.""" + + class TestTable(ThreadSafeDataFrame): + """A table class under test with test columns.""" + + @classmethod + def _default_columns(cls): + return ["value1", "value2", "value3"] + + def test_initializing_with_dict(self): + """Test initializing table with dictionary. 
Columns are filled with default.""" + table = TestBaseTable.TestTable( + { + "x": [1.0, 2.0, 3.0], + "y": [4.0, 5.0, 6.0], + } + ) + self.assertListEqual(table.get_columns(), ["value1", "value2", "value3"]) + + def test_raises_initializing_with_wrong_table(self): + """Test table cannot be initialized with non-default columns.""" + wrong_table = pd.DataFrame.from_dict( + data={"x": [1.0, 2.0], "y": [3.0, 4.0], "z": [5.0, 6.0]}, + orient="index", + columns=["wrong", "columns"], + ) + with self.assertRaises(ValueError): + # columns doesn't match with default_columns + TestBaseTable.TestTable(wrong_table) + + def test_add_entry(self): + """Test adding data with default keys to table.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) + + self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0]) + + def test_add_entry_with_missing_key(self): + """Test adding entry with partly specified keys.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value3=2.0) + + # NaN value cannot be compared with assert + np.testing.assert_equal(table.loc["x"].to_list(), [0.0, float("nan"), 2.0]) + + def test_add_entry_with_new_key(self): + """Test adding data with new keys to table.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0, extra=3.0) + + self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"]) + self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0, 3.0]) + + def test_add_entry_with_new_key_with_existing_entry(self): + """Test adding new key will expand existing entry.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) + table.add_entry(index="y", value1=0.0, value2=1.0, value3=2.0, extra=3.0) + + self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"]) + self.assertListEqual(table.loc["y"].to_list(), [0.0, 1.0, 2.0, 3.0]) + + # NaN value cannot be compared with assert + np.testing.assert_equal(table.loc["x"].to_list(), [0.0, 1.0, 2.0, float("nan")]) + + def test_return_only_default_columns(self): + """Test extra entry is correctly recognized.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0, extra=3.0) + + default_table = table.container(collapse_extra=True) + self.assertListEqual(default_table.loc["x"].to_list(), [0.0, 1.0, 2.0]) + + def test_raises_adding_duplicated_index(self): + """Test adding duplicated index should raise.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) + + with self.assertRaises(ValueError): + # index x is already used + table.add_entry(index="x", value1=3.0, value2=4.0, value3=5.0) + + def test_clear_container(self): + """Test reset table.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) + self.assertEqual(len(table), 1) + + table.clear() + self.assertEqual(len(table), 0) + + def test_multi_thread_add_entry(self): + """Test add entry with parallel thread access.""" + table = TestBaseTable.TestTable() + + def _test_thread_local_add_entry(args, thread_table): + index, kwargs = args + thread_table.add_entry(index, **kwargs) + + # Mutate thread safe table in parallel thread. No race should occur. 
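+        # (add_entry acquires the table's internal reentrant lock, so writes serialize.)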
+        parallel_map(
+            _test_thread_local_add_entry,
+            [
+                ["x", {"value1": 0.0, "value2": 1.0, "value3": 2.0}],
+                ["y", {"value1": 3.0, "value2": 4.0, "value3": 5.0}],
+                ["z", {"value1": 6.0, "value2": 7.0, "value3": 8.0}],
+            ],
+            task_kwargs={"thread_table": table},
+        )
+        self.assertEqual(len(table), 3)
+
+        self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0])
+        self.assertListEqual(table.loc["y"].to_list(), [3.0, 4.0, 5.0])
+        self.assertListEqual(table.loc["z"].to_list(), [6.0, 7.0, 8.0])
+
+    def test_round_trip(self):
+        """Test JSON roundtrip serialization with the experiment encoder."""
+        table = TestBaseTable.TestTable()
+        table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0)
+        table.add_entry(index="y", value1=1.0, extra=2.0)
+
+        self.assertRoundTripSerializable(table)
+
+
+class TestAnalysisTable(QiskitExperimentsTestCase):
+    """Test case for extra functionality of analysis table."""
+
+    def test_add_entry_with_result_id(self):
+        """Test adding entry with result_id. Index is created by truncating a long string."""
+        table = AnalysisResultTable()
+        table.add_entry(result_id="9a0bdec8c0104ef7bb7db84939717a6b", value=0.123)
+        self.assertEqual(table.loc["9a0bdec8"].value, 0.123)
+
+    def test_raises_adding_entry_with_invalid_result_id(self):
+        """Test adding entry with a non-hexadecimal UUID result id."""
+        table = AnalysisResultTable()
+        with self.assertRaises(ValueError):
+            table.add_entry(result_id="12345678910")
+
+    def test_extra_column_name_is_always_returned(self):
+        """Test extra column names are always returned in filtered column names."""
+        table = AnalysisResultTable()
+        table.add_entry(extra=0.123)
+
+        minimal_columns = table.filter_columns("minimal")
+        self.assertTrue("extra" in minimal_columns)
+
+        default_columns = table.filter_columns("default")
+        self.assertTrue("extra" in default_columns)
+
+        all_columns = table.filter_columns("all")
+        self.assertTrue("extra" in all_columns)
+
+    def test_no_overlap_result_id(self):
+        """Test automatically preparing unique result IDs for a sufficient number of entries."""
+        table = AnalysisResultTable()
+
+        for i in range(100):
+            table.add_entry(value=i)
+
+        self.assertEqual(len(table), 100)

From 467532497661fb54eaa2e72f35687dad3da3adb5 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Thu, 22 Jun 2023 01:15:22 +0900
Subject: [PATCH 07/27] Add code comment for short uuid

---
 qiskit_experiments/database_service/utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py
index 3a2ece8961..fd68dc10b6 100644
--- a/qiskit_experiments/database_service/utils.py
+++ b/qiskit_experiments/database_service/utils.py
@@ -531,8 +531,12 @@ def add_entry(
         else:
             result_id = self._unique_table_index()

+        # Short unique index is generated from the full UUID.
+        # Showing the full UUID unnecessarily occupies horizontal space in the HTML table.
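+        # (The full 32-character UUID is still stored in the "result_id" column.)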
+        short_index = result_id[:8]
+
         super().add_entry(
-            index=result_id[:8],
+            index=short_index,
             result_id=result_id,
             **kwargs,
         )

From 20760f7ec2e1cae61e5e9ff937e5e7488b3e77f5 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Thu, 22 Jun 2023 03:19:25 +0900
Subject: [PATCH 08/27] Add job running time to ExperimentData

---
 .../framework/experiment_data.py | 25 ++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py
index eb705642eb..604631727e 100644
--- a/qiskit_experiments/framework/experiment_data.py
+++ b/qiskit_experiments/framework/experiment_data.py
@@ -334,6 +334,7 @@ def __init__(
         # job handling related
         self._jobs = ThreadSafeOrderedDict(job_ids)
         self._job_futures = ThreadSafeOrderedDict()
+        self._running_time = None
         self._analysis_callbacks = ThreadSafeOrderedDict()
         self._analysis_futures = ThreadSafeOrderedDict()
         # Set 2 workers for analysis executor so there can be 1 actively running
@@ -442,9 +443,26 @@ def updated_datetime(self) -> datetime:
         """
         return utc_to_local(self._db_data.updated_datetime)

+    @property
+    def running_time(self) -> datetime:
+        """Return the running time of this experiment data.
+
+        The running time is the time at which the latest successful job was run
+        on the remote quantum machine. This can change as more jobs finish.
+
+        .. warning::
+
+            The IBM job returns the running time in tzlocal(), but we don't know
+            the local timezone of the remote quantum machine. This may return a
+            wrong datetime if the server and client are in different timezones.
+
+        """
+        return utc_to_local(self._running_time)
+
     @property
     def end_datetime(self) -> datetime:
         """Return the end datetime of this experiment data.
+
         The end datetime is the time the latest job data was added
         without errors; this can change as more jobs finish.
@@ -893,6 +911,10 @@ def _add_job_data(
         jid = job.job_id()
         try:
             job_result = job.result()
+            try:
+                self._running_time = job.time_per_step().get("running", None)
+            except AttributeError:
+                pass
             self._add_result_data(job_result, jid)
             LOG.debug("Job data added [Job ID: %s]", jid)
             # sets the end time to be the time the last successful job was added
@@ -1360,7 +1382,7 @@ def add_analysis_results(
             extra_values["chisq"] = result.chisq
         experiment = extra_values.pop("experiment", self.experiment_type)
         backend = extra_values.pop("backend", self.backend_name)
-        run_time = extra_values.pop("run_time", None)
+        run_time = extra_values.pop("run_time", self.running_time)
         created_time = extra_values.pop("created_time", None)
         self._analysis_results.add_entry(
             name=result.name,
@@ -2443,6 +2465,7 @@ def __json_encode__(self):
             "_jobs": self._safe_serialize_jobs(),  # Handle non-serializable objects
             "_experiment": self._experiment,
             "_child_data": self._child_data,
+            "_running_time": self._running_time,
         }
         # the attribute self._service is in charge of the connection and communication with
        # the experiment db.
# It has no meaning in the JSON format, so there is no need to serialize it.

From b84dfa14d861e1686cd784a56ee0f0cd9a33e62ee Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Thu, 22 Jun 2023 04:07:41 +0900
Subject: [PATCH 09/27] Extend AnalysisResultData to be comparable with AnalysisResultTable columns

---
 .../framework/analysis_result_data.py | 93 ++++++++++++++++++-
 qiskit_experiments/framework/base_analysis.py | 60 ++++++------
 .../framework/experiment_data.py | 50 ++++------
 3 files changed, 142 insertions(+), 61 deletions(-)

diff --git a/qiskit_experiments/framework/analysis_result_data.py b/qiskit_experiments/framework/analysis_result_data.py
index b4d6f6aac6..e957bea336 100644
--- a/qiskit_experiments/framework/analysis_result_data.py
+++ b/qiskit_experiments/framework/analysis_result_data.py
@@ -16,6 +16,9 @@
 import logging
 from typing import Optional, Dict, Any, List

+from qiskit_experiments.database_service.device_component import DeviceComponent
+
+
 LOG = logging.getLogger(__name__)


@@ -23,14 +26,70 @@ class AnalysisResultData:
     """Dataclass for experiment analysis results"""

-    # TODO: move stderr and unit into custom value class
     name: str
     value: Any
+    experiment: Optional[str] = None
     chisq: Optional[float] = None
     quality: Optional[str] = None
+    experiment_id: Optional[str] = None
+    result_id: Optional[str] = None
+    tags: List = dataclasses.field(default_factory=list)
+    backend: Optional[str] = None
+    run_time: Optional[str] = None
+    created_time: Optional[str] = None
     extra: Dict[str, Any] = dataclasses.field(default_factory=dict, hash=False, compare=False)
     device_components: List = dataclasses.field(default_factory=list)

+    @classmethod
+    def from_table_element(
+        cls,
+        name: str,
+        value: Any,
+        experiment: Optional[str] = None,
+        components: Optional[List[DeviceComponent]] = None,
+        quality: Optional[str] = None,
+        experiment_id: Optional[str] = None,
+        result_id: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        backend: Optional[str] = None,
+        run_time: Optional[str] = None,
+        created_time: Optional[str] = None,
+        **extra,
+    ):
+        """A factory method that creates AnalysisResultData from a single element of AnalysisResultTable.
+
+        Args:
+            name: Name of this entity.
+            value: Result value.
+            experiment: Type of experiment.
+            components: Device components that the experiment was run on.
+            quality: Quality of this result.
+            experiment_id: ID of the associated experiment.
+            result_id: Unique ID of this data entry in the storage.
+            tags: List of tags.
+            backend: Name of the device that the experiment was run on.
+            run_time: The time at which the experiment was run.
+            created_time: The time at which this value was computed.
+            **extra: Extra information.
+        """
+        chisq = extra.pop("chisq", None)
+
+        return AnalysisResultData(
+            name=name,
+            value=value,
+            experiment=experiment,
+            chisq=chisq,
+            quality=quality,
+            experiment_id=experiment_id,
+            result_id=result_id,
+            tags=tags,
+            backend=backend,
+            run_time=run_time,
+            created_time=created_time,
+            device_components=components,
+            extra=extra,
+        )
+
     def __str__(self):
         out = f"{self.name}:"
         out += f"\n- value:{self.value}"
@@ -47,3 +106,35 @@ def __str__(self):
     def __iter__(self):
         """Return iterator of data fields (attr, value)"""
         return iter((field.name, getattr(self, field.name)) for field in dataclasses.fields(self))
+
+
+def as_table_element(
+    result_data: AnalysisResultData,
+) -> Dict[str, Any]:
+    """An ``asdict``-like function that returns canonical data for the AnalysisResultTable.
+
+    Args:
+        result_data: AnalysisResultData dataclass to format.
+
+    Returns:
+        Formatted data representation in dictionary format.
+    """
+    out = {
+        "name": result_data.name,
+        "experiment": result_data.experiment,
+        "components": result_data.device_components,
+        "value": result_data.value,
+        "quality": result_data.quality,
+        "experiment_id": result_data.experiment_id,
+        "result_id": result_data.result_id,
+        "tags": result_data.tags,
+        "backend": result_data.backend,
+        "run_time": result_data.run_time,
+        "created_time": result_data.created_time,
+    }
+    if result_data.chisq is not None:
+        out["chisq"] = result_data.chisq
+    out.update(result_data.extra)
+
+    return out
diff --git a/qiskit_experiments/framework/base_analysis.py b/qiskit_experiments/framework/base_analysis.py
index 07b3f07642..a896faf67b 100644
--- a/qiskit_experiments/framework/base_analysis.py
+++ b/qiskit_experiments/framework/base_analysis.py
@@ -21,9 +21,9 @@
 from qiskit_experiments.database_service.device_component import Qubit
 from qiskit_experiments.framework import Options
 from qiskit_experiments.framework.store_init_args import StoreInitArgs
-from qiskit_experiments.framework.experiment_data import ExperimentData
+from qiskit_experiments.framework.experiment_data import ExperimentData, FigureData
 from qiskit_experiments.framework.configs import AnalysisConfig
-from qiskit_experiments.framework.analysis_result_data import AnalysisResultData
+from qiskit_experiments.framework.analysis_result_data import AnalysisResultData, as_table_element


 class BaseAnalysis(ABC, StoreInitArgs):
@@ -163,40 +163,42 @@ def run(
         def run_analysis(expdata: ExperimentData):
             # Clearing previous analysis data
             experiment_data._clear_results()
-            experiment_components = self._get_experiment_components(experiment_data)

             # Making new analysis
             results, figures = analysis._run_analysis(expdata)

             if results:
                 for result in results:
-                    supplementary = result.extra
-                    if result.chisq is not None:
-                        supplementary["chisq"] = result.chisq
-                    if "experiment" not in supplementary:
-                        supplementary["experiment"] = expdata.experiment_type
-                    if "experiment_id" not in supplementary:
-                        supplementary["experiment_id"] = expdata.experiment_id
-                    if "backend" not in supplementary:
-                        supplementary["backend"] = expdata.backend_name
-                    if "run_time" not in supplementary:
-                        # TODO add job RUNNING time
-                        supplementary["run_time"] = None
-                    if "created_time" not in supplementary:
-                        supplementary["created_time"] = datetime.now(timezone.utc)
-                    # Bypass generation of AnalysisResult, i.e. calling add_analysis_results.
-                    # AnalysisResult is a data container with experiment service API.
-                    # Since analysis is a local operation in the client,
-                    # we should directly populate analysis result dataframe.
-                    expdata.add_analysis_results(
-                        name=result.name,
-                        value=result.value,
-                        quality=result.quality,
-                        components=result.device_components or experiment_components,
-                        **supplementary,
-                    )
+                    # Populate missing data fields
+                    if not result.experiment_id:
+                        result.experiment_id = expdata.experiment_id
+                    if not result.experiment:
+                        result.experiment = expdata.experiment_type
+                    if not result.device_components:
+                        result.device_components = self._get_experiment_components(expdata)
+                    if not result.backend:
+                        result.backend = expdata.backend_name
+                    if not result.created_time:
+                        result.created_time = datetime.now(timezone.utc)
+                    if not result.run_time:
+                        result.run_time = expdata.running_time
+
+                    # Convert to canonical kwargs to add to the analysis table.
+                    table_format = as_table_element(result)
+
+                    # Remove result_id to make sure the id is unique in the scope of the container.
+ # This will let the container generate a unique id. + del table_format["result_id"] + + expdata.add_analysis_results(**table_format) + if figures: - expdata.add_figures(figures, figure_names=self.options.figure_names) + figure_to_add = [] + for figure in figures: + if not isinstance(figure, FigureData): + figure = FigureData(figure=figure) + figure_to_add.append(figure) + expdata.add_figures(figure_to_add, figure_names=self.options.figure_names) experiment_data.add_analysis_callback(run_analysis) diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index 604631727e..2dd0821d8c 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -59,6 +59,7 @@ ) from qiskit_experiments.database_service.device_component import to_component, DeviceComponent from qiskit_experiments.framework.analysis_result import AnalysisResult +from qiskit_experiments.framework.analysis_result_data import AnalysisResultData from qiskit_experiments.framework import BackendData from qiskit_experiments.database_service.exceptions import ( ExperimentDataError, @@ -2657,46 +2658,33 @@ def _series_to_service_result( """ # TODO This must be done on experiment service rather than by client. - data_dict = series.to_dict() - - # Format values - value = data_dict.pop("value") - result_id = data_dict.pop("result_id") - experiment_id = data_dict.pop("experiment_id") - result_type = data_dict.pop("name") - chisq = data_dict.pop("chisq", None) - components = list(map(to_component, data_dict.pop("components"))) - quality_raw = data_dict.pop("quality") or "unknown" - try: - quality = ResultQuality(quality_raw.upper()) - except ValueError: - quality = "unknown" - tags = data_dict.pop("tags") - backend_name = data_dict.pop("backend") - creation_datetime = data_dict.pop("created_time") + qe_result = AnalysisResultData.from_table_element(**series.to_dict()) result_data = AnalysisResult.format_result_data( - value=value, - extra=data_dict, - chisq=chisq, + value=qe_result.value, + extra=qe_result.extra, + chisq=qe_result.chisq, source=source, ) - - ibm_payload = AnalysisResultDataclass( - result_id=result_id, - experiment_id=experiment_id, - result_type=result_type, + try: + quality = ResultQuality(str(qe_result.quality).upper()) + except ValueError: + quality = "unknown" + experiment_service_payload = AnalysisResultDataclass( + result_id=qe_result.result_id, + experiment_id=qe_result.experiment_id, + result_type=qe_result.name, result_data=result_data, - device_components=components, + device_components=list(map(to_component, qe_result.device_components)), quality=quality, - tags=tags, - backend_name=backend_name, - creation_datetime=creation_datetime, - chisq=chisq, + tags=qe_result.tags, + backend_name=qe_result.backend, + creation_datetime=qe_result.created_time, + chisq=qe_result.chisq, ) service_result = AnalysisResult() - service_result.set_data(ibm_payload) + service_result.set_data(experiment_service_payload) with contextlib.suppress(ExperimentDataError): service_result.service = service From f549642c0d5d110c37eb1658f417ead784bfff83 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Thu, 22 Jun 2023 04:10:27 +0900 Subject: [PATCH 10/27] Add type alias for figure data --- qiskit_experiments/framework/experiment_data.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index 
2dd0821d8c..bfc21cc51e 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -141,7 +141,8 @@ def __init__(self, figure, name=None, metadata=None): Args: figure: the raw figure itself. Can be SVG or matplotlib.Figure. name: Optional, the name of the figure. - metadata: Optional, any metadata to be stored with the figure.""" + metadata: Optional, any metadata to be stored with the figure. + """ self.figure = figure self._name = name self.metadata = metadata or {} @@ -195,6 +196,9 @@ def _repr_svg_(self): return None +_FigureT = Union[str, bytes, MatplotlibFigure, FigureData] + + class ExperimentData: """Experiment data container class. @@ -1136,9 +1140,9 @@ def data( @do_auto_save def add_figures( self, - figures: Union[str, bytes, pyplot.Figure, list], - figure_names: Optional[Union[str, list]] = None, - overwrite: Optional[bool] = False, + figures: Union[_FigureT, List[_FigureT]], + figure_names: Optional[Union[str, List[str]]] = None, + overwrite: bool = False, save_figure: Optional[bool] = None, ) -> Union[str, List[str]]: """Add the experiment figure. From 3c154d4e7867b31f1e81daf9a9efee4af83ad834 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Thu, 22 Jun 2023 04:11:04 +0900 Subject: [PATCH 11/27] Overhaul composite analysis. Simplified sub container initialization. --- .../framework/composite/composite_analysis.py | 317 +++++------------- .../framework/experiment_data.py | 1 - .../analysis/local_readout_error_analysis.py | 2 +- .../tomography/mit_tomography_analysis.py | 36 +- 4 files changed, 109 insertions(+), 247 deletions(-) diff --git a/qiskit_experiments/framework/composite/composite_analysis.py b/qiskit_experiments/framework/composite/composite_analysis.py index 039585e4b4..decc43a6d8 100644 --- a/qiskit_experiments/framework/composite/composite_analysis.py +++ b/qiskit_experiments/framework/composite/composite_analysis.py @@ -13,15 +13,13 @@ Composite Experiment Analysis class. 
""" -from typing import List, Dict, Union, Optional, Tuple +from typing import List, Dict, Union, Optional, Iterator import warnings -import numpy as np -from qiskit.result import marginal_distribution -from qiskit.result.postprocess import format_counts_memory + +from qiskit.result import marginal_distribution, marginal_memory from qiskit_experiments.framework import BaseAnalysis, ExperimentData from qiskit_experiments.framework.analysis_result_data import AnalysisResultData from qiskit_experiments.framework.base_analysis import _requires_copy -from qiskit_experiments.exceptions import AnalysisError class CompositeAnalysis(BaseAnalysis): @@ -112,225 +110,141 @@ def run( if not replace_results and _requires_copy(experiment_data): experiment_data = experiment_data.copy() - if not self._flatten_results: - # Initialize child components if they are not initalized - # This only needs to be done if results are not being flattened - self._add_child_data(experiment_data) - # Run analysis with replace_results = True since we have already # created the copy if it was required return super().run(experiment_data, replace_results=True, **options) def _run_analysis(self, experiment_data: ExperimentData): - # Return list of experiment data containers for each component experiment - # containing the marginalized data from the composite experiment - component_expdata = self._component_experiment_data(experiment_data) - # Run the component analysis on each component data - for i, sub_expdata in enumerate(component_expdata): + component_exp_data = [] + iter_components = self._initialize_component_experiment_data(experiment_data) + for i, sub_exp_data in enumerate(iter_components): # Since copy for replace result is handled at the parent level # we always run with replace result on component analysis - self._analyses[i].run(sub_expdata, replace_results=True) + self._analyses[i].run(sub_exp_data, replace_results=True) + component_exp_data.append(sub_exp_data) # Analysis is running in parallel so we add loop to wait # for all component analysis to finish before returning # the parent experiment analysis results - for sub_expdata in component_expdata: - sub_expdata.block_for_results() - # Optionally flatten results from all component experiments - # for adding to the main experiment data container - if self._flatten_results: - return self._combine_results(component_expdata) - - return [], [] + analysis_results = [] + figures = [] + for i, sub_exp_data in enumerate(component_exp_data): + sub_exp_data.block_for_results() + + if not self._flatten_results: + experiment_data.add_child_data(sub_exp_data) + continue + + # Convert table to AnalysisResultData lists for backward compatibility. + # In principle this is not necessary because data can be directly concatenated to + # the table of outer container, i.e. experiment_data._analysis_results, however + # some custom composite analysis class, such as TphiAnalysis overrides + # the _run_analysis method to perform further analysis on + # sub-analysis outcomes. This is indeed an overhead, + # and at some point we should restrict such subclass implementation. 
+ analysis_table = sub_exp_data.analysis_results(columns="all", dataframe=True) + for _, series in analysis_table.iterrows(): + data = AnalysisResultData.from_table_element(**series.to_dict()) + data.experiment_id = experiment_data.experiment_id + analysis_results.append(data) - def _component_experiment_data(self, experiment_data: ExperimentData) -> List[ExperimentData]: - """Return a list of marginalized experiment data for component experiments. + for fig_key in sub_exp_data.figure_names: + figures.append(sub_exp_data.figure(figure_key=fig_key)) - Args: - experiment_data: a composite experiment data container. + del sub_exp_data - Returns: - The list of analysis-ready marginalized experiment data for each - component experiment. + return analysis_results, figures - Raises: - AnalysisError: If the component experiment data cannot be extracted. - """ - if not self._flatten_results: - # Retrieve child data for component experiments for updating - component_index = experiment_data.metadata.get("component_child_index", []) - if not component_index: - raise AnalysisError("Unable to extract component child experiment data") - component_expdata = [experiment_data.child_data(i) for i in component_index] - else: - # Initialize temporary ExperimentData containers for - # each component experiment to analysis on. These will - # not be saved but results and figures will be collected - # from them - component_expdata = self._initialize_component_experiment_data(experiment_data) - - # Compute marginalize data for each component experiment - marginalized_data = self._marginalized_component_data(experiment_data.data()) - - # Add the marginalized component data and component job metadata - # to each component child experiment. Note that this will clear - # any currently stored data in the experiment. Since copying of - # child data is handled by the `replace_results` kwarg of the - # parent container it is safe to always clear and replace the - # results of child containers in this step - for sub_expdata, sub_data in zip(component_expdata, marginalized_data): - # Clear any previously stored data and add marginalized data - sub_expdata._result_data.clear() - sub_expdata.add_data(sub_data) - - return component_expdata - - def _marginalized_component_data(self, composite_data: List[Dict]) -> List[List[Dict]]: - """Return marginalized data for component experiments. + def _marginalize_data( + self, + composite_data: List[Dict], + component_index: int, + ) -> List[Dict]: + """Return marginalized data for component with particular index. Args: composite_data: a list of composite experiment circuit data. + component_index: an index of component to return. Returns: - A List of lists of marginalized circuit data for each component + A lists of marginalized circuit data for each component experiment in the composite experiment. """ - # Marginalize data - marginalized_data = {} + out = [] for datum in composite_data: metadata = datum.get("metadata", {}) - # Add marginalized data to sub experiments + if component_index not in metadata["composite_index"]: + # This circuit is not tied to the component experiment at "component_index". 
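+                # For example, composite_index=[0, 2] means the circuit carries
+                # marginal data only for component experiments 0 and 2.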
+ continue + index = metadata["composite_index"].index(component_index) + if "composite_clbits" in metadata: composite_clbits = metadata["composite_clbits"] else: composite_clbits = None - # Pre-process the memory if any to avoid redundant calls to format_counts_memory - f_memory = self._format_memory(datum, composite_clbits) - - for i, index in enumerate(metadata["composite_index"]): - if index not in marginalized_data: - # Initialize data list for marginalized - marginalized_data[index] = [] - sub_data = {"metadata": metadata["composite_metadata"][i]} - if "counts" in datum: - if composite_clbits is not None: - sub_data["counts"] = marginal_distribution( - counts=datum["counts"], - indices=composite_clbits[i], - ) - else: - sub_data["counts"] = datum["counts"] - if "memory" in datum: - if composite_clbits is not None: - # level 2 - if f_memory is not None: - idx = slice( - -1 - composite_clbits[i][-1], -composite_clbits[i][0] or None - ) - sub_data["memory"] = [shot[idx] for shot in f_memory] - # level 1 - else: - mem = np.array(datum["memory"]) - - # Averaged level 1 data - if len(mem.shape) == 2: - sub_data["memory"] = mem[composite_clbits[i]].tolist() - # Single-shot level 1 data - if len(mem.shape) == 3: - sub_data["memory"] = mem[:, composite_clbits[i]].tolist() - else: - sub_data["memory"] = datum["memory"] - marginalized_data[index].append(sub_data) - - # Sort by index - return [marginalized_data[i] for i in sorted(marginalized_data.keys())] - - @staticmethod - def _format_memory(datum: Dict, composite_clbits: List): - """A helper method to convert level 2 memory (if it exists) to bit-string format.""" - f_memory = None - if ( - "memory" in datum - and composite_clbits is not None - and isinstance(datum["memory"][0], str) - ): - num_cbits = 1 + max(cbit for cbit_list in composite_clbits for cbit in cbit_list) - header = {"memory_slots": num_cbits} - f_memory = list(format_counts_memory(shot, header) for shot in datum["memory"]) - - return f_memory - - def _add_child_data(self, experiment_data: ExperimentData): - """Save empty component experiment data as child data. - - This will initialize empty ExperimentData objects for each component - experiment and add them as child data to the main composite experiment - ExperimentData container container for saving. - - Args: - experiment_data: a composite experiment experiment data container. - """ - component_index = experiment_data.metadata.get("component_child_index", []) - if component_index: - # Child components are already initialized - return - - # Initialize the component experiment data containers and add them - # as child data to the current experiment data - child_components = self._initialize_component_experiment_data(experiment_data) - start_index = len(experiment_data.child_data()) - for i, subdata in enumerate(child_components): - experiment_data.add_child_data(subdata) - component_index.append(start_index + i) - - # Store the indices of the added child data in metadata - experiment_data.metadata["component_child_index"] = component_index + component_data = {"metadata": metadata["composite_metadata"][index]} + + # Use terra result marginalization utils. + # These functions support parallel execution and are implemented in Rust. 
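+            # Illustrative example: marginal_distribution({"011": 100}, indices=[0, 1])
+            # returns {"11": 100}, keeping only the two selected classical bits.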
+ if "counts" in datum: + if composite_clbits is not None: + component_data["counts"] = marginal_distribution( + counts=datum["counts"], + indices=composite_clbits[index], + ) + else: + component_data["counts"] = datum["counts"] + if "memory" in datum: + if composite_clbits is not None: + component_data["memory"] = marginal_memory( + memory=datum["memory"], + indices=composite_clbits[index], + ) + else: + component_data["memory"] = datum["memory"] + out.append(component_data) + return out def _initialize_component_experiment_data( - self, experiment_data: ExperimentData - ) -> List[ExperimentData]: + self, + experiment_data: ExperimentData, + ) -> Iterator[ExperimentData]: """Initialize empty experiment data containers for component experiments. Args: - experiment_data: a composite experiment experiment data container. + experiment_data: a composite experiment data container. - Returns: - The list of experiment data containers for each component experiment - containing the component metadata, and tags, share level, and - auto save settings of the composite experiment. + Yields: + Experiment data containers for each component experiment + containing the component metadata, and tags, share level. """ + metadata = experiment_data.metadata + # Extract component experiment types and metadata so they can be # added to the component experiment data containers - metadata = experiment_data.metadata num_components = len(self._analyses) experiment_types = metadata.get("component_types", [None] * num_components) component_metadata = metadata.get("component_metadata", [{}] * num_components) # Create component experiments and set the backend and # metadata for the components - component_expdata = [] - for i, _ in enumerate(self._analyses): - subdata = ExperimentData(backend=experiment_data.backend) - subdata.experiment_type = experiment_types[i] - subdata.metadata.update(component_metadata[i]) - - if self._flatten_results: - # Explicitly set auto_save to false so the temporary - # data can't accidentally be saved - subdata.auto_save = False - else: - # Copy tags, share_level and auto_save from the parent - # experiment data if results are not being flattened. - subdata.tags = experiment_data.tags - subdata.share_level = experiment_data.share_level - subdata.auto_save = experiment_data.auto_save + composite_data = experiment_data.data() + child_data_ids = [] + for i in range(num_components): + # Create empty container with metadata + sub_exp_data = ExperimentData(backend=experiment_data.backend) + sub_exp_data.experiment_type = experiment_types[i] + sub_exp_data.metadata.update(component_metadata[i]) + sub_exp_data.auto_save = False - component_expdata.append(subdata) + # Add marginalized experiment data + sub_exp_data.add_data(self._marginalize_data(composite_data, i)) + child_data_ids.append(sub_exp_data.experiment_id) - return component_expdata + yield sub_exp_data def _set_flatten_results(self): """Recursively set flatten_results to True for all composite components.""" @@ -338,54 +252,3 @@ def _set_flatten_results(self): for analysis in self._analyses: if isinstance(analysis, CompositeAnalysis): analysis._set_flatten_results() - - def _combine_results( - self, component_experiment_data: List[ExperimentData] - ) -> Tuple[List[AnalysisResultData], List["matplotlib.figure.Figure"]]: - """Combine analysis results from component experiment data. - - Args: - component_experiment_data: list of experiment data containers containing the - analysis results for each component experiment. 
- - Returns: - A pair of the combined list of all analysis results from each of the - component experiments, and a list of all figures from each component - experiment. - """ - analysis_results = [] - figures = [] - for sub_expdata in component_experiment_data: - figures += sub_expdata._figures.values() - - # Convert Dataframe Series back into AnalysisResultData - # This is due to limitation that _run_analysis must return List[AnalysisResultData], - # and some composite analysis such as TphiAnalysis overrides this method to - # return extra quantity computed from sub analysis results. - # This produces unnecessary data conversion. - # The _run_analysis mechanism seems just complicating the entire logic. - # Since it's impossible to deprecate the usage of this protected method, - # we should implement new CompositeAnalysis class with much more efficient - # internal logic. Note that the child data structure is no longer necessary - # because dataframe offers more efficient data filtering mechanisms. - analysis_table = sub_expdata.analysis_results(verbosity=3, dataframe=True) - for _, series in analysis_table.iterrows(): - data_dict = series.to_dict() - primary_info = { - "name": data_dict.pop("name"), - "value": data_dict.pop("value"), - "quality": data_dict.pop("quality"), - "device_components": data_dict.pop("components"), - } - chisq = data_dict.pop("chisq", np.nan) - if chisq: - primary_info["chisq"] = chisq - data_dict["experiment"] = sub_expdata.experiment_type - if "experiment_id" in data_dict: - # Use experiment ID of parent experiment data. - # Sub experiment data is merged and discarded. - del data_dict["experiment_id"] - analysis_result = AnalysisResultData(**primary_info, extra=data_dict) - analysis_results.append(analysis_result) - - return analysis_results, figures diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index bfc21cc51e..a54015d711 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -1376,7 +1376,6 @@ def add_analysis_results( New dataframe columns are created in the analysis result table with added keys. 
""" if results is not None: - # TODO deprecate this path if not isinstance(results, list): results = [results] for result in results: diff --git a/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py b/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py index 0bb3263496..29f31b9b17 100644 --- a/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py @@ -78,7 +78,7 @@ def _run_analysis( ) figures = [figure] else: - figures = None + figures = [] return analysis_results, figures def _generate_matrices(self, data) -> List[np.array]: diff --git a/qiskit_experiments/library/tomography/mit_tomography_analysis.py b/qiskit_experiments/library/tomography/mit_tomography_analysis.py index 898313d1ea..e864089132 100644 --- a/qiskit_experiments/library/tomography/mit_tomography_analysis.py +++ b/qiskit_experiments/library/tomography/mit_tomography_analysis.py @@ -92,36 +92,36 @@ def set_options(self, **fields): def _run_analysis(self, experiment_data): # Return list of experiment data containers for each component experiment # containing the marginalized data from the composite experiment + results, figures = [], [] + roerror_analysis, tomo_analysis = self._analyses - roerror_data, tomo_data = self._component_experiment_data(experiment_data) + roerror_data, tomo_data = list(self._initialize_component_experiment_data(experiment_data)) # Run readout error analysis - roerror_analysis.run(roerror_data, replace_results=True).block_for_results() - - # Construct noisy measurement basis - mitigator = roerror_data.analysis_results(0).value + roerror_results, roerror_figures = roerror_analysis._run_analysis(roerror_data) # Construct noisy measurement basis + mitigator = roerror_results[0].value measurement_basis = PauliMeasurementBasis(mitigator=mitigator) tomo_analysis.set_options(measurement_basis=measurement_basis) # Run mitigated tomography analysis - tomo_analysis.run(tomo_data, replace_results=True).block_for_results() - for res in tomo_data.analysis_results(block=False): - res.extra["mitigated"] = True + tomo_results, tomo_figures = tomo_analysis._run_analysis(tomo_data) + for data in tomo_results: + data.extra["mitigated"] = True - # Combine results so that tomography results are ordered first - combined_data = [tomo_data, roerror_data] + # Tomography results are ordered first + results.extend(tomo_results + roerror_results) + figures.extend(tomo_figures + roerror_figures) # Run unmitigated tomography analysis if self.options.unmitigated_fit: + nomit_data = tomo_data.copy() tomo_analysis.set_options(measurement_basis=PauliMeasurementBasis()) - nomit_data = tomo_analysis.run(tomo_data, replace_results=False).block_for_results() - for res in nomit_data.analysis_results(block=False): - res.extra["mitigated"] = False - combined_data.append(nomit_data) - - if self._flatten_results: - return self._combine_results(combined_data) + nomit_results, nomit_figures = tomo_analysis._run_analysis(nomit_data) + for data in nomit_results: + data.extra["mitigated"] = False + results.extend(nomit_results) + figures.extend(nomit_figures) - return [], [] + return results, figures From c9de11cdea82e85e5caa3e23fb94b7d63beb6874 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Sun, 2 Jul 2023 13:50:41 +0900 Subject: [PATCH 12/27] Revert "Overhaul composite analysis. Simplified sub container initialization." 
This reverts commit 6ebd975dfb19e4f6d4e43d2d100922ee9e1226ca. --- .../framework/composite/composite_analysis.py | 317 +++++++++++++----- .../framework/experiment_data.py | 1 + .../analysis/local_readout_error_analysis.py | 2 +- .../tomography/mit_tomography_analysis.py | 36 +- 4 files changed, 247 insertions(+), 109 deletions(-) diff --git a/qiskit_experiments/framework/composite/composite_analysis.py b/qiskit_experiments/framework/composite/composite_analysis.py index decc43a6d8..039585e4b4 100644 --- a/qiskit_experiments/framework/composite/composite_analysis.py +++ b/qiskit_experiments/framework/composite/composite_analysis.py @@ -13,13 +13,15 @@ Composite Experiment Analysis class. """ -from typing import List, Dict, Union, Optional, Iterator +from typing import List, Dict, Union, Optional, Tuple import warnings - -from qiskit.result import marginal_distribution, marginal_memory +import numpy as np +from qiskit.result import marginal_distribution +from qiskit.result.postprocess import format_counts_memory from qiskit_experiments.framework import BaseAnalysis, ExperimentData from qiskit_experiments.framework.analysis_result_data import AnalysisResultData from qiskit_experiments.framework.base_analysis import _requires_copy +from qiskit_experiments.exceptions import AnalysisError class CompositeAnalysis(BaseAnalysis): @@ -110,141 +112,225 @@ def run( if not replace_results and _requires_copy(experiment_data): experiment_data = experiment_data.copy() + if not self._flatten_results: + # Initialize child components if they are not initalized + # This only needs to be done if results are not being flattened + self._add_child_data(experiment_data) + # Run analysis with replace_results = True since we have already # created the copy if it was required return super().run(experiment_data, replace_results=True, **options) def _run_analysis(self, experiment_data: ExperimentData): + # Return list of experiment data containers for each component experiment + # containing the marginalized data from the composite experiment + component_expdata = self._component_experiment_data(experiment_data) - component_exp_data = [] - iter_components = self._initialize_component_experiment_data(experiment_data) - for i, sub_exp_data in enumerate(iter_components): + # Run the component analysis on each component data + for i, sub_expdata in enumerate(component_expdata): # Since copy for replace result is handled at the parent level # we always run with replace result on component analysis - self._analyses[i].run(sub_exp_data, replace_results=True) - component_exp_data.append(sub_exp_data) + self._analyses[i].run(sub_expdata, replace_results=True) # Analysis is running in parallel so we add loop to wait # for all component analysis to finish before returning # the parent experiment analysis results - analysis_results = [] - figures = [] - for i, sub_exp_data in enumerate(component_exp_data): - sub_exp_data.block_for_results() - - if not self._flatten_results: - experiment_data.add_child_data(sub_exp_data) - continue - - # Convert table to AnalysisResultData lists for backward compatibility. - # In principle this is not necessary because data can be directly concatenated to - # the table of outer container, i.e. experiment_data._analysis_results, however - # some custom composite analysis class, such as TphiAnalysis overrides - # the _run_analysis method to perform further analysis on - # sub-analysis outcomes. This is indeed an overhead, - # and at some point we should restrict such subclass implementation. 
- analysis_table = sub_exp_data.analysis_results(columns="all", dataframe=True) - for _, series in analysis_table.iterrows(): - data = AnalysisResultData.from_table_element(**series.to_dict()) - data.experiment_id = experiment_data.experiment_id - analysis_results.append(data) + for sub_expdata in component_expdata: + sub_expdata.block_for_results() + # Optionally flatten results from all component experiments + # for adding to the main experiment data container + if self._flatten_results: + return self._combine_results(component_expdata) - for fig_key in sub_exp_data.figure_names: - figures.append(sub_exp_data.figure(figure_key=fig_key)) + return [], [] - del sub_exp_data + def _component_experiment_data(self, experiment_data: ExperimentData) -> List[ExperimentData]: + """Return a list of marginalized experiment data for component experiments. - return analysis_results, figures + Args: + experiment_data: a composite experiment data container. - def _marginalize_data( - self, - composite_data: List[Dict], - component_index: int, - ) -> List[Dict]: - """Return marginalized data for component with particular index. + Returns: + The list of analysis-ready marginalized experiment data for each + component experiment. + + Raises: + AnalysisError: If the component experiment data cannot be extracted. + """ + if not self._flatten_results: + # Retrieve child data for component experiments for updating + component_index = experiment_data.metadata.get("component_child_index", []) + if not component_index: + raise AnalysisError("Unable to extract component child experiment data") + component_expdata = [experiment_data.child_data(i) for i in component_index] + else: + # Initialize temporary ExperimentData containers for + # each component experiment to analysis on. These will + # not be saved but results and figures will be collected + # from them + component_expdata = self._initialize_component_experiment_data(experiment_data) + + # Compute marginalize data for each component experiment + marginalized_data = self._marginalized_component_data(experiment_data.data()) + + # Add the marginalized component data and component job metadata + # to each component child experiment. Note that this will clear + # any currently stored data in the experiment. Since copying of + # child data is handled by the `replace_results` kwarg of the + # parent container it is safe to always clear and replace the + # results of child containers in this step + for sub_expdata, sub_data in zip(component_expdata, marginalized_data): + # Clear any previously stored data and add marginalized data + sub_expdata._result_data.clear() + sub_expdata.add_data(sub_data) + + return component_expdata + + def _marginalized_component_data(self, composite_data: List[Dict]) -> List[List[Dict]]: + """Return marginalized data for component experiments. Args: composite_data: a list of composite experiment circuit data. - component_index: an index of component to return. Returns: - A lists of marginalized circuit data for each component + A List of lists of marginalized circuit data for each component experiment in the composite experiment. """ - out = [] + # Marginalize data + marginalized_data = {} for datum in composite_data: metadata = datum.get("metadata", {}) - if component_index not in metadata["composite_index"]: - # This circuit is not tied to the component experiment at "component_index". 
- continue - index = metadata["composite_index"].index(component_index) - + # Add marginalized data to sub experiments if "composite_clbits" in metadata: composite_clbits = metadata["composite_clbits"] else: composite_clbits = None - component_data = {"metadata": metadata["composite_metadata"][index]} - - # Use terra result marginalization utils. - # These functions support parallel execution and are implemented in Rust. - if "counts" in datum: - if composite_clbits is not None: - component_data["counts"] = marginal_distribution( - counts=datum["counts"], - indices=composite_clbits[index], - ) - else: - component_data["counts"] = datum["counts"] - if "memory" in datum: - if composite_clbits is not None: - component_data["memory"] = marginal_memory( - memory=datum["memory"], - indices=composite_clbits[index], - ) - else: - component_data["memory"] = datum["memory"] - out.append(component_data) - return out + # Pre-process the memory if any to avoid redundant calls to format_counts_memory + f_memory = self._format_memory(datum, composite_clbits) + + for i, index in enumerate(metadata["composite_index"]): + if index not in marginalized_data: + # Initialize data list for marginalized + marginalized_data[index] = [] + sub_data = {"metadata": metadata["composite_metadata"][i]} + if "counts" in datum: + if composite_clbits is not None: + sub_data["counts"] = marginal_distribution( + counts=datum["counts"], + indices=composite_clbits[i], + ) + else: + sub_data["counts"] = datum["counts"] + if "memory" in datum: + if composite_clbits is not None: + # level 2 + if f_memory is not None: + idx = slice( + -1 - composite_clbits[i][-1], -composite_clbits[i][0] or None + ) + sub_data["memory"] = [shot[idx] for shot in f_memory] + # level 1 + else: + mem = np.array(datum["memory"]) + + # Averaged level 1 data + if len(mem.shape) == 2: + sub_data["memory"] = mem[composite_clbits[i]].tolist() + # Single-shot level 1 data + if len(mem.shape) == 3: + sub_data["memory"] = mem[:, composite_clbits[i]].tolist() + else: + sub_data["memory"] = datum["memory"] + marginalized_data[index].append(sub_data) + + # Sort by index + return [marginalized_data[i] for i in sorted(marginalized_data.keys())] + + @staticmethod + def _format_memory(datum: Dict, composite_clbits: List): + """A helper method to convert level 2 memory (if it exists) to bit-string format.""" + f_memory = None + if ( + "memory" in datum + and composite_clbits is not None + and isinstance(datum["memory"][0], str) + ): + num_cbits = 1 + max(cbit for cbit_list in composite_clbits for cbit in cbit_list) + header = {"memory_slots": num_cbits} + f_memory = list(format_counts_memory(shot, header) for shot in datum["memory"]) + + return f_memory + + def _add_child_data(self, experiment_data: ExperimentData): + """Save empty component experiment data as child data. + + This will initialize empty ExperimentData objects for each component + experiment and add them as child data to the main composite experiment + ExperimentData container container for saving. + + Args: + experiment_data: a composite experiment experiment data container. 
+ """ + component_index = experiment_data.metadata.get("component_child_index", []) + if component_index: + # Child components are already initialized + return + + # Initialize the component experiment data containers and add them + # as child data to the current experiment data + child_components = self._initialize_component_experiment_data(experiment_data) + start_index = len(experiment_data.child_data()) + for i, subdata in enumerate(child_components): + experiment_data.add_child_data(subdata) + component_index.append(start_index + i) + + # Store the indices of the added child data in metadata + experiment_data.metadata["component_child_index"] = component_index def _initialize_component_experiment_data( - self, - experiment_data: ExperimentData, - ) -> Iterator[ExperimentData]: + self, experiment_data: ExperimentData + ) -> List[ExperimentData]: """Initialize empty experiment data containers for component experiments. Args: - experiment_data: a composite experiment data container. + experiment_data: a composite experiment experiment data container. - Yields: - Experiment data containers for each component experiment - containing the component metadata, and tags, share level. + Returns: + The list of experiment data containers for each component experiment + containing the component metadata, and tags, share level, and + auto save settings of the composite experiment. """ - metadata = experiment_data.metadata - # Extract component experiment types and metadata so they can be # added to the component experiment data containers + metadata = experiment_data.metadata num_components = len(self._analyses) experiment_types = metadata.get("component_types", [None] * num_components) component_metadata = metadata.get("component_metadata", [{}] * num_components) # Create component experiments and set the backend and # metadata for the components - composite_data = experiment_data.data() - child_data_ids = [] - for i in range(num_components): - # Create empty container with metadata - sub_exp_data = ExperimentData(backend=experiment_data.backend) - sub_exp_data.experiment_type = experiment_types[i] - sub_exp_data.metadata.update(component_metadata[i]) - sub_exp_data.auto_save = False + component_expdata = [] + for i, _ in enumerate(self._analyses): + subdata = ExperimentData(backend=experiment_data.backend) + subdata.experiment_type = experiment_types[i] + subdata.metadata.update(component_metadata[i]) + + if self._flatten_results: + # Explicitly set auto_save to false so the temporary + # data can't accidentally be saved + subdata.auto_save = False + else: + # Copy tags, share_level and auto_save from the parent + # experiment data if results are not being flattened. 
+ subdata.tags = experiment_data.tags + subdata.share_level = experiment_data.share_level + subdata.auto_save = experiment_data.auto_save - # Add marginalized experiment data - sub_exp_data.add_data(self._marginalize_data(composite_data, i)) - child_data_ids.append(sub_exp_data.experiment_id) + component_expdata.append(subdata) - yield sub_exp_data + return component_expdata def _set_flatten_results(self): """Recursively set flatten_results to True for all composite components.""" @@ -252,3 +338,54 @@ def _set_flatten_results(self): for analysis in self._analyses: if isinstance(analysis, CompositeAnalysis): analysis._set_flatten_results() + + def _combine_results( + self, component_experiment_data: List[ExperimentData] + ) -> Tuple[List[AnalysisResultData], List["matplotlib.figure.Figure"]]: + """Combine analysis results from component experiment data. + + Args: + component_experiment_data: list of experiment data containers containing the + analysis results for each component experiment. + + Returns: + A pair of the combined list of all analysis results from each of the + component experiments, and a list of all figures from each component + experiment. + """ + analysis_results = [] + figures = [] + for sub_expdata in component_experiment_data: + figures += sub_expdata._figures.values() + + # Convert Dataframe Series back into AnalysisResultData + # This is due to limitation that _run_analysis must return List[AnalysisResultData], + # and some composite analysis such as TphiAnalysis overrides this method to + # return extra quantity computed from sub analysis results. + # This produces unnecessary data conversion. + # The _run_analysis mechanism seems just complicating the entire logic. + # Since it's impossible to deprecate the usage of this protected method, + # we should implement new CompositeAnalysis class with much more efficient + # internal logic. Note that the child data structure is no longer necessary + # because dataframe offers more efficient data filtering mechanisms. + analysis_table = sub_expdata.analysis_results(verbosity=3, dataframe=True) + for _, series in analysis_table.iterrows(): + data_dict = series.to_dict() + primary_info = { + "name": data_dict.pop("name"), + "value": data_dict.pop("value"), + "quality": data_dict.pop("quality"), + "device_components": data_dict.pop("components"), + } + chisq = data_dict.pop("chisq", np.nan) + if chisq: + primary_info["chisq"] = chisq + data_dict["experiment"] = sub_expdata.experiment_type + if "experiment_id" in data_dict: + # Use experiment ID of parent experiment data. + # Sub experiment data is merged and discarded. + del data_dict["experiment_id"] + analysis_result = AnalysisResultData(**primary_info, extra=data_dict) + analysis_results.append(analysis_result) + + return analysis_results, figures diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index a54015d711..bfc21cc51e 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -1376,6 +1376,7 @@ def add_analysis_results( New dataframe columns are created in the analysis result table with added keys. 
""" if results is not None: + # TODO deprecate this path if not isinstance(results, list): results = [results] for result in results: diff --git a/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py b/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py index 29f31b9b17..0bb3263496 100644 --- a/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/local_readout_error_analysis.py @@ -78,7 +78,7 @@ def _run_analysis( ) figures = [figure] else: - figures = [] + figures = None return analysis_results, figures def _generate_matrices(self, data) -> List[np.array]: diff --git a/qiskit_experiments/library/tomography/mit_tomography_analysis.py b/qiskit_experiments/library/tomography/mit_tomography_analysis.py index e864089132..898313d1ea 100644 --- a/qiskit_experiments/library/tomography/mit_tomography_analysis.py +++ b/qiskit_experiments/library/tomography/mit_tomography_analysis.py @@ -92,36 +92,36 @@ def set_options(self, **fields): def _run_analysis(self, experiment_data): # Return list of experiment data containers for each component experiment # containing the marginalized data from the composite experiment - results, figures = [], [] - roerror_analysis, tomo_analysis = self._analyses - roerror_data, tomo_data = list(self._initialize_component_experiment_data(experiment_data)) + roerror_data, tomo_data = self._component_experiment_data(experiment_data) # Run readout error analysis - roerror_results, roerror_figures = roerror_analysis._run_analysis(roerror_data) + roerror_analysis.run(roerror_data, replace_results=True).block_for_results() + + # Construct noisy measurement basis + mitigator = roerror_data.analysis_results(0).value # Construct noisy measurement basis - mitigator = roerror_results[0].value measurement_basis = PauliMeasurementBasis(mitigator=mitigator) tomo_analysis.set_options(measurement_basis=measurement_basis) # Run mitigated tomography analysis - tomo_results, tomo_figures = tomo_analysis._run_analysis(tomo_data) - for data in tomo_results: - data.extra["mitigated"] = True + tomo_analysis.run(tomo_data, replace_results=True).block_for_results() + for res in tomo_data.analysis_results(block=False): + res.extra["mitigated"] = True - # Tomography results are ordered first - results.extend(tomo_results + roerror_results) - figures.extend(tomo_figures + roerror_figures) + # Combine results so that tomography results are ordered first + combined_data = [tomo_data, roerror_data] # Run unmitigated tomography analysis if self.options.unmitigated_fit: - nomit_data = tomo_data.copy() tomo_analysis.set_options(measurement_basis=PauliMeasurementBasis()) - nomit_results, nomit_figures = tomo_analysis._run_analysis(nomit_data) - for data in nomit_results: - data.extra["mitigated"] = False - results.extend(nomit_results) - figures.extend(nomit_figures) + nomit_data = tomo_analysis.run(tomo_data, replace_results=False).block_for_results() + for res in nomit_data.analysis_results(block=False): + res.extra["mitigated"] = False + combined_data.append(nomit_data) + + if self._flatten_results: + return self._combine_results(combined_data) - return results, figures + return [], [] From a82ef79e6b34c800722b5ad8fbd0238a75fff69a Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Sun, 2 Jul 2023 21:31:25 +0900 Subject: [PATCH 13/27] Minimum cleanup for composite analysis --- .../framework/composite/composite_analysis.py | 31 ++++++------------- 1 
file changed, 10 insertions(+), 21 deletions(-) diff --git a/qiskit_experiments/framework/composite/composite_analysis.py b/qiskit_experiments/framework/composite/composite_analysis.py index 039585e4b4..85e8baf0a0 100644 --- a/qiskit_experiments/framework/composite/composite_analysis.py +++ b/qiskit_experiments/framework/composite/composite_analysis.py @@ -140,8 +140,11 @@ def _run_analysis(self, experiment_data: ExperimentData): # Optionally flatten results from all component experiments # for adding to the main experiment data container if self._flatten_results: - return self._combine_results(component_expdata) - + analysis_results, figures = self._combine_results(component_expdata) + for res in analysis_results: + # Override experiment ID because entries are flattened + res.experiment_id = experiment_data.experiment_id + return analysis_results, figures return [], [] def _component_experiment_data(self, experiment_data: ExperimentData) -> List[ExperimentData]: @@ -340,7 +343,8 @@ def _set_flatten_results(self): analysis._set_flatten_results() def _combine_results( - self, component_experiment_data: List[ExperimentData] + self, + component_experiment_data: List[ExperimentData], ) -> Tuple[List[AnalysisResultData], List["matplotlib.figure.Figure"]]: """Combine analysis results from component experiment data. @@ -368,24 +372,9 @@ def _combine_results( # we should implement new CompositeAnalysis class with much more efficient # internal logic. Note that the child data structure is no longer necessary # because dataframe offers more efficient data filtering mechanisms. - analysis_table = sub_expdata.analysis_results(verbosity=3, dataframe=True) + analysis_table = sub_expdata.analysis_results(columns="all", dataframe=True) for _, series in analysis_table.iterrows(): - data_dict = series.to_dict() - primary_info = { - "name": data_dict.pop("name"), - "value": data_dict.pop("value"), - "quality": data_dict.pop("quality"), - "device_components": data_dict.pop("components"), - } - chisq = data_dict.pop("chisq", np.nan) - if chisq: - primary_info["chisq"] = chisq - data_dict["experiment"] = sub_expdata.experiment_type - if "experiment_id" in data_dict: - # Use experiment ID of parent experiment data. - # Sub experiment data is merged and discarded. 
-                del data_dict["experiment_id"]
-                analysis_result = AnalysisResultData(**primary_info, extra=data_dict)
-                analysis_results.append(analysis_result)
+                data = AnalysisResultData.from_table_element(**series.to_dict())
+                analysis_results.append(data)

         return analysis_results, figures

From 4fc1b6d736223f02c92d3a85b515882fbe34ccd1 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Mon, 3 Jul 2023 10:40:00 +0900
Subject: [PATCH 14/27] Bugfix

- Replace deep copied values in AnalysisResults
- Fix dataframe equality
- Allow non-hexadecimal UUID string as result id

---
 qiskit_experiments/database_service/utils.py | 17 +++++++++++------
 qiskit_experiments/framework/experiment_data.py | 10 ++++++++++
 test/extended_equality.py | 10 +++++++++-
 test/framework/test_data_table.py | 6 ------
 4 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py
index fd68dc10b6..b3508edeee 100644
--- a/qiskit_experiments/database_service/utils.py
+++ b/qiskit_experiments/database_service/utils.py
@@ -402,6 +402,8 @@ def add_entry(
         template.update(kwargs)

         with self._lock:
+            if not isinstance(index, str):
+                index = str(index)
             self._container.loc[index] = list(template.values())

     def _repr_html_(self) -> Union[str, None]:
@@ -524,15 +526,18 @@ def add_entry(
             kwargs: Description of new entry to register.
         """
         if result_id:
-            try:
-                result_id = uuid.UUID(result_id, version=4).hex
-            except ValueError as ex:
-                raise ValueError(f"{result_id} is not a valid hexadecimal UUID string.") from ex
+            with self.lock:
+                if result_id[:8] in self._container.index:
+                    raise ValueError(
+                        f"The short ID of the result_id '{result_id[:8]}' already exists in the "
+                        "experiment data. Please use another ID to avoid index collision."
+                    )
         else:
             result_id = self._unique_table_index()

-        # Short unique index is generated from the full UUID.
-        # Showing the full UUID unnecessarily occupies horizontal space in the HTML table.
+        # Short unique index is generated from the result id.
+        # Showing the full result id unnecessarily occupies horizontal space in the HTML table.
+        # This mechanism is similar to the abbreviated git commit hash.
         short_index = result_id[:8]

         super().add_entry(
diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py
index bfc21cc51e..fe0f4c4ebd 100644
--- a/qiskit_experiments/framework/experiment_data.py
+++ b/qiskit_experiments/framework/experiment_data.py
@@ -2670,6 +2670,16 @@ def _series_to_service_result(
             chisq=qe_result.chisq,
             source=source,
         )
+
+        # Overwrite the formatted result data dictionary with the original objects.
+        # The format_result_data method implicitly deep copies the input value and
+        # extra fields, which means the dictionary stores objects with different
+        # object ids. This affects computation of error propagation with ufloats,
+        # because the uncertainties package recognizes value correlations through
+        # object identity.
+        # See test.curve_analysis.test_baseclass.TestCurveAnalysis.test_end_to_end_compute_new_entry.
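+        # Illustrative sketch of the identity-based correlation in the
+        # uncertainties package:
+        #   a = ufloat(1.0, 0.1)
+        #   (a - a).std_dev                  -> 0.0 (same object, fully correlated)
+        #   (a - ufloat(1.0, 0.1)).std_dev   -> ~0.14 (distinct objects, independent)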
+ result_data["_value"] = qe_result.value + result_data["_extra"] = qe_result.extra + try: quality = ResultQuality(str(qe_result.quality).upper()) except ValueError: diff --git a/test/extended_equality.py b/test/extended_equality.py index 021174baa2..53b0dbec75 100644 --- a/test/extended_equality.py +++ b/test/extended_equality.py @@ -281,7 +281,11 @@ def _check_dataframes( **kwargs, ): """Check equality of data frame which may involve Qiskit Experiments class value.""" - return is_equivalent(data1.to_dict(orient="index"), data2.to_dict(orient="index")) + return is_equivalent( + data1.to_dict(orient="index"), + data2.to_dict(orient="index"), + **kwargs, + ) @_is_equivalent_dispatcher.register @@ -331,4 +335,8 @@ def _check_all_attributes( **kwargs, ): """Helper function to check all attributes.""" + test = {} + for att in attrs: + test[att] = is_equivalent(getattr(data1, att), getattr(data2, att), **kwargs) + return all(is_equivalent(getattr(data1, att), getattr(data2, att), **kwargs) for att in attrs) diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py index ade552ce03..0ead01fc64 100644 --- a/test/framework/test_data_table.py +++ b/test/framework/test_data_table.py @@ -155,12 +155,6 @@ def test_add_entry_with_result_id(self): table.add_entry(result_id="9a0bdec8c0104ef7bb7db84939717a6b", value=0.123) self.assertEqual(table.loc["9a0bdec8"].value, 0.123) - def test_raises_adding_entry_with_invalid_result_id(self): - """Test adding entry with non-hexadecimal UUID result id.""" - table = AnalysisResultTable() - with self.assertRaises(ValueError): - table.add_entry(result_id="12345678910") - def test_extra_column_name_is_always_returned(self): """Test extra column names are always returned in filtered column names.""" table = AnalysisResultTable() From df5557bfc5fc9fc10dfd8cc28f61faaa96f15ee4 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Mon, 3 Jul 2023 13:52:14 +0900 Subject: [PATCH 15/27] Reorganize: move result table class to framework --- qiskit_experiments/database_service/utils.py | 120 +------------- qiskit_experiments/framework/__init__.py | 2 + .../framework/analysis_result_table.py | 149 ++++++++++++++++++ .../framework/experiment_data.py | 2 +- test/framework/test_data_table.py | 5 +- 5 files changed, 156 insertions(+), 122 deletions(-) create mode 100644 qiskit_experiments/framework/analysis_result_table.py diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py index b3508edeee..7aacef5dff 100644 --- a/qiskit_experiments/database_service/utils.py +++ b/qiskit_experiments/database_service/utils.py @@ -16,13 +16,11 @@ import logging import threading import traceback -import warnings from abc import ABC, abstractmethod from collections import OrderedDict from datetime import datetime, timezone from typing import Callable, Tuple, List, Dict, Any, Union, Type, Optional import json -import uuid import pandas as pd import dateutil.parser @@ -435,126 +433,10 @@ def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame": if not value.get("class", None) == "ThreadSafeDataFrame": raise ValueError("JSON decoded value for ThreadSafeDataFrame is not valid class type.") - instance = object.__new__(AnalysisResultTable) + instance = object.__new__(cls) # Need to update self._columns first to set extra columns in the dataframe container. 
instance._columns = value.get("columns", cls._default_columns()) instance._extra = value.get("extra", []) instance._lock = threading.RLock() instance._container = instance._init_container(init_values=value.get("data", {})) return instance - - -class AnalysisResultTable(ThreadSafeDataFrame): - """Thread safe dataframe to store the analysis results.""" - - @classmethod - def _default_columns(cls) -> List[str]: - return [ - "name", - "experiment", - "components", - "value", - "quality", - "experiment_id", - "result_id", - "tags", - "backend", - "run_time", - "created_time", - ] - - def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: - """Filter columns names available in this table. - - Args: - columns: Specifying a set of columns to return. You can pass a list of each - column name to return, otherwise builtin column groups are available. - - * "all": Return all columns, including metadata to communicate - with experiment service, such as entry IDs. - * "default": Return columns including analysis result with supplementary - information about experiment. - * "minimal": Return only analysis subroutine returns. - - Raises: - ValueError: When column is given in string which doesn't match with any builtin group. - """ - if columns == "all": - return self._columns - if columns == "default": - return [ - "name", - "experiment", - "components", - "value", - "quality", - "backend", - "run_time", - ] + self._extra - if columns == "minimal": - return [ - "name", - "components", - "value", - "quality", - ] + self._extra - if not isinstance(columns, str): - out = [] - for column in columns: - if column in self._columns: - out.append(column) - else: - warnings.warn( - f"Specified column name {column} does not exist in this table.", UserWarning - ) - return out - raise ValueError( - f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." - ) - - # pylint: disable=arguments-renamed - def add_entry( - self, - result_id: Optional[str] = None, - **kwargs, - ): - """Add new entry to the table. - - Args: - result_id: Result ID. Automatically generated when not provided. - This must be valid hexadecimal UUID string. - kwargs: Description of new entry to register. - """ - if result_id: - with self.lock: - if result_id[:8] in self._container.index: - raise ValueError( - f"The short ID of the result_id '{result_id[:8]}' already exists in the " - "experiment data. Please use another ID to avoid index collision." - ) - else: - result_id = self._unique_table_index() - - # Short unique index is generated from result id. - # Showing full result id unnecessary occupies horizontal space of the html table. - # This mechanism is similar with the github commit hash. - short_index = result_id[:8] - - super().add_entry( - index=short_index, - result_id=result_id, - **kwargs, - ) - - def _unique_table_index(self): - """Generate unique UUID which is unique in the table with first 8 characters.""" - with self.lock: - n = 0 - while n < 1000: - tmp_id = uuid.uuid4().hex - if tmp_id[:8] not in self._container.index: - return tmp_id - raise RuntimeError( - "Unique result_id string cannot be prepared for this table within 1000 trials. " - "Reduce number of entries, or manually provide a unique result_id." 
-        )
diff --git a/qiskit_experiments/framework/__init__.py b/qiskit_experiments/framework/__init__.py
index f76c4cad55..c9a480e09e 100644
--- a/qiskit_experiments/framework/__init__.py
+++ b/qiskit_experiments/framework/__init__.py
@@ -86,6 +86,7 @@
     AnalysisStatus
     AnalysisResult
     AnalysisResultData
+    AnalysisResultTable
     ExperimentConfig
     AnalysisConfig
     ExperimentEncoder
@@ -137,6 +138,7 @@
 from .backend_timing import BackendTiming
 from .configs import ExperimentConfig, AnalysisConfig
 from .analysis_result_data import AnalysisResultData
+from .analysis_result_table import AnalysisResultTable
 from .experiment_data import ExperimentData
 from .composite import (
     ParallelExperiment,
diff --git a/qiskit_experiments/framework/analysis_result_table.py b/qiskit_experiments/framework/analysis_result_table.py
new file mode 100644
index 0000000000..9e388c3abb
--- /dev/null
+++ b/qiskit_experiments/framework/analysis_result_table.py
@@ -0,0 +1,149 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2023.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Table representation of analysis results."""
+
+import logging
+import uuid
+import warnings
+from typing import List, Union, Optional
+
+from qiskit_experiments.database_service.utils import ThreadSafeDataFrame
+
+LOG = logging.getLogger(__name__)
+
+
+class AnalysisResultTable(ThreadSafeDataFrame):
+    """Table-format container of analysis results.
+
+    This table is a thread-safe wrapper of a pandas dataframe with predefined columns.
+    This object is attached to the :class:`.ExperimentData` container to store
+    analysis results. Each table row contains a series of metadata in addition to the
+    result value itself.
+
+    Users can rely on the dataframe filtering mechanism to efficiently analyze large
+    scale experiment results, e.g. massively parallel and batch experiment outcomes.
+    See the `pandas dataframe documentation <https://pandas.pydata.org/docs/>`_
+    for more details.
+    """
+
+    @classmethod
+    def _default_columns(cls) -> List[str]:
+        return [
+            "name",
+            "experiment",
+            "components",
+            "value",
+            "quality",
+            "experiment_id",
+            "result_id",
+            "tags",
+            "backend",
+            "run_time",
+            "created_time",
+        ]
+
+    def filter_columns(self, columns: Union[str, List[str]]) -> List[str]:
+        """Filter columns names available in this table.
+
+        Args:
+            columns: Specifying a set of columns to return. You can pass a list of each
+                column name to return, otherwise builtin column groups are available.
+
+                * "all": Return all columns, including metadata to communicate
+                    with experiment service, such as entry IDs.
+                * "default": Return columns including analysis result with supplementary
+                    information about experiment.
+                * "minimal": Return only analysis subroutine returns.
+
+        Raises:
+            ValueError: When column is given in string which doesn't match with any builtin group.
+ """ + if columns == "all": + return self._columns + if columns == "default": + return [ + "name", + "experiment", + "components", + "value", + "quality", + "backend", + "run_time", + ] + self._extra + if columns == "minimal": + return [ + "name", + "components", + "value", + "quality", + ] + self._extra + if not isinstance(columns, str): + out = [] + for column in columns: + if column in self._columns: + out.append(column) + else: + warnings.warn( + f"Specified column name {column} does not exist in this table.", UserWarning + ) + return out + raise ValueError( + f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." + ) + + # pylint: disable=arguments-renamed + def add_entry( + self, + result_id: Optional[str] = None, + **kwargs, + ): + """Add new entry to the table. + + Args: + result_id: Result ID. Automatically generated when not provided. + This must be valid hexadecimal UUID string. + kwargs: Description of new entry to register. + """ + if result_id: + with self.lock: + if result_id[:8] in self._container.index: + raise ValueError( + f"The short ID of the result_id '{result_id[:8]}' already exists in the " + "experiment data. Please use another ID to avoid index collision." + ) + else: + result_id = self._unique_table_index() + + # Short unique index is generated from result id. + # Showing full result id unnecessary occupies horizontal space of the html table. + # This mechanism is similar with the github commit hash. + short_index = result_id[:8] + + super().add_entry( + index=short_index, + result_id=result_id, + **kwargs, + ) + + def _unique_table_index(self): + """Generate unique UUID which is unique in the table with first 8 characters.""" + with self.lock: + n = 0 + while n < 1000: + tmp_id = uuid.uuid4().hex + if tmp_id[:8] not in self._container.index: + return tmp_id + raise RuntimeError( + "Unique result_id string cannot be prepared for this table within 1000 trials. " + "Reduce number of entries, or manually provide a unique result_id." + ) diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index fe0f4c4ebd..f0f2a41396 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -55,11 +55,11 @@ plot_to_svg_bytes, ThreadSafeOrderedDict, ThreadSafeList, - AnalysisResultTable, ) from qiskit_experiments.database_service.device_component import to_component, DeviceComponent from qiskit_experiments.framework.analysis_result import AnalysisResult from qiskit_experiments.framework.analysis_result_data import AnalysisResultData +from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable from qiskit_experiments.framework import BackendData from qiskit_experiments.database_service.exceptions import ( ExperimentDataError, diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py index 0ead01fc64..7f39c24af8 100644 --- a/test/framework/test_data_table.py +++ b/test/framework/test_data_table.py @@ -1,6 +1,6 @@ # This code is part of Qiskit. # -# (C) Copyright IBM 2021. +# (C) Copyright IBM 2023. # # This code is licensed under the Apache License, Version 2.0. 
You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
@@ -18,7 +18,8 @@
 import pandas as pd
 from qiskit.tools import parallel_map
 
-from qiskit_experiments.database_service.utils import ThreadSafeDataFrame, AnalysisResultTable
+from qiskit_experiments.database_service.utils import ThreadSafeDataFrame
+from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable
 
 
 class TestBaseTable(QiskitExperimentsTestCase):

From cddcaab9cd2b08acd7ed0e9a6218f886c2866f66 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Mon, 3 Jul 2023 14:44:03 +0900
Subject: [PATCH 16/27] Add reno
---
 ...ame-analysis-results-ec8863e826a70621.yaml | 46 ++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 releasenotes/notes/add-dataframe-analysis-results-ec8863e826a70621.yaml

diff --git a/releasenotes/notes/add-dataframe-analysis-results-ec8863e826a70621.yaml b/releasenotes/notes/add-dataframe-analysis-results-ec8863e826a70621.yaml
new file mode 100644
index 0000000000..bd349b8b2a
--- /dev/null
+++ b/releasenotes/notes/add-dataframe-analysis-results-ec8863e826a70621.yaml
@@ -0,0 +1,46 @@
+---
+features:
+  - |
+    :class:`.ExperimentData` has been upgraded to store analysis result data in
+    a table format with the new inline container :class:`.AnalysisResultTable`.
+    In this release, the :meth:`.ExperimentData.analysis_results` method still returns
+    a conventional list of :class:`.AnalysisResult` for backward compatibility,
+    however, when you call the method with the new argument ``dataframe=True``, it
+    returns all analysis results together in a table format. For example,
+
+    .. code-block:: python
+
+      exp = StandardRB((0,), lengths, backend)
+      experiment_data = exp.run().block_for_results()
+
+      experiment_data.analysis_results(dataframe=True, columns="default")
+
+    Information contained in the returned table can be filtered with the ``columns``
+    argument, which takes either ``all``, ``default``, ``minimal``, or a list of
+    column names. Returning a list of :class:`.AnalysisResult` will be deprecated
+    in a future release along with the ``dataframe`` option.
+
+    Related to this update, the :meth:`.ExperimentData.add_analysis_results` method
+    now takes keyword arguments keyed on the table column names, in addition to the
+    ``results`` argument, which is an :class:`.AnalysisResult` or a list of them.
+    This allows users and developers to register a new entry in the
+    :class:`.ExperimentData` instance without first creating an
+    :class:`.AnalysisResult` instance, as sketched at the end of this note.
+
+    Note that the conventional :class:`.AnalysisResult` is primarily a payload object
+    for saving an analysis result to a remote database, implementing the REST API of
+    the IBM Experiment Service, which is not necessary in the context of experiment
+    data analysis.
+    In a future release, :class:`.AnalysisResult` will be hidden from Qiskit Experiments users.
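+
+    A minimal sketch of the keyword-argument form (the entry values below are
+    illustrative placeholders rather than outputs of a real experiment):
+
+    .. code-block:: python
+
+      experiment_data.add_analysis_results(
+          name="T1",
+          value=2.4e-4,
+          quality="good",
+          components=["Q0"],
+      )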
From b2579d84eb9cd92cd1a366c41309fac0000dc426 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Mon, 3 Jul 2023 21:46:15 +0900 Subject: [PATCH 17/27] fix test --- test/framework/test_data_table.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py index 7f39c24af8..7f6202089d 100644 --- a/test/framework/test_data_table.py +++ b/test/framework/test_data_table.py @@ -22,6 +22,12 @@ from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable +def _callable_thread_local_add_entry(args, thread_table): + """A test callable that is called from multi-thread.""" + index, kwargs = args + thread_table.add_entry(index, **kwargs) + + class TestBaseTable(QiskitExperimentsTestCase): """Test case for data frame base class.""" @@ -118,13 +124,9 @@ def test_multi_thread_add_entry(self): """Test add entry with parallel thread access.""" table = TestBaseTable.TestTable() - def _test_thread_local_add_entry(args, thread_table): - index, kwargs = args - thread_table.add_entry(index, **kwargs) - # Mutate thread safe table in parallel thread. No race should occur. parallel_map( - _test_thread_local_add_entry, + _callable_thread_local_add_entry, [ ["x", {"value1": 0.0, "value2": 1.0, "value3": 2.0}], ["y", {"value1": 3.0, "value2": 4.0, "value3": 5.0}], From adfd41e85a6c9d57d9c31dbb595724780e997830 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Mon, 3 Jul 2023 23:06:59 +0900 Subject: [PATCH 18/27] more threadsafe --- qiskit_experiments/database_service/utils.py | 47 +++++++------- test/framework/test_data_table.py | 64 ++++++++++++++++++++ 2 files changed, 89 insertions(+), 22 deletions(-) diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py index 7aacef5dff..3973a256c7 100644 --- a/qiskit_experiments/database_service/utils.py +++ b/qiskit_experiments/database_service/utils.py @@ -326,7 +326,8 @@ def get_columns(self) -> List[str]: Returns: List of column names. """ - return self._columns.copy() + with self._lock: + return self._columns.copy() def add_columns(self, *new_columns: str, default_value: Any = None): """Add new columns to the table. @@ -337,15 +338,17 @@ def add_columns(self, *new_columns: str, default_value: Any = None): new_columns: Name of columns to add. default_value: Default value to fill added columns. """ - # Order sensitive - new_columns = [c for c in new_columns if c not in self.get_columns()] - self._extra.extend(new_columns) - - # Update current table with self._lock: + # Order sensitive + new_columns = [c for c in new_columns if c not in self.get_columns()] + if len(new_columns) == 0: + return + + # Update columns for new_column in new_columns: self._container.insert(len(self._container.columns), new_column, default_value) - self._columns.extend(new_columns) + self._columns.extend(new_columns) + self._extra.extend(new_columns) def clear(self): """Remove all elements from this container.""" @@ -367,7 +370,7 @@ def container( Bare pandas dataframe. This object is no longer thread safe. """ with self._lock: - container = self._container + container = self._container.copy() if collapse_extra: return container[self._default_columns()] @@ -387,19 +390,18 @@ def add_entry( Raises: ValueError: When index is not unique in this table. 
""" - with self.lock: + with self._lock: if index in self._container.index: raise ValueError(f"Table index {index} already exists in the table.") - columns = self.get_columns() - missing = kwargs.keys() - set(columns) - if missing: - self.add_columns(*sorted(missing)) + columns = self.get_columns() + missing = kwargs.keys() - set(columns) + if missing: + self.add_columns(*sorted(missing)) - template = dict.fromkeys(self.get_columns()) - template.update(kwargs) + template = dict.fromkeys(self.get_columns()) + template.update(kwargs) - with self._lock: if not isinstance(index, str): index = str(index) self._container.loc[index] = list(template.values()) @@ -421,12 +423,13 @@ def __getattr__(self, item): raise AttributeError(f"'ThreadSafeDataFrame' object has no attribute '{item}'") def __json_encode__(self) -> Dict[str, Any]: - return { - "class": "ThreadSafeDataFrame", - "data": self._container.to_dict(orient="index"), - "columns": self._columns, - "extra": self._extra, - } + with self._lock: + return { + "class": "ThreadSafeDataFrame", + "data": self._container.to_dict(orient="index"), + "columns": self._columns, + "extra": self._extra, + } @classmethod def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame": diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py index 7f6202089d..6dac4840c2 100644 --- a/test/framework/test_data_table.py +++ b/test/framework/test_data_table.py @@ -140,6 +140,70 @@ def test_multi_thread_add_entry(self): self.assertListEqual(table.loc["y"].to_list(), [3.0, 4.0, 5.0]) self.assertListEqual(table.loc["z"].to_list(), [6.0, 7.0, 8.0]) + def test_multi_thread_add_entry_with_new_column(self): + """Test add entry with parallel thread access, each thread adds new column.""" + table = TestBaseTable.TestTable() + + # Mutate thread safe table in parallel thread. No race should occur. 
+        parallel_map(
+            _callable_thread_local_add_entry,
+            [
+                ["x", {"value1": 0.0, "value2": 1.0, "value3": 2.0, "new_x": "x_val"}],
+                ["y", {"value1": 3.0, "value2": 4.0, "value3": 5.0, "new_y": "y_val"}],
+                ["z", {"value1": 6.0, "value2": 7.0, "value3": 8.0, "new_z": "z_val"}],
+            ],
+            task_kwargs={"thread_table": table},
+        )
+        self.assertEqual(len(table), 3)
+
+        self.assertDictEqual(
+            table.loc["x"].to_dict(),
+            {
+                "value1": 0.0,
+                "value2": 1.0,
+                "value3": 2.0,
+                "new_x": "x_val",
+                "new_y": None,
+                "new_z": None,
+            },
+        )
+        self.assertDictEqual(
+            table.loc["y"].to_dict(),
+            {
+                "value1": 3.0,
+                "value2": 4.0,
+                "value3": 5.0,
+                "new_x": None,
+                "new_y": "y_val",
+                "new_z": None,
+            },
+        )
+        self.assertDictEqual(
+            table.loc["z"].to_dict(),
+            {
+                "value1": 6.0,
+                "value2": 7.0,
+                "value3": 8.0,
+                "new_x": None,
+                "new_y": None,
+                "new_z": "z_val",
+            },
+        )
+
+    def test_container_is_immutable(self):
+        """Test modifying container doesn't mutate the original payload."""
+        table = TestBaseTable.TestTable()
+        table.add_entry(index="x", value1=0.1, value2=0.2, value3=0.3)
+
+        dataframe = table.container()
+        dataframe.at["x", "value1"] = 100
+
+        # Local object can be modified
+        self.assertListEqual(dataframe.loc["x"].to_list(), [100, 0.2, 0.3])
+
+        # Original object in the experiment payload is preserved
+        self.assertListEqual(table.loc["x"].to_list(), [0.1, 0.2, 0.3])
+
     def test_round_trip(self):
         """Test JSON roundtrip serialization with the experiment encoder."""
         table = TestBaseTable.TestTable()

From b2579d84eb9cd92cd1a366c41309fac0000dc426 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Tue, 4 Jul 2023 01:33:56 +0900
Subject: [PATCH 19/27] Drop multiprocessing tests.

Qiskit's parallel_map is multiprocess based, and it copies the container
into each process. Data saved in the copied container is simply discarded
when the process finishes. Qiskit Experiments doesn't currently use
multiprocessing, so these tests are not needed for now.
---
 test/framework/test_data_table.py | 71 ------------------------------
 1 file changed, 71 deletions(-)

diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py
index 6dac4840c2..eb27de0bd6 100644
--- a/test/framework/test_data_table.py
+++ b/test/framework/test_data_table.py
@@ -16,7 +16,6 @@
 
 import numpy as np
 import pandas as pd
-from qiskit.tools import parallel_map
 
 from qiskit_experiments.database_service.utils import ThreadSafeDataFrame
 from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable
@@ -120,76 +119,6 @@ def test_clear_container(self):
         table.clear()
         self.assertEqual(len(table), 0)
 
-    def test_multi_thread_add_entry(self):
-        """Test add entry with parallel thread access."""
-        table = TestBaseTable.TestTable()
-
-        # Mutate thread safe table in parallel thread. No race should occur.
-        parallel_map(
-            _callable_thread_local_add_entry,
-            [
-                ["x", {"value1": 0.0, "value2": 1.0, "value3": 2.0}],
-                ["y", {"value1": 3.0, "value2": 4.0, "value3": 5.0}],
-                ["z", {"value1": 6.0, "value2": 7.0, "value3": 8.0}],
-            ],
-            task_kwargs={"thread_table": table},
-        )
-        self.assertEqual(len(table), 3)
-
-        self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0])
-        self.assertListEqual(table.loc["y"].to_list(), [3.0, 4.0, 5.0])
-        self.assertListEqual(table.loc["z"].to_list(), [6.0, 7.0, 8.0])
-
-    def test_multi_thread_add_entry_with_new_column(self):
-        """Test add entry with parallel thread access, each thread adds new column."""
-        table = TestBaseTable.TestTable()
-
-        # Mutate thread safe table in parallel thread. No race should occur.
-        parallel_map(
-            _callable_thread_local_add_entry,
-            [
-                ["x", {"value1": 0.0, "value2": 1.0, "value3": 2.0, "new_x": "x_val"}],
-                ["y", {"value1": 3.0, "value2": 4.0, "value3": 5.0, "new_y": "y_val"}],
-                ["z", {"value1": 6.0, "value2": 7.0, "value3": 8.0, "new_z": "z_val"}],
-            ],
-            task_kwargs={"thread_table": table},
-        )
-        self.assertEqual(len(table), 3)
-
-        self.assertDictEqual(
-            table.loc["x"].to_dict(),
-            {
-                "value1": 0.0,
-                "value2": 1.0,
-                "value3": 2.0,
-                "new_x": "x_val",
-                "new_y": None,
-                "new_z": None,
-            },
-        )
-        self.assertDictEqual(
-            table.loc["y"].to_dict(),
-            {
-                "value1": 3.0,
-                "value2": 4.0,
-                "value3": 5.0,
-                "new_x": None,
-                "new_y": "y_val",
-                "new_z": None,
-            },
-        )
-        self.assertDictEqual(
-            table.loc["z"].to_dict(),
-            {
-                "value1": 6.0,
-                "value2": 7.0,
-                "value3": 8.0,
-                "new_x": None,
-                "new_y": None,
-                "new_z": "z_val",
-            },
-        )
-
     def test_container_is_immutable(self):
         """Test modifying container doesn't mutate the original payload."""
         table = TestBaseTable.TestTable()

From 438d828d1943eb81016c59f0524a92a8e465c8ee Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Tue, 4 Jul 2023 02:26:03 +0900
Subject: [PATCH 20/27] Drop __getattr__ from the ThreadSafeDataFrame.

This was implemented to provide convenient access to the internal
dataframe container so that ThreadSafeDataFrame becomes comparable to a
pandas DataFrame (indeed, pandas implements many convenient methods to
manipulate data). However, this is dangerous in a multithreaded
environment, because the lock is released once the acquired item (which
may be a method that manipulates data) is returned. The holder can then
still mutate the internal container asynchronously. To avoid this
problem, __getattr__ is dropped and all necessary accessors are
implemented as methods holding the reentrant lock.
---
 qiskit_experiments/database_service/utils.py  | 55 ++++++++++---
 .../framework/analysis_result_table.py        | 78 +++++++++++--------
 .../framework/experiment_data.py              | 10 +--
 test/extended_equality.py                     |  4 +
 test/framework/test_data_table.py             | 44 +++++++++--
 5 files changed, 134 insertions(+), 57 deletions(-)

diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py
index 3973a256c7..71be44cba1 100644
--- a/qiskit_experiments/database_service/utils.py
+++ b/qiskit_experiments/database_service/utils.py
@@ -376,17 +376,58 @@ def container(
             return container[self._default_columns()]
         return container
 
+    def drop_entry(
+        self,
+        index: str,
+    ):
+        """Drop entry from the dataframe.
+
+        Args:
+            index: Name of entry to drop.
+
+        Raises:
+            ValueError: When index is not in this table.
+ """ + with self._lock: + if index not in self._container.index: + raise ValueError(f"Table index {index} doesn't exist in this table.") + self._container.drop(index, inplace=True) + + def get_entry( + self, + index: str, + ) -> pd.Series: + """Get entry from the dataframe. + + Args: + index: Name of entry to acquire. + + Returns: + Pandas Series of acquired entry. This doesn't mutate the table. + + Raises: + ValueError: When index is not in this table. + """ + with self._lock: + if index not in self._container.index: + raise ValueError(f"Table index {index} doesn't exist in this table.") + + return self._container.loc[index] + def add_entry( self, index: str, **kwargs, - ): + ) -> pd.Series: """Add new entry to the dataframe. Args: index: Name of this entry. Must be unique in this table. kwargs: Description of new entry to register. + Returns: + Pandas Series of added entry. This doesn't mutate the table. + Raises: ValueError: When index is not unique in this table. """ @@ -406,22 +447,14 @@ def add_entry( index = str(index) self._container.loc[index] = list(template.values()) + return self._container.iloc[-1] + def _repr_html_(self) -> Union[str, None]: """Return HTML representation of this dataframe.""" with self._lock: # Remove underscored columns. return self._container._repr_html_() - def __getattr__(self, item): - lock = object.__getattribute__(self, "_lock") - - with lock: - # Lock when access to container's member. - container = object.__getattribute__(self, "_container") - if hasattr(container, item): - return getattr(container, item) - raise AttributeError(f"'ThreadSafeDataFrame' object has no attribute '{item}'") - def __json_encode__(self) -> Dict[str, Any]: with self._lock: return { diff --git a/qiskit_experiments/framework/analysis_result_table.py b/qiskit_experiments/framework/analysis_result_table.py index 9e388c3abb..074d89c0b1 100644 --- a/qiskit_experiments/framework/analysis_result_table.py +++ b/qiskit_experiments/framework/analysis_result_table.py @@ -17,6 +17,8 @@ import warnings from typing import List, Union, Optional +import pandas as pd + from qiskit_experiments.database_service.utils import ThreadSafeDataFrame LOG = logging.getLogger(__name__) @@ -52,6 +54,11 @@ def _default_columns(cls) -> List[str]: "created_time", ] + def result_ids(self) -> List[str]: + """Return all result IDs in this table.""" + with self._lock: + return self._container["result_id"].to_list() + def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: """Filter columns names available in this table. @@ -68,35 +75,37 @@ def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: Raises: ValueError: When column is given in string which doesn't match with any builtin group. 
""" - if columns == "all": - return self._columns - if columns == "default": - return [ - "name", - "experiment", - "components", - "value", - "quality", - "backend", - "run_time", - ] + self._extra - if columns == "minimal": - return [ - "name", - "components", - "value", - "quality", - ] + self._extra - if not isinstance(columns, str): - out = [] - for column in columns: - if column in self._columns: - out.append(column) - else: - warnings.warn( - f"Specified column name {column} does not exist in this table.", UserWarning - ) - return out + with self._lock: + if columns == "all": + return self._columns + if columns == "default": + return [ + "name", + "experiment", + "components", + "value", + "quality", + "backend", + "run_time", + ] + self._extra + if columns == "minimal": + return [ + "name", + "components", + "value", + "quality", + ] + self._extra + if not isinstance(columns, str): + out = [] + for column in columns: + if column in self._columns: + out.append(column) + else: + warnings.warn( + f"Specified column name {column} does not exist in this table.", + UserWarning, + ) + return out raise ValueError( f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." ) @@ -106,16 +115,19 @@ def add_entry( self, result_id: Optional[str] = None, **kwargs, - ): + ) -> pd.Series: """Add new entry to the table. Args: result_id: Result ID. Automatically generated when not provided. This must be valid hexadecimal UUID string. kwargs: Description of new entry to register. + + Returns: + Pandas Series of added entry. This doesn't mutate the table. """ if result_id: - with self.lock: + with self._lock: if result_id[:8] in self._container.index: raise ValueError( f"The short ID of the result_id '{result_id[:8]}' already exists in the " @@ -129,7 +141,7 @@ def add_entry( # This mechanism is similar with the github commit hash. short_index = result_id[:8] - super().add_entry( + return super().add_entry( index=short_index, result_id=result_id, **kwargs, @@ -137,7 +149,7 @@ def add_entry( def _unique_table_index(self): """Generate unique UUID which is unique in the table with first 8 characters.""" - with self.lock: + with self._lock: n = 0 while n < 1000: tmp_id = uuid.uuid4().hex diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index f0f2a41396..e8d5a2cf05 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -691,7 +691,7 @@ def hgp(self, new_hgp: str) -> None: def _clear_results(self): """Delete all currently stored analysis results and figures""" # Schedule existing analysis results for deletion next save call - self._deleted_analysis_results.extend(list(self._analysis_results["result_id"])) + self._deleted_analysis_results.extend(list(self._analysis_results.result_ids())) self._analysis_results.clear() # Schedule existing figures for deletion next save call for key in self._figures.keys(): @@ -1411,7 +1411,7 @@ def add_analysis_results( tags = tags or [] backend = backend or self.backend_name - self._analysis_results.add_entry( + series = self._analysis_results.add_entry( result_id=result_id, name=name, value=value, @@ -1427,7 +1427,7 @@ def add_analysis_results( ) if self.auto_save: service_result = _series_to_service_result( - series=self._analysis_results.iloc[-1], + series=series, service=self._service, auto_save=False, ) @@ -1462,7 +1462,7 @@ def delete_analysis_result( "Try another key that can uniquely determine entry to delete." 
) - self._analysis_results.drop(to_delete.name, inplace=True) + self._analysis_results.drop_entry(str(to_delete.name)) if self._service and self.auto_save: with service_exception_to_warning(): self.service.delete_analysis_result(result_id=to_delete.result_id) @@ -1737,8 +1737,6 @@ def save( json_encoder=self._json_encoder, max_workers=max_workers, ) - for result in self._analysis_results.values(): - result._created_in_db = True except Exception as ex: # pylint: disable=broad-except # Don't automatically fail the experiment just because its data cannot be saved. LOG.error("Unable to save the experiment data: %s", traceback.format_exc()) diff --git a/test/extended_equality.py b/test/extended_equality.py index 53b0dbec75..751763b8ee 100644 --- a/test/extended_equality.py +++ b/test/extended_equality.py @@ -281,6 +281,10 @@ def _check_dataframes( **kwargs, ): """Check equality of data frame which may involve Qiskit Experiments class value.""" + if isinstance(data1, ThreadSafeDataFrame): + data1 = data1.container(collapse_extra=False) + if isinstance(data2, ThreadSafeDataFrame): + data2 = data2.container(collapse_extra=False) return is_equivalent( data1.to_dict(orient="index"), data2.to_dict(orient="index"), diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py index eb27de0bd6..6c8b24adb5 100644 --- a/test/framework/test_data_table.py +++ b/test/framework/test_data_table.py @@ -14,6 +14,7 @@ from test.base import QiskitExperimentsTestCase +import uuid import numpy as np import pandas as pd @@ -58,12 +59,17 @@ def test_raises_initializing_with_wrong_table(self): # columns doesn't match with default_columns TestBaseTable.TestTable(wrong_table) + def test_get_entry(self): + """Test getting an entry from the table.""" + table = TestBaseTable.TestTable({"x": [1.0, 2.0, 3.0]}) + self.assertListEqual(table.get_entry("x").to_list(), [1.0, 2.0, 3.0]) + def test_add_entry(self): """Test adding data with default keys to table.""" table = TestBaseTable.TestTable() table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) - self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0]) + self.assertListEqual(table.get_entry("x").to_list(), [0.0, 1.0, 2.0]) def test_add_entry_with_missing_key(self): """Test adding entry with partly specified keys.""" @@ -71,7 +77,7 @@ def test_add_entry_with_missing_key(self): table.add_entry(index="x", value1=0.0, value3=2.0) # NaN value cannot be compared with assert - np.testing.assert_equal(table.loc["x"].to_list(), [0.0, float("nan"), 2.0]) + np.testing.assert_equal(table.get_entry("x").to_list(), [0.0, float("nan"), 2.0]) def test_add_entry_with_new_key(self): """Test adding data with new keys to table.""" @@ -79,7 +85,7 @@ def test_add_entry_with_new_key(self): table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0, extra=3.0) self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"]) - self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0, 3.0]) + self.assertListEqual(table.get_entry("x").to_list(), [0.0, 1.0, 2.0, 3.0]) def test_add_entry_with_new_key_with_existing_entry(self): """Test adding new key will expand existing entry.""" @@ -88,10 +94,24 @@ def test_add_entry_with_new_key_with_existing_entry(self): table.add_entry(index="y", value1=0.0, value2=1.0, value3=2.0, extra=3.0) self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"]) - self.assertListEqual(table.loc["y"].to_list(), [0.0, 1.0, 2.0, 3.0]) + self.assertListEqual(table.get_entry("y").to_list(), [0.0, 
1.0, 2.0, 3.0])
 
         # NaN value cannot be compared with assert
-        np.testing.assert_equal(table.loc["x"].to_list(), [0.0, 1.0, 2.0, float("nan")])
+        np.testing.assert_equal(table.get_entry("x").to_list(), [0.0, 1.0, 2.0, float("nan")])
+
+    def test_drop_entry(self):
+        """Test drop entry from the table."""
+        table = TestBaseTable.TestTable()
+        table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0)
+        table.drop_entry("x")
+
+        self.assertEqual(len(table), 0)
+
+    def test_drop_non_existing_entry(self):
+        """Test dropping non-existing entry raises ValueError."""
+        table = TestBaseTable.TestTable()
+        with self.assertRaises(ValueError):
+            table.drop_entry("x")
 
     def test_return_only_default_columns(self):
         """Test extra entry is correctly recognized."""
@@ -131,7 +151,7 @@ def test_container_is_immutable(self):
         self.assertListEqual(dataframe.loc["x"].to_list(), [100, 0.2, 0.3])
 
         # Original object in the experiment payload is preserved
-        self.assertListEqual(table.loc["x"].to_list(), [0.1, 0.2, 0.3])
+        self.assertListEqual(table.get_entry("x").to_list(), [0.1, 0.2, 0.3])
 
     def test_round_trip(self):
         """Test JSON roundtrip serialization with the experiment encoder."""
@@ -149,7 +169,7 @@ def test_add_entry_with_result_id(self):
         """Test adding entry with result_id. Index is created by truncating long string."""
         table = AnalysisResultTable()
         table.add_entry(result_id="9a0bdec8c0104ef7bb7db84939717a6b", value=0.123)
-        self.assertEqual(table.loc["9a0bdec8"].value, 0.123)
+        self.assertEqual(table.get_entry("9a0bdec8").value, 0.123)
 
     def test_extra_column_name_is_always_returned(self):
         """Test extra column names are always returned in filtered column names."""
         table = AnalysisResultTable()
@@ -165,6 +185,16 @@ def test_extra_column_name_is_always_returned(self):
         all_columns = table.filter_columns("all")
         self.assertTrue("extra" in all_columns)
 
+    def test_listing_result_id(self):
+        """Test returning result IDs of all stored entries."""
+        table = AnalysisResultTable()
+
+        ref_ids = [uuid.uuid4().hex for _ in range(10)]
+        for ref_id in ref_ids:
+            table.add_entry(result_id=ref_id, value=0)
+
+        self.assertListEqual(table.result_ids(), ref_ids)
+
     def test_no_overlap_result_id(self):
         """Test automatically prepare unique result IDs for sufficient number of entries."""
         table = AnalysisResultTable()

From c2cd09041860b289d13f07c8fba6edf8b325e5de Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Mon, 7 Aug 2023 21:20:03 +0900
Subject: [PATCH 21/27] Fix analysis save bug

- result id must be str(uuid) including "-"
- np.nan must be replaced with None
---
 .../framework/analysis_result_table.py        | 31 ++++++++++++-------
 .../framework/experiment_data.py              |  4 +--
 test/framework/test_data_table.py             |  4 +--
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/qiskit_experiments/framework/analysis_result_table.py b/qiskit_experiments/framework/analysis_result_table.py
index 074d89c0b1..4ae46691d8 100644
--- a/qiskit_experiments/framework/analysis_result_table.py
+++ b/qiskit_experiments/framework/analysis_result_table.py
@@ -13,6 +13,7 @@
 """Table representation of analysis results."""
 
 import logging
+import re
 import uuid
 import warnings
 from typing import List, Union, Optional
@@ -38,6 +39,8 @@ class AnalysisResultTable(ThreadSafeDataFrame):
     for more details.
     """
 
+    VALID_ID_REGEX = re.compile(r"\A(?P<short_id>\w{8})-\w{4}-\w{4}-\w{4}-\w{12}\Z")
+
     @classmethod
     def _default_columns(cls) -> List[str]:
         return [
@@ -126,23 +129,29 @@ def add_entry(
 
         Returns:
             Pandas Series of added entry. This doesn't mutate the table.
""" - if result_id: - with self._lock: - if result_id[:8] in self._container.index: - raise ValueError( - f"The short ID of the result_id '{result_id[:8]}' already exists in the " - "experiment data. Please use another ID to avoid index collision." - ) - else: + if not result_id: result_id = self._unique_table_index() + matched = self.VALID_ID_REGEX.match(result_id) + if matched is None: + raise ValueError( + f"The result ID {result_id} is not a valid result ID string." + ) + # Short unique index is generated from result id. # Showing full result id unnecessary occupies horizontal space of the html table. # This mechanism is similar with the github commit hash. - short_index = result_id[:8] + short_id = matched.group("short_id") + + with self._lock: + if short_id in self._container.index: + raise ValueError( + f"The short ID of the result_id '{short_id}' already exists in the " + "experiment data. Please use another ID to avoid index collision." + ) return super().add_entry( - index=short_index, + index=short_id, result_id=result_id, **kwargs, ) @@ -152,7 +161,7 @@ def _unique_table_index(self): with self._lock: n = 0 while n < 1000: - tmp_id = uuid.uuid4().hex + tmp_id = str(uuid.uuid4()) if tmp_id[:8] not in self._container.index: return tmp_id raise RuntimeError( diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index e8d5a2cf05..7d4b18e445 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -2659,8 +2659,7 @@ def _series_to_service_result( Legacy AnalysisResult payload. """ # TODO This must be done on experiment service rather than by client. - - qe_result = AnalysisResultData.from_table_element(**series.to_dict()) + qe_result = AnalysisResultData.from_table_element(**series.replace({np.nan: None}).to_dict()) result_data = AnalysisResult.format_result_data( value=qe_result.value, @@ -2682,6 +2681,7 @@ def _series_to_service_result( quality = ResultQuality(str(qe_result.quality).upper()) except ValueError: quality = "unknown" + experiment_service_payload = AnalysisResultDataclass( result_id=qe_result.result_id, experiment_id=qe_result.experiment_id, diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py index 6c8b24adb5..0c0ec29d7c 100644 --- a/test/framework/test_data_table.py +++ b/test/framework/test_data_table.py @@ -168,7 +168,7 @@ class TestAnalysisTable(QiskitExperimentsTestCase): def test_add_entry_with_result_id(self): """Test adding entry with result_id. 
Index is created by truncating long string.""" table = AnalysisResultTable() - table.add_entry(result_id="9a0bdec8c0104ef7bb7db84939717a6b", value=0.123) + table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123) self.assertEqual(table.get_entry("9a0bdec8").value, 0.123) def test_extra_column_name_is_always_returned(self): @@ -189,7 +189,7 @@ def test_listing_result_id(self): """Test returning result IDs of all stored entries.""" table = AnalysisResultTable() - ref_ids = [uuid.uuid4().hex for _ in range(10)] + ref_ids = [str(uuid.uuid4()) for _ in range(10)] for ref_id in ref_ids: table.add_entry(result_id=ref_id, value=0) From cc5cf413c4e2a4fa239e65dd5cf309042766b15f Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Mon, 7 Aug 2023 21:31:08 +0900 Subject: [PATCH 22/27] Remove redundant timezone conversion and use tzlocal --- qiskit_experiments/framework/base_analysis.py | 3 ++- qiskit_experiments/framework/experiment_data.py | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/qiskit_experiments/framework/base_analysis.py b/qiskit_experiments/framework/base_analysis.py index a896faf67b..5f3d89acc7 100644 --- a/qiskit_experiments/framework/base_analysis.py +++ b/qiskit_experiments/framework/base_analysis.py @@ -16,6 +16,7 @@ import copy from collections import OrderedDict from datetime import datetime, timezone +from dateutil import tz from typing import List, Tuple, Union, Dict from qiskit_experiments.database_service.device_component import Qubit @@ -179,7 +180,7 @@ def run_analysis(expdata: ExperimentData): if not result.backend: result.backend = expdata.backend_name if not result.created_time: - result.created_time = datetime.now(timezone.utc) + result.created_time = datetime.now(tz.tzlocal()) if not result.run_time: result.run_time = expdata.running_time diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index 7d4b18e445..9625ae9709 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -421,7 +421,7 @@ def creation_datetime(self) -> datetime: in the local timezone. """ - return utc_to_local(self._db_data.creation_datetime) + return self._db_data.creation_datetime @property def start_datetime(self) -> datetime: @@ -431,11 +431,11 @@ def start_datetime(self) -> datetime: The timestamp when this experiment began running in the local timezone. """ - return utc_to_local(self._db_data.start_datetime) + return self._db_data.start_datetime @start_datetime.setter def start_datetime(self, new_start_datetime: datetime) -> None: - self._db_data.start_datetime = local_to_utc(new_start_datetime) + self._db_data.start_datetime = new_start_datetime @property def updated_datetime(self) -> datetime: @@ -446,7 +446,7 @@ def updated_datetime(self) -> datetime: in the local timezone. """ - return utc_to_local(self._db_data.updated_datetime) + return self._db_data.updated_datetime @property def running_time(self) -> datetime: @@ -462,7 +462,7 @@ def running_time(self) -> datetime: This may return wrong datetime if server and client are in different timezone. """ - return utc_to_local(self._running_time) + return self._running_time @property def end_datetime(self) -> datetime: @@ -476,11 +476,11 @@ def end_datetime(self) -> datetime: in the local timezone. 
""" - return utc_to_local(self._db_data.end_datetime) + return self._db_data.end_datetime @end_datetime.setter def end_datetime(self, new_end_datetime: datetime) -> None: - self._db_data.end_datetime = local_to_utc(new_end_datetime) + self._db_data.end_datetime = new_end_datetime @property def hub(self) -> str: From b00945b25b86ddd2caa7a750d6716d680748c4bd Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Mon, 7 Aug 2023 21:55:25 +0900 Subject: [PATCH 23/27] Fix missing experiment and run_time column in loaded data. These are added to extra field since IBM Experiment Service doesn't have data field for them. --- qiskit_experiments/framework/experiment_data.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index 9625ae9709..e5d2ad327b 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -2677,6 +2677,11 @@ def _series_to_service_result( result_data["_value"] = qe_result.value result_data["_extra"] = qe_result.extra + # IBM Experiment Service doesn't have data field for experiment and run time. + # These are added to extra field so that these data can be saved. + result_data["_extra"]["experiment"] = qe_result.experiment + result_data["_extra"]["run_time"] = qe_result.run_time + try: quality = ResultQuality(str(qe_result.quality).upper()) except ValueError: From 2cda8dc557760c3459cd63981e9666a4af5f452c Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Tue, 8 Aug 2023 01:02:49 +0900 Subject: [PATCH 24/27] Add test for key order preservation and remove redundant code --- qiskit_experiments/database_service/utils.py | 6 ++---- test/framework/test_data_table.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py index 71be44cba1..81740546ca 100644 --- a/qiskit_experiments/database_service/utils.py +++ b/qiskit_experiments/database_service/utils.py @@ -435,10 +435,8 @@ def add_entry( if index in self._container.index: raise ValueError(f"Table index {index} already exists in the table.") - columns = self.get_columns() - missing = kwargs.keys() - set(columns) - if missing: - self.add_columns(*sorted(missing)) + if kwargs.keys() - set(self.get_columns()): + self.add_columns(*kwargs.keys()) template = dict.fromkeys(self.get_columns()) template.update(kwargs) diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py index 0c0ec29d7c..3afead20f3 100644 --- a/test/framework/test_data_table.py +++ b/test/framework/test_data_table.py @@ -87,6 +87,16 @@ def test_add_entry_with_new_key(self): self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"]) self.assertListEqual(table.get_entry("x").to_list(), [0.0, 1.0, 2.0, 3.0]) + def test_add_entry_with_multiple_new_keys(self): + """Test new keys are added to column and the key order is preserved.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", phi=0.1, lamb=0.2, theta=0.3) + + self.assertListEqual( + table.get_columns(), + ["value1", "value2", "value3", "phi", "lamb", "theta"], + ) + def test_add_entry_with_new_key_with_existing_entry(self): """Test adding new key will expand existing entry.""" table = TestBaseTable.TestTable() From 665ff8f37d5343f8e541480ad6343bfdd4a741fd Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Tue, 8 Aug 2023 02:05:26 +0900 Subject: [PATCH 25/27] Code fix for auto figure 
name.

The innermost experiment creates the figure name from its experiment
type, qubits, and experiment ID.
---
 .../framework/analysis_result_table.py        |  4 +---
 qiskit_experiments/framework/base_analysis.py | 14 +++++++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/qiskit_experiments/framework/analysis_result_table.py b/qiskit_experiments/framework/analysis_result_table.py
index 4ae46691d8..89aa4b79a1 100644
--- a/qiskit_experiments/framework/analysis_result_table.py
+++ b/qiskit_experiments/framework/analysis_result_table.py
@@ -134,9 +134,7 @@ def add_entry(
 
         matched = self.VALID_ID_REGEX.match(result_id)
         if matched is None:
-            raise ValueError(
-                f"The result ID {result_id} is not a valid result ID string."
-            )
+            raise ValueError(f"The result ID {result_id} is not a valid result ID string.")
 
         # Short unique index is generated from result id.
         # Showing full result id unnecessary occupies horizontal space of the html table.
diff --git a/qiskit_experiments/framework/base_analysis.py b/qiskit_experiments/framework/base_analysis.py
index 5f3d89acc7..7122b6f548 100644
--- a/qiskit_experiments/framework/base_analysis.py
+++ b/qiskit_experiments/framework/base_analysis.py
@@ -15,10 +15,11 @@
 from abc import ABC, abstractmethod
 import copy
 from collections import OrderedDict
-from datetime import datetime, timezone
-from dateutil import tz
+from datetime import datetime
 from typing import List, Tuple, Union, Dict
 
+from dateutil import tz
+
 from qiskit_experiments.database_service.device_component import Qubit
 from qiskit_experiments.framework import Options
 from qiskit_experiments.framework.store_init_args import StoreInitArgs
@@ -197,7 +198,14 @@ def run_analysis(expdata: ExperimentData):
             figure_to_add = []
             for figure in figures:
                 if not isinstance(figure, FigureData):
-                    figure = FigureData(figure=figure)
+                    qubits_repr = "_".join(
+                        map(str, expdata.metadata.get("device_components", [])[:5])
+                    )
+                    short_id = expdata.experiment_id[:8]
+                    figure = FigureData(
+                        figure=figure,
+                        name=f"{expdata.experiment_type}_{qubits_repr}_{short_id}.svg",
+                    )
                 figure_to_add.append(figure)
             expdata.add_figures(figure_to_add, figure_names=self.options.figure_names)

From 2bdd09def85ffcee774aa07f59ffe6921c3a18a0 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Wed, 9 Aug 2023 00:18:55 +0900
Subject: [PATCH 26/27] Relax the validation for result id.

Users should be able to use an arbitrary ID as long as they don't use
the save feature. The validation error is replaced with a user warning
so that it doesn't stop this workflow.
---
 .../framework/analysis_result_table.py        | 20 ++++++++----
 .../test_db_experiment_data.py                | 32 +++++++++++++------
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/qiskit_experiments/framework/analysis_result_table.py b/qiskit_experiments/framework/analysis_result_table.py
index 89aa4b79a1..053655a2a7 100644
--- a/qiskit_experiments/framework/analysis_result_table.py
+++ b/qiskit_experiments/framework/analysis_result_table.py
@@ -128,18 +128,26 @@ def add_entry(
 
         Returns:
             Pandas Series of added entry. This doesn't mutate the table.
+
+        Raises:
+            ValueError: When the truncated result id causes a collision in the table.
         """
         if not result_id:
            result_id = self._unique_table_index()
 
         matched = self.VALID_ID_REGEX.match(result_id)
         if matched is None:
-            raise ValueError(f"The result ID {result_id} is not a valid result ID string.")
-
-        # Short unique index is generated from result id.
-        # Showing full result id unnecessary occupies horizontal space of the html table.
-        # This mechanism is similar with the github commit hash.
-        short_id = matched.group("short_id")
+            warnings.warn(
+                f"The result ID {result_id} is not a valid result ID string. "
+                "This entry might fail to save with the experiment service.",
+                UserWarning,
+            )
+            short_id = result_id[:8]
+        else:
+            # Short unique index is generated from the result id.
+            # Showing the full result id unnecessarily occupies horizontal space of the html table.
+            # This mechanism is similar to the Git commit hash.
+            short_id = matched.group("short_id")
 
         with self._lock:
             if short_id in self._container.index:
diff --git a/test/database_service/test_db_experiment_data.py b/test/database_service/test_db_experiment_data.py
index 56bb9efe15..6d6713248e 100644
--- a/test/database_service/test_db_experiment_data.py
+++ b/test/database_service/test_db_experiment_data.py
@@ -36,7 +36,6 @@
 from qiskit_experiments.framework import ExperimentData
 from qiskit_experiments.framework import AnalysisResult
 from qiskit_experiments.framework import BackendData
-from qiskit_experiments.framework.experiment_data import local_to_utc
 from qiskit_experiments.database_service.exceptions import (
     ExperimentDataError,
     ExperimentEntryNotFound,
@@ -148,7 +147,9 @@ def _callback(_exp_data):
             [dat["counts"] for dat in _exp_data.data()], a_job.result().get_counts()
         )
         exp_data.add_figures(str.encode("hello world"))
-        exp_data.add_analysis_results(mock.MagicMock())
+        res = mock.MagicMock()
+        res.result_id = str(uuid.uuid4())
+        exp_data.add_analysis_results(res)
         nonlocal called_back
         called_back = True
@@ -455,7 +456,9 @@ def test_add_get_analysis_result(self):
             res = mock.MagicMock()
             res.result_id = idx
             results.append(res)
-            exp_data.add_analysis_results(res)
+            with self.assertWarns(UserWarning):
+                # This is an invalid result ID string and causes a warning
+                exp_data.add_analysis_results(res)
 
         # We cannot compare results with exp_data.analysis_results()
         # This test is too hacky since it tries to compare MagicMock with AnalysisResult.
@@ -481,7 +484,9 @@ def test_add_get_analysis_results(self):
             res = mock.MagicMock()
             res.result_id = idx
             results.append(res)
-        exp_data.add_analysis_results(results)
+        with self.assertWarns(UserWarning):
+            # This is an invalid result ID string and causes a warning
+            exp_data.add_analysis_results(results)
 
         get_result_ids = [res.result_id for res in exp_data.analysis_results()]
 
         # We cannot compare results with exp_data.analysis_results()
@@ -495,7 +500,9 @@ def test_delete_analysis_result(self):
         for idx in range(3):
             res = mock.MagicMock()
             res.result_id = id_template.format(idx)
-            exp_data.add_analysis_results(res)
+            with self.assertWarns(UserWarning):
+                # This is an invalid result ID string and causes a warning
+                exp_data.add_analysis_results(res)
 
         subtests = [(0, id_template.format(0)), (id_template.format(2), id_template.format(2))]
         for del_key, res_id in subtests:
@@ -519,6 +526,7 @@ def test_save(self):
         service = mock.create_autospec(IBMExperimentService, instance=True)
         exp_data.add_figures(str.encode("hello world"))
         analysis_result = mock.MagicMock()
+        analysis_result.result_id = str(uuid.uuid4())
         exp_data.add_analysis_results(analysis_result)
         exp_data.service = service
         exp_data.save()
@@ -531,7 +539,9 @@ def test_save_delete(self):
         exp_data = ExperimentData(backend=self.backend, experiment_type="qiskit_test")
         service = mock.create_autospec(IBMExperimentService, instance=True)
         exp_data.add_figures(str.encode("hello world"))
-        exp_data.add_analysis_results(mock.MagicMock())
+        res = mock.MagicMock()
+        res.result_id = str(uuid.uuid4())
+        exp_data.add_analysis_results(res)
         exp_data.delete_analysis_result(0)
         exp_data.delete_figure(0)
         exp_data.service = service
@@ -560,6 +570,7 @@ def test_auto_save(self):
         )
         exp_data.auto_save = True
         mock_result = mock.MagicMock()
+        mock_result.result_id = str(uuid.uuid4())
 
         subtests = [
             # update function, update parameters, service called
@@ -1021,6 +1032,7 @@ def test_copy_metadata(self):
         exp_data = ExperimentData(experiment_type="qiskit_test")
         exp_data.add_data(self._get_job_result(1))
         result = mock.MagicMock()
+        result.result_id = str(uuid.uuid4())
         exp_data.add_analysis_results(result)
         copied = exp_data.copy(copy_results=False)
         self.assertEqual(exp_data.data(), copied.data())
@@ -1096,16 +1108,16 @@ def test_getters(self):
         data = ExperimentData()
         test_time = datetime.now()
         data._db_data.creation_datetime = test_time
-        self.assertEqual(data.creation_datetime, local_to_utc(test_time))
+        self.assertEqual(data.creation_datetime, test_time)
 
         test_time = test_time + timedelta(hours=1)
         data._db_data.start_datetime = test_time
-        self.assertEqual(data.start_datetime, local_to_utc(test_time))
+        self.assertEqual(data.start_datetime, test_time)
 
         test_time = test_time + timedelta(hours=1)
         data._db_data.end_datetime = test_time
-        self.assertEqual(data.end_datetime, local_to_utc(test_time))
+        self.assertEqual(data.end_datetime, test_time)
 
         test_time = test_time + timedelta(hours=1)
         data._db_data.updated_datetime = test_time
-        self.assertEqual(data.updated_datetime, local_to_utc(test_time))
+        self.assertEqual(data.updated_datetime, test_time)
 
         data._db_data.hub = "hub_name"
         data._db_data.group = "group_name"

From f6e7294e560ac62416c574362a213ef103ac4a55 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa
Date: Fri, 11 Aug 2023 09:44:49 +0900
Subject: [PATCH 27/27] Update running_time doc
---
 qiskit_experiments/framework/experiment_data.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/qiskit_experiments/framework/experiment_data.py
b/qiskit_experiments/framework/experiment_data.py index e5d2ad327b..5e12163fc9 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -452,15 +452,9 @@ def updated_datetime(self) -> datetime: def running_time(self) -> datetime: """Return the running time of this experiment data. - The running time is the time the latest successful job was run on + The running time is the time the latest successful job started running on the remote quantum machine. This can change as more jobs finish. - .. warning:: - - IBM job returns running time in tzlocal(), - but we don't know the local time of the remote quantum machine. - This may return wrong datetime if server and client are in different timezone. - """ return self._running_time
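
A minimal end-to-end sketch of the dataframe-based workflow this series
enables, closing out the patch set. It assumes ``backend`` is any available
Qiskit backend; the experiment choice and RB lengths are illustrative:

    from qiskit_experiments.library import StandardRB

    # Run an illustrative experiment on qubit 0 (backend is assumed to exist).
    exp = StandardRB((0,), lengths=[10, 100, 300], backend=backend)
    exp_data = exp.run().block_for_results()

    # Retrieve every analysis result as a single pandas DataFrame.
    df = exp_data.analysis_results(dataframe=True, columns="all")

    # Standard pandas filtering applies, e.g. keep only good-quality entries.
    good = df[df["quality"] == "good"]
    print(good[["name", "components", "value"]])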