diff --git a/Makefile b/Makefile
index 1d6e99c4..74bbe796 100644
--- a/Makefile
+++ b/Makefile
@@ -20,10 +20,18 @@ else
 endif
 	@source $(CURDIR)/venv/bin/activate && \
 	interrogate -c pyproject.toml -v . -f 100 && \
-	python3 -m coverage run -m pytest -x -n $(n_workers) --failed-first -k $(chosen_tests) --client $(client) --responses $(responses) && \
+	python3 -m coverage run --source="src" -m pytest -x -n $(n_workers) --failed-first -k $(chosen_tests) --client $(client) --responses $(responses) && \
 	python3 -m coverage html && \
 	deactivate
+
+coverage:
+	@source $(CURDIR)/venv/bin/activate && \
+	python3 -m coverage run -m pytest -x --client $(client) && \
+	python3 -m coverage html && \
+	open htmlcov/index.html && \
+	deactivate
+
 # set up jupyter dev kernel
 jupyter:
-deactivate
diff --git a/docs/how-to/data-hub/add-data.md b/docs/how-to/data-hub/add-data.md
new file mode 100644
index 00000000..3526fda5
--- /dev/null
+++ b/docs/how-to/data-hub/add-data.md
@@ -0,0 +1,59 @@
+# Add data to a Deep Origin Database
+
+This document describes how to add data to a Deep Origin Database.
+
+Consider the following dataframe, constructed from a database using:
+
+```python
+from deeporigin.data_hub import api
+df = api.get_dataframe("xy")
+df
+```
+
+![](../../images/df-xy.png)
+
+## Add new rows
+
+To add new rows to the underlying database, use the `add_database_rows` function:
+
+```python
+data = dict(X=[1, 2], Y=[2, 3])
+api.add_database_rows(database_id="xy", data=data)
+```
+
+`data` should be a dictionary where the keys are column names and the values are lists of values to be written to the corresponding columns. `add_database_rows` will add this data to the database, creating as many new rows as needed.
+
+`add_database_rows` returns a list of the row IDs created during this process.
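+
+For example, the call above might return row IDs such as the following (illustrative values; the actual IDs depend on the database):
+
+```python
+["row-1", "row-2"]
+```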
+
+
+## Add fragments of new rows
+
+Similarly, fragments of rows (subsets of columns) can be written to the database:
+
+```python
+data = dict(X=[10, 20])  # note Y is not specified
+api.add_database_rows(database_id="xy", data=data)
+```
+
+`add_database_rows` returns a list of the row IDs created during this process, for example:
+
+```python
+["row-1", "row-2"]
+```
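+
+Cells in columns that are not specified are left empty in the newly created rows. To verify, the dataframe can be fetched again:
+
+```python
+df = api.get_dataframe("xy")
+```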
+ ) + + response = make_database_rows( + database_id=database_id, + n_rows=value_lengths[0], + client=client, + _stash=_stash, + ) + + row_ids = [row.id for row in response.rows] + row_hids = [row.hid for row in response.rows] + + for col in data.keys(): + set_data_in_cells( + values=data[col], + row_ids=row_ids, + column_id=col, + database_id=database_id, + columns=db.cols, + client=client, + _stash=_stash, + ) + + return row_hids + + @beartype @ensure_client def make_database_rows( database_id: str, n_rows: int = 1, + *, client=None, _stash: bool = False, ) -> dict: @@ -1005,6 +1075,23 @@ def get_dataframe( f"Expected database_id: {database_id} to resolve to a database, but instead, it resolved to a {db_row.type}" ) + # early exit for empty DB + if "cols" not in db_row.keys() or db_row.cols is None: + data = dict() + if return_type == "dataframe": + # this import is here because we don't want to + # import pandas unless we actually use this function + df = _make_deeporigin_dataframe( + data=data, + reference_ids=None, + db_row=db_row, + rows=None, + columns=None, + ) + return df + else: + return dict() + columns = db_row.cols database_id = db_row.id @@ -1017,9 +1104,6 @@ def get_dataframe( reference_ids = [] file_ids = [] - if columns is None: - return None - # remove notebook columns because they are not # shown in the UI as columns columns = [ @@ -1033,7 +1117,7 @@ def get_dataframe( data[column["id"]] = [] for row in rows: - # warning: add_row_to_data mutates file_ids + # warning: add_row_to_data mutates data, file_ids # and reference_ids add_row_to_data( data=data, @@ -1082,22 +1166,13 @@ def get_dataframe( if return_type == "dataframe": # make the dataframe - # this import is here because we don't want to - # import pandas unless we actually use this function - from deeporigin.data_hub.dataframe import DataFrame - - df = DataFrame(data) - df.attrs["reference_ids"] = list(set(reference_ids)) - df.attrs["id"] = database_id - df.attrs["metadata"] = dict(db_row) - - df = _type_and_cleanup_dataframe(df, columns) - - # find last updated row for pretty printing - df.attrs["last_updated_row"] = find_last_updated_row(rows) - - df._deep_origin_out_of_sync = False - df._modified_columns = dict() + df = _make_deeporigin_dataframe( + data=data, + reference_ids=reference_ids, + db_row=db_row, + rows=rows, + columns=columns, + ) return df else: @@ -1113,6 +1188,38 @@ def get_dataframe( return renamed_data +def _make_deeporigin_dataframe( + *, + data: dict, + reference_ids: Optional[list], + db_row: dict, + columns: Optional[list], + rows: Optional[list], +): + # this import is here because we don't want to + # import pandas unless we actually use this function + from deeporigin.data_hub.dataframe import DataFrame + + df = DataFrame(data) + if reference_ids is not None: + df.attrs["reference_ids"] = list(set(reference_ids)) + df.attrs["id"] = db_row.id + df.attrs["metadata"] = dict(db_row) + + if columns is not None: + df = _type_and_cleanup_dataframe(df, columns) + + # find last updated row for pretty printing + if len(df) > 0: + df.attrs["last_updated_row"] = find_last_updated_row(rows) + else: + df.attrs["last_updated_row"] = db_row + + df._deep_origin_out_of_sync = False + df._modified_columns = dict() + return df + + @beartype @ensure_client def download_files( @@ -1173,21 +1280,26 @@ def download_files( pass +@beartype def add_row_to_data( *, data: dict, - row, + row: dict, columns: list, file_ids: list, reference_ids: list, ): """utility function to combine data from a row into a 
dataframe""" - row_data = _row_to_dict( + row_data = row_to_dict( row, file_ids=file_ids, reference_ids=reference_ids, ) if row_data is None: + for column in columns: + col_id = column["id"] + data[col_id].append(None) + return data["ID"].append(row_data["ID"]) @@ -1204,21 +1316,39 @@ def add_row_to_data( data[col_id].append(None) -def _row_to_dict( - row, +@beartype +def row_to_dict( + row: dict, *, - file_ids: list, - reference_ids: list, -): - """utility function to convert a row to a dictionary""" - if "fields" not in row.keys(): - return None + file_ids: Optional[list] = None, + reference_ids: Optional[list] = None, +) -> dict: + """convert a database row (as returned by api.list_database_rows) to a dictionary where keys are column IDs and values are the values in the row - fields = row.fields + Danger: This function mutates inputs + This function mutates file_ids and reference_ids + + Args: + row: database row (as returned by api.list_database_rows) + file_ids: list of file IDs, will be mutated in-place + reference_ids: list of reference IDs, will be mutated in-place + + Returns: + dict + """ + + if file_ids is None: + file_ids = [] + if reference_ids is None: + reference_ids = [] values = {"ID": row.hid, "Validation Status": row.validationStatus} - if fields is None: + + if "fields" not in row.keys() or row.fields is None: return values + + fields = row.fields + for field in fields: if "systemType" in field.keys() and field.systemType == "bodyDocument": continue diff --git a/src/data_hub/dataframe.py b/src/data_hub/dataframe.py index f0241ab7..f1b36834 100644 --- a/src/data_hub/dataframe.py +++ b/src/data_hub/dataframe.py @@ -12,6 +12,7 @@ from beartype.typing import Optional from dateutil.parser import parse from deeporigin.data_hub import api +from deeporigin.exceptions import DeepOriginException from deeporigin.platform.api import get_last_edited_user_name from deeporigin.utils.config import construct_resource_url from deeporigin.utils.constants import DataType, IDFormat @@ -20,7 +21,10 @@ check_for_updates() -__NO_NEW_ROWS_MSG__ = "Adding rows is not allowed, because this dataframe corresponds to a subset of the rows in the corresponding database." +__NO_NEW_ROWS_MSG__ = "Adding rows to Deep Origin DataFrames is not allowed. " +__NO_NEW_ROWS_FIX__ = ( + "If you want to add rows to the underlying database, use `api.add_database_rows()`." +) class DataFrame(pd.DataFrame): @@ -35,8 +39,27 @@ class DataFrame(pd.DataFrame): _modified_columns: dict = dict() """if data is modified in a dataframe, and auto_sync is False, this list will contain the columns that have been modified so that the Deep Origin database can be updated. If an empty list, the Deep Origin database will not be updated, and the dataframe matches the Deep Origin database at the time of creation.""" - _allow_adding_rows: bool = True - """If `True`, new rows can be added to the dataframe. If `False`, new rows cannot be added to the dataframe.""" + def _track_changes(self, column: str, rows: list): + """callback that tracks changes made to the DB, and responds appropriately. if auto_sync is true, changes + are written immediately to DB. 
+    response = make_database_rows(
+        database_id=database_id,
+        n_rows=value_lengths[0],
+        client=client,
+        _stash=_stash,
+    )
+
+    row_ids = [row.id for row in response.rows]
+    row_hids = [row.hid for row in response.rows]
+
+    for col in data.keys():
+        set_data_in_cells(
+            values=data[col],
+            row_ids=row_ids,
+            column_id=col,
+            database_id=database_id,
+            columns=db.cols,
+            client=client,
+            _stash=_stash,
+        )
+
+    return row_hids
+
+
 @beartype
 @ensure_client
 def make_database_rows(
     database_id: str,
     n_rows: int = 1,
+    *,
     client=None,
     _stash: bool = False,
 ) -> dict:
@@ -1005,6 +1075,23 @@ def get_dataframe(
             f"Expected database_id: {database_id} to resolve to a database, but instead, it resolved to a {db_row.type}"
         )
 
+    # early exit for empty DB
+    if "cols" not in db_row.keys() or db_row.cols is None:
+        data = dict()
+        if return_type == "dataframe":
+            # pandas is imported lazily inside
+            # _make_deeporigin_dataframe
+            df = _make_deeporigin_dataframe(
+                data=data,
+                reference_ids=None,
+                db_row=db_row,
+                rows=None,
+                columns=None,
+            )
+            return df
+        else:
+            return dict()
+
     columns = db_row.cols
     database_id = db_row.id
 
@@ -1017,9 +1104,6 @@ def get_dataframe(
     reference_ids = []
     file_ids = []
 
-    if columns is None:
-        return None
-
     # remove notebook columns because they are not
     # shown in the UI as columns
     columns = [
@@ -1033,7 +1117,7 @@ def get_dataframe(
         data[column["id"]] = []
 
     for row in rows:
-        # warning: add_row_to_data mutates file_ids
+        # warning: add_row_to_data mutates data, file_ids
         # and reference_ids
         add_row_to_data(
             data=data,
@@ -1082,22 +1166,13 @@ def get_dataframe(
 
     if return_type == "dataframe":
         # make the dataframe
-        # this import is here because we don't want to
-        # import pandas unless we actually use this function
-        from deeporigin.data_hub.dataframe import DataFrame
-
-        df = DataFrame(data)
-        df.attrs["reference_ids"] = list(set(reference_ids))
-        df.attrs["id"] = database_id
-        df.attrs["metadata"] = dict(db_row)
-
-        df = _type_and_cleanup_dataframe(df, columns)
-
-        # find last updated row for pretty printing
-        df.attrs["last_updated_row"] = find_last_updated_row(rows)
-
-        df._deep_origin_out_of_sync = False
-        df._modified_columns = dict()
+        df = _make_deeporigin_dataframe(
+            data=data,
+            reference_ids=reference_ids,
+            db_row=db_row,
+            rows=rows,
+            columns=columns,
+        )
 
         return df
     else:
@@ -1113,6 +1188,38 @@ def get_dataframe(
     return renamed_data
 
 
+def _make_deeporigin_dataframe(
+    *,
+    data: dict,
+    reference_ids: Optional[list],
+    db_row: dict,
+    columns: Optional[list],
+    rows: Optional[list],
+):
+    # this import is here because we don't want to
+    # import pandas unless we actually use this function
+    from deeporigin.data_hub.dataframe import DataFrame
+
+    df = DataFrame(data)
+    if reference_ids is not None:
+        df.attrs["reference_ids"] = list(set(reference_ids))
+    df.attrs["id"] = db_row.id
+    df.attrs["metadata"] = dict(db_row)
+
+    if columns is not None:
+        df = _type_and_cleanup_dataframe(df, columns)
+
+    # find last updated row for pretty printing
+    if len(df) > 0:
+        df.attrs["last_updated_row"] = find_last_updated_row(rows)
+    else:
+        df.attrs["last_updated_row"] = db_row
+
+    df._deep_origin_out_of_sync = False
+    df._modified_columns = dict()
+    return df
+
+
 @beartype
 @ensure_client
 def download_files(
@@ -1173,21 +1280,27 @@ def download_files(
         pass
 
 
+@beartype
 def add_row_to_data(
     *,
     data: dict,
-    row,
+    row: dict,
     columns: list,
     file_ids: list,
     reference_ids: list,
 ):
     """utility function to combine data from a row into a
     dataframe"""
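+    # NOTE: this helper mutates `data`, `file_ids` and `reference_ids` in place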
note that self is an AtIndexer - # object, so we need to index into the pandas object - if self.obj.auto_sync: - # auto sync enabled, simply write ASAP - self.obj.to_deeporigin() - else: - # auto sync not enabled, so we need to - # keep track of changes in _modified_columns - if column not in self.obj._modified_columns.keys(): - # this is the first time we're modifying this column - self.obj._modified_columns[column] = set(rows) - else: - # we've already modified this column before, so update the rows we're touched - self.obj._modified_columns[column].update(set(rows)) + # now update the DB. + self.df._track_changes(column, rows) @property def at(self): @@ -177,10 +208,11 @@ def append( sort=False, ): """Override the `append` method""" - if self._allow_adding_rows: - return super().append(other, ignore_index, verify_integrity, sort) - else: - raise ValueError(__NO_NEW_ROWS_MSG__) + raise DeepOriginException( + title="Adding rows to a DataFrame not allowed", + message=__NO_NEW_ROWS_MSG__, + fix=__NO_NEW_ROWS_FIX__, + ) def _repr_html_(self): """method override to customize printing in a Jupyter notebook""" diff --git a/src/platform/utils.py b/src/platform/utils.py index 243598fd..97d3b8fa 100644 --- a/src/platform/utils.py +++ b/src/platform/utils.py @@ -13,7 +13,7 @@ @beartype def add_functions_to_module( - module: str, + module, api_name: str, ) -> set: """utility function to dynamically add functions to a module diff --git a/src/utils/core.py b/src/utils/core.py index da640c87..ec85e95e 100644 --- a/src/utils/core.py +++ b/src/utils/core.py @@ -167,9 +167,12 @@ def in_aws_lambda(): @beartype -def find_last_updated_row(rows: List[dict]) -> dict: +def find_last_updated_row(rows: List[dict]) -> dict | None: """utility function to find the most recently updated row and return that object""" + if len(rows) == 0: + return None + most_recent_date = None most_recent_row = rows[0] diff --git a/tests/test_dataframe.py b/tests/test_dataframe.py index a01ed94c..d0cf56ce 100644 --- a/tests/test_dataframe.py +++ b/tests/test_dataframe.py @@ -5,8 +5,10 @@ import numpy as np import pandas as pd import pytest +from box import BoxList from deeporigin.data_hub import api from deeporigin.data_hub.dataframe import DataFrame +from deeporigin.exceptions import DeepOriginException from tests.utils import clean_up_test_objects @@ -40,6 +42,39 @@ def config(pytestconfig): ) ) + data["df"].attrs["id"] = "placeholder" + data["df"].attrs["metadata"] = { + "id": "_database:jeWMK8pVA6XHD9Ht6tGrM", + "type": "database", + "hid": "xy", + "name": "XY", + "dateCreated": "2024-10-23 20:17:32.602123", + "dateUpdated": "2024-10-23 20:17:32.602123", + "createdByUserDrn": "drn:identity::user:google-apps|user@deeporigin.com", + "parentId": "_workspace:gRrmQ9z14diV39ZwgvRx6", + "hidPrefix": "xy", + "cols": BoxList( + [ + { + "id": "_column:TbQYk64PIxSjk5QcaO95V", + "parentId": "_database:jeWMK8pVA6XHD9Ht6tGrM", + "name": "float", + "type": "float", + "cardinality": "one", + "configNumeric": {}, + }, + { + "id": "_column:Vesbs3ViQGJC47N8JgGRh", + "parentId": "_database:jeWMK8pVA6XHD9Ht6tGrM", + "name": "integer", + "type": "integer", + "cardinality": "one", + "configNumeric": {}, + }, + ] + ), + } + else: data["mock"] = False @@ -94,94 +129,119 @@ def config(pytestconfig): clean_up_test_objects(TEST_DB_NAME) -@pytest.mark.parametrize("column", NUMERIC_COLUMNS) -def test_dataframe_read_modify(config, column): # noqa: F811 - """this function tests our ability to fetch data, modify it, and write it back""" 
+@pytest.mark.parametrize("add_column", [True, False]) +def test_empty_db(config, add_column): # noqa: F811 + """check that we can create a dataframe from an empty database""" if config["mock"]: pytest.skip(SKIP_MSG) - df = DataFrame.from_deeporigin(config["db-name"]) - df["sq_" + column] = df[column] ** 2 - df.to_deeporigin() + name = "tc-" + str(uuid.uuid4())[:8] + try: + api.create_database(name=name) -@pytest.mark.parametrize("column", NUMERIC_COLUMNS) -def test_dataframe_write_new_columns(config, column): # noqa: F811 - """this function tests our ability to write new columns to a database""" + if add_column: + api.add_database_column( + database_id=name, + name="float", + type="float", + ) - if config["mock"]: - pytest.skip(SKIP_MSG) + df = DataFrame.from_deeporigin(name) + html = df._repr_html_() - df = DataFrame.from_deeporigin(config["db-name"]) - df["cube_" + column] = df[column] ** 3 - df.to_deeporigin() + assert "was last edited" in html, "Malformed view of empty dataframe" + + assert len(df) == 0, "Empty dataframe should have no rows" + + finally: + # clean up + api.delete_database(database_id=name) -def test_slicing_restrictions(config): - """check that we control what happens when a dataframe is sliced by rows""" +def test_df_loc_indexer_1(config): # noqa: F811 + """check that we can modify a row using the loc indexer""" if config["mock"]: - df = DataFrame(config["df"]) + df = config["df"] else: df = DataFrame.from_deeporigin(config["db-name"]) - assert df._allow_adding_rows is True, "Expected _allow_adding_rows to be True" + # should be able to modify an existing row + first_row = df.index[0] + last_row = df.index[-1] + df.loc[first_row] = list(df.loc[last_row]) - # slice to 2 rows - df = df.loc[df.index[:2]] + assert ( + df._modified_columns != {} + ), "Failed to successfully modify a row using the loc indexer" - assert df._allow_adding_rows is False, "Expected _allow_adding_rows to be False" + if not config["mock"]: + df.to_deeporigin() -def test_slice_and_modify(config): - """check that we control what happens when a dataframe is sliced by rows""" +def test_df_loc_indexer_2(config): # noqa: F811 + """check that we can modify 2 rows using the loc indexer""" if config["mock"]: - df = DataFrame(config["df"]) + df = config["df"] else: df = DataFrame.from_deeporigin(config["db-name"]) - assert df._allow_adding_rows is True, "Expected _allow_adding_rows to be True" + # should be able to modify an existing row + first_row = df.index[0] + last_row = df.index[-1] + df.loc[[first_row, last_row]] = list(df.loc[last_row]) - # slice to 2 rows - df = df.loc[df.index[:2]] + assert ( + df._modified_columns != {} + ), "Failed to successfully modify a row using the loc indexer" + + if not config["mock"]: + df.to_deeporigin() - assert df._allow_adding_rows is False, "Expected _allow_adding_rows to be False" - # we should be allowed to modify a slice - row_id = df.index[0] - df.at[row_id, "integer"] = 100 +def test_df_loc_indexer_3(config): # noqa: F811 + """check that we cannot add rows to a dataframe""" if config["mock"]: - return + df = config["df"] + else: + df = DataFrame.from_deeporigin(config["db-name"]) - # should be allowed to write back to DB - df.to_deeporigin() + # should be able to modify an existing row + first_row = df.index[0] + with pytest.raises( + DeepOriginException, + match="Adding rows to Deep Origin DataFrames is not allowed", + ): + df.loc["new-row"] = list(df.loc[first_row]) -def test_slice_and_extend_loc(config): - """test that we can add a row to a whole df, but 
not to a slice of it""" + +@pytest.mark.parametrize("column", NUMERIC_COLUMNS) +def test_dataframe_read_modify(config, column): # noqa: F811 + """this function tests our ability to fetch data, modify it, and write it back""" if config["mock"]: - df = DataFrame(config["df"]) - row_prefix = "x" - else: - df = DataFrame.from_deeporigin(config["db-name"]) - row_prefix = df.attrs["metadata"]["hidPrefix"] + pytest.skip(SKIP_MSG) - # should be possible to add a new row - df.loc[row_prefix + "-" + str(len(df) + 1)] = list(df.loc[df.index[0]]) + df = DataFrame.from_deeporigin(config["db-name"]) + df["sq_" + column] = df[column] ** 2 + df.to_deeporigin() - if not config["mock"]: - df.to_deeporigin() - # slice to 2 rows - df = df.loc[df.index[:2]] +@pytest.mark.parametrize("column", NUMERIC_COLUMNS) +def test_dataframe_write_new_columns(config, column): # noqa: F811 + """this function tests our ability to write new columns to a database""" + + if config["mock"]: + pytest.skip(SKIP_MSG) - # should not be possible to add a new row - with pytest.raises(ValueError, match="Adding rows is not allowed"): - df.loc[row_prefix + "-" + str(len(df) + 1)] = list(df.loc[df.index[0]]) + df = DataFrame.from_deeporigin(config["db-name"]) + df["cube_" + column] = df[column] ** 3 + df.to_deeporigin() @pytest.mark.parametrize("row", [0, -1]) diff --git a/tests/test_dataframe_kitchen_sink.py b/tests/test_dataframe_kitchen_sink.py index 637ab589..eb2da326 100644 --- a/tests/test_dataframe_kitchen_sink.py +++ b/tests/test_dataframe_kitchen_sink.py @@ -33,11 +33,11 @@ def test_kitchen_sink_db_int(config): # noqa: F811 df = DataFrame.from_deeporigin("kitchen-sink") # test writing a single value - df.at["ks-34", "Int"] = np.random.randint(100) + df.at["ks-34", "Int"] = np.random.randint(len(df)) df.to_deeporigin() # test writing entire columns - df["Int"] = np.random.randint(50, 999, 100) + df["Int"] = np.random.randint(50, 999, len(df)) df.to_deeporigin() @@ -54,7 +54,7 @@ def test_kitchen_sink_db_float(config): # noqa: F811 df.to_deeporigin() # test writing entire columns - df["Float"] = np.random.random(100) * 100 + df["Float"] = np.random.random(len(df)) * 100 df.to_deeporigin() @@ -71,6 +71,6 @@ def test_kitchen_sink_db_bool(config): # noqa: F811 df.to_deeporigin() # test writing entire columns - df["Bool"] = [random.choice([True, False]) for _ in range(100)] + df["Bool"] = [random.choice([True, False]) for _ in range(len(df))] df.to_deeporigin()