diff --git a/genie/input_to_database.py b/genie/input_to_database.py index c189ed51..47174e2a 100644 --- a/genie/input_to_database.py +++ b/genie/input_to_database.py @@ -568,7 +568,7 @@ def build_error_tracking_table(invalid_errors: List[dict]): Error tracking dataframe """ - error_table_columns = ["id", "errors", "name", "fileType", "center", "entity"] + error_table_columns = ["id", "errors", "name", "fileType", "center", "version", "entity"] invalid_error_rows = [] for invalid_error in invalid_errors: entity = invalid_error["entity"] diff --git a/tests/test_input_to_database.py b/tests/test_input_to_database.py index 3d52d4f6..6f21deaf 100644 --- a/tests/test_input_to_database.py +++ b/tests/test_input_to_database.py @@ -26,6 +26,7 @@ name="data_clinical_supp_sample_SAGE.txt", modifiedOn="2019-03-24T12:00:00.Z", md5="44444", + versionNumber=3 ) patient_clinical_synid = "syn11111" @@ -679,6 +680,7 @@ def setup_method(self): 1553428800000, "clinical", center, + sample_clinical_entity.versionNumber, sample_clinical_entity, ] ] @@ -692,6 +694,7 @@ def setup_method(self): "modifiedOn", "fileType", "center", + "version", "entity", ] ) @@ -706,6 +709,7 @@ def setup_method(self): "modifiedOn", "fileType", "center", + "version", "entity", ], ) @@ -716,14 +720,15 @@ def setup_method(self): sample_clinical_entity.name, "clinical", center, + sample_clinical_entity.versionNumber, sample_clinical_entity, ] ] self.errors_df = pd.DataFrame( - error, columns=["id", "errors", "name", "fileType", "center", "entity"] + error, columns=["id", "errors", "name", "fileType", "center", "version", "entity"] ) self.empty_errors = pd.DataFrame( - columns=["id", "errors", "name", "fileType", "center", "entity"] + columns=["id", "errors", "name", "fileType", "center", "version", "entity"] ) self.with_dupsdf = pd.DataFrame( @@ -744,6 +749,7 @@ def setup_method(self): ], "center": ["SAGE"] * 5, "fileType": ["type"] * 5, + "version": [3] * 5, "entity": ["entity"] * 5, } ) @@ -768,12 +774,15 @@ def setup_method(self): ], "center": ["SAGE"] * 5, "fileType": ["type"] * 5, + "version": [3] * 5, "entity": ["entity"] * 5, } ) self.empty_dup = pd.DataFrame( - columns=["id", "name", "center", "fileType", "entity", "errors"] + columns=["id", "name", "center", "fileType", "version", "entity", "errors"] ) + # self.empty_dup = self.empty_dup.astype({"version": int}) + # self.empty_dup.index = self.empty_dup.index.astype('object') def test_build_validation_status_table(self): input_valid_statuses = [ @@ -836,7 +845,12 @@ def test_dups_get_duplicated_files(self): def test_nodups_get_duplicated_files(self): """Test no duplicated""" dupsdf = input_to_database.get_duplicated_files(self.no_dupsdf) - assert dupsdf.equals(self.empty_dup) + # These empty frames won't be equal without these conversions + # HACK: Convert the index type to the same type + self.empty_dup.index = self.empty_dup.index.astype('int') + # HACK: Convert the dtype of the "version" column to the same type + self.empty_dup["version"] = self.empty_dup["version"].astype('int') + pd.testing.assert_frame_equal(dupsdf, self.empty_dup) def test__update_tables_content(self): """Tests duplicates are added to the tables and errors/statues are @@ -899,6 +913,7 @@ def test_validation(self, syn, genie_config): entity = synapseclient.Entity( id="syn1234", md5="44444", + version=3, path="/path/to/foobar.txt", name="data_clinical_supp_SAGE.txt", ) @@ -913,6 +928,7 @@ def test_validation(self, syn, genie_config): entity.name, modified_on, filetype, + entity.version, center, ] ] @@ -936,6 +952,8 @@ def test_validation(self, syn, genie_config): input_to_database, "build_validation_status_table", return_value=self.validation_statusdf, + ), patch.object( + syn, "get", return_value=entity ), patch.object( input_to_database, "build_error_tracking_table", return_value=self.errors_df ), patch.object(