diff --git a/ImportExport/Importer.cpp b/ImportExport/Importer.cpp index 426df9408f..9b8c56369f 100644 --- a/ImportExport/Importer.cpp +++ b/ImportExport/Importer.cpp @@ -2214,6 +2214,12 @@ static ImportStatus import_thread_delimited( return thread_import_status; } +class ColumnNotGeoError : public std::runtime_error { + public: + ColumnNotGeoError(const std::string& column_name) + : std::runtime_error("Column '" + column_name + "' is not a geo column") {} +}; + static ImportStatus import_thread_shapefile( int thread_id, Importer* importer, @@ -2477,7 +2483,10 @@ static ImportStatus import_thread_shapefile( auto const& field_name = cit->second; auto const fit = fieldNameToIndexMap.find(field_name); - CHECK(fit != fieldNameToIndexMap.end()); + if (fit == fieldNameToIndexMap.end()) { + throw ColumnNotGeoError(cd->columnName); + } + auto const& field_index = fit->second; CHECK(field_index < fieldNameToIndexMap.size()); @@ -2556,6 +2565,9 @@ static ImportStatus import_thread_shapefile( if (e.getErrorCode() == Executor::ERR_INTERRUPTED) { throw e; } + } catch (ColumnNotGeoError& e) { + LOG(ERROR) << "Input exception thrown: " << e.what() << ". Aborting import."; + throw std::runtime_error(e.what()); } catch (const std::exception& e) { for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) { import_buffers[col_idx_to_pop]->pop_value(); diff --git a/Tests/ImportGeoTableTest/datafiles/geospatial_poly.geojson b/Tests/ImportGeoTableTest/datafiles/geospatial_poly.geojson new file mode 100644 index 0000000000..091e5121f5 --- /dev/null +++ b/Tests/ImportGeoTableTest/datafiles/geospatial_poly.geojson @@ -0,0 +1,17 @@ +{ +"type": "FeatureCollection", +"name": "geospatial_poly", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { "trip": 0.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 1.0, 0.0 ], [ 0.0, 1.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 2.0, 0.0 ], [ 0.0, 2.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 2.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 3.0, 0.0 ], [ 0.0, 3.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 4.0, 0.0 ], [ 0.0, 4.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 4.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 5.0, 0.0 ], [ 0.0, 5.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 5.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 6.0, 0.0 ], [ 0.0, 6.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 6.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 7.0, 0.0 ], [ 0.0, 7.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 7.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 8.0, 0.0 ], [ 0.0, 8.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 8.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 9.0, 0.0 ], [ 0.0, 9.0 ], [ 0.0, 0.0 ] ] ] } }, +{ "type": "Feature", "properties": { "trip": 9.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 10.0, 0.0 ], [ 0.0, 10.0 ], [ 0.0, 0.0 ] ] ] } } +] +} diff --git a/Tests/LoadTableTest.cpp b/Tests/LoadTableTest.cpp index ba97be9c52..90b0182809 100644 --- a/Tests/LoadTableTest.cpp +++ b/Tests/LoadTableTest.cpp @@ -597,6 +597,240 @@ TEST_F(LoadTableTest, ArrowNoColumns) { "No columns to insert"); } +class ImportGeoTableTest : public DBHandlerTestFixture { + protected: + void SetUp() override { + DBHandlerTestFixture::SetUp(); + sql("DROP TABLE IF EXISTS import_geo_table_test"); + } + + const std::string getGeoFileName() const { + auto geo_file_name = + boost::filesystem::absolute( + boost::filesystem::path( + "../../Tests/ImportGeoTableTest/datafiles/geospatial_poly.geojson")) + .string(); + return geo_file_name; + } + + const TCopyParams getCopyParams() const { + TCopyParams copy_params; + return copy_params; + } + + const TCreateParams getCreateParams() const { + TCreateParams create_params; + create_params.is_replicated = false; + return create_params; + } + + TColumnType getScalarColumnType(const std::string& name, + const TDatumType::type type) const { + TColumnType ct; + ct.col_name = name; + ct.col_type.type = type; + ct.col_type.encoding = TEncodingType::type::NONE; + ct.col_type.nullable = true; + ct.col_type.is_array = false; + ct.col_type.precision = 0; + ct.col_type.scale = 0; + ct.col_type.comp_param = 0; + ct.col_type.size = 0; + ct.is_reserved_keyword = false; + ct.src_name = name; + ct.is_system = false; + ct.is_physical = false; + ct.col_id = 0; + return ct; + }; + + TColumnType getPolyColumnType(const std::string& name) const { + TColumnType ct; + ct.col_name = name; + ct.col_type.type = TDatumType::type::POLYGON; + ct.col_type.encoding = TEncodingType::type::GEOINT; + ct.col_type.nullable = true; + ct.col_type.is_array = false; + ct.col_type.precision = 23; // aka subtype = SQLTypes::kGEOMETRY (not the same as + // TDatumType::type::GEOMETRY) + ct.col_type.scale = 4326; // aka output_srid = WGS84 + ct.col_type.comp_param = 32; + ct.col_type.size = 0; + ct.is_reserved_keyword = false; + ct.src_name = name; + ct.is_system = false; + ct.is_physical = false; + ct.col_id = 0; + return ct; + }; + + void TearDown() override { + sql("DROP TABLE IF EXISTS import_geo_table_test"); + DBHandlerTestFixture::TearDown(); + } +}; + +TEST_F(ImportGeoTableTest, ImportGeoTableAuto) { + // geo import with empty row descriptor + // will automatically create table + // equivalent to COPY FROM WITH (geo='true') + auto* handler = getDbHandlerAndSessionId().first; + auto& session = getDbHandlerAndSessionId().second; + TRowDescriptor row_descriptor; + EXPECT_NO_THROW(handler->import_geo_table(session, + "import_geo_table_test", + getGeoFileName(), + getCopyParams(), + row_descriptor, + getCreateParams())); + sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(10)}}); + sqlAndCompareResult("SELECT trip FROM import_geo_table_test WHERE rowid=0", {{0.0f}}); +} + +TEST_F(ImportGeoTableTest, ImportGeoTableExplicit) { + // geo import with explicit row descriptor (e.g. Immerse import) + // must create table first + // correct types + auto* handler = getDbHandlerAndSessionId().first; + auto& session = getDbHandlerAndSessionId().second; + TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::FLOAT), + getPolyColumnType("omnisci_geo")}; + EXPECT_NO_THROW(handler->create_table(session, + "import_geo_table_test", + row_descriptor, + TFileType::type::POLYGON, + getCreateParams())); + EXPECT_NO_THROW(handler->import_geo_table(session, + "import_geo_table_test", + getGeoFileName(), + getCopyParams(), + row_descriptor, + getCreateParams())); + sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(10)}}); + sqlAndCompareResult("SELECT trip FROM import_geo_table_test WHERE rowid=0", {{0.0f}}); +} + +TEST_F(ImportGeoTableTest, ImportGeoTableOverride) { + // geo import with explicit row descriptor (e.g. Immerse import) + // must create table first + // type of column 'trip' overridden from FLOAT to INT (valid) + auto* handler = getDbHandlerAndSessionId().first; + auto& session = getDbHandlerAndSessionId().second; + TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::INT), + getPolyColumnType("omnisci_geo")}; + EXPECT_NO_THROW(handler->create_table(session, + "import_geo_table_test", + row_descriptor, + TFileType::type::POLYGON, + getCreateParams())); + EXPECT_NO_THROW(handler->import_geo_table(session, + "import_geo_table_test", + getGeoFileName(), + getCopyParams(), + row_descriptor, + getCreateParams())); + sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(10)}}); + sqlAndCompareResult("SELECT trip FROM import_geo_table_test WHERE rowid=0", {{i(0)}}); +} + +TEST_F(ImportGeoTableTest, ImportGeoTableTypeMismatch1) { + // geo import with explicit row descriptor (e.g. Immerse import) + // must create table first + // types of columns swapped (possible in Immerse for now) + // import will not fail, but should reject all rows + auto* handler = getDbHandlerAndSessionId().first; + auto& session = getDbHandlerAndSessionId().second; + TRowDescriptor row_descriptor{ + getPolyColumnType("trip"), + getScalarColumnType("omnisci_geo", TDatumType::type::FLOAT)}; + EXPECT_NO_THROW(handler->create_table(session, + "import_geo_table_test", + row_descriptor, + TFileType::type::POLYGON, + getCreateParams())); + EXPECT_THROW(handler->import_geo_table(session, + "import_geo_table_test", + getGeoFileName(), + getCopyParams(), + row_descriptor, + getCreateParams()), + TOmniSciException); + sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}}); +} + +TEST_F(ImportGeoTableTest, ImportGeoTableFailTypeMismatch2) { + // geo import with explicit row descriptor (e.g. Immerse import) + // must create table first + // column types valid but columns swapped (possible in Immerse for now) + // import will not fail, but should reject all rows + auto* handler = getDbHandlerAndSessionId().first; + auto& session = getDbHandlerAndSessionId().second; + TRowDescriptor row_descriptor{ + getScalarColumnType("omnisci_geo", TDatumType::type::FLOAT), + getPolyColumnType("trip")}; + EXPECT_NO_THROW(handler->create_table(session, + "import_geo_table_test", + row_descriptor, + TFileType::type::POLYGON, + getCreateParams())); + EXPECT_THROW(handler->import_geo_table(session, + "import_geo_table_test", + getGeoFileName(), + getCopyParams(), + row_descriptor, + getCreateParams()), + TOmniSciException); + sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}}); +} + +TEST_F(ImportGeoTableTest, ImportGeoTableFailNoGeoColumns) { + // geo import with explicit row descriptor (e.g. Immerse import) + // must create table first + // no geo columns in row descriptor + // import should fail + auto* handler = getDbHandlerAndSessionId().first; + auto& session = getDbHandlerAndSessionId().second; + TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::FLOAT)}; + EXPECT_NO_THROW(handler->create_table(session, + "import_geo_table_test", + row_descriptor, + TFileType::type::POLYGON, + getCreateParams())); + EXPECT_THROW(handler->import_geo_table(session, + "import_geo_table_test", + getGeoFileName(), + getCopyParams(), + row_descriptor, + getCreateParams()), + TOmniSciException); + sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}}); +} + +TEST_F(ImportGeoTableTest, ImportGeoTableFailTooManyGeoColumns) { + // geo import with explicit row descriptor (e.g. Immerse import) + // must create table first + // more than one geo column in row descriptor + // import should fail + auto* handler = getDbHandlerAndSessionId().first; + auto& session = getDbHandlerAndSessionId().second; + TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::FLOAT), + getPolyColumnType("omnisci_geo1"), + getPolyColumnType("omnisci_geo2")}; + EXPECT_NO_THROW(handler->create_table(session, + "import_geo_table_test", + row_descriptor, + TFileType::type::POLYGON, + getCreateParams())); + EXPECT_THROW(handler->import_geo_table(session, + "import_geo_table_test", + getGeoFileName(), + getCopyParams(), + row_descriptor, + getCreateParams()), + TOmniSciException); + sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}}); +} + #ifdef HAVE_AWS_S3 class ThriftDetectServerPrivilegeTest : public DBHandlerTestFixture { protected: diff --git a/ThriftHandler/DBHandler.cpp b/ThriftHandler/DBHandler.cpp index d372fdd21d..d216e8c409 100644 --- a/ThriftHandler/DBHandler.cpp +++ b/ThriftHandler/DBHandler.cpp @@ -5142,30 +5142,25 @@ void DBHandler::import_geo_table(const TSessionId& session, } if (is_geo_layer) { - // Final check to ensure that we actually have a geo column - // of the expected name and type before doing the actual import, - // in case the user naively overrode the name or type in Immerse - // Preview (which as of 6/8/18 it still allows you to do). - // This avoids a fatal assert later when it fails to find the - // column. We should make Immerse more robust and disallow this. - bool have_geo_column_with_correct_name = false; - for (const auto& r : rd) { - if (TTypeInfo_IsGeo(r.col_type.type)) { - // TODO(team): allow user to override the geo column name - if (r.col_name == OMNISCI_GEO_PREFIX) { - have_geo_column_with_correct_name = true; - } else if (r.col_name == LEGACY_GEO_PREFIX) { - CHECK(colname_to_src.find(r.col_name) != colname_to_src.end()); - // Normalize column names for geo append with legacy column naming scheme - colname_to_src[r.col_name] = r.col_name; - have_geo_column_with_correct_name = true; - } + // Final check to ensure that we have exactly one geo column + // before doing the actual import, in case the user naively + // overrode the types in Immerse Preview (which as of 6/17/21 + // it still allows you to do). We should make Immerse more + // robust and disallow re-typing of columns to/from geo types + // completely. Currently, if multiple columns are re-typed + // such that there is still exactly one geo column (but it's + // the wrong one) then this test will pass, but the import + // will then reject some (or more likely all) of the rows. + int num_geo_columns{0}; + for (auto const& col : rd) { + if (TTypeInfo_IsGeo(col.col_type.type)) { + num_geo_columns++; } } - if (!have_geo_column_with_correct_name) { - std::string exception_message = "Table " + this_table_name + - " does not have a geo column with name '" + - OMNISCI_GEO_PREFIX + "'. Import aborted!"; + if (num_geo_columns != 1) { + std::string exception_message = + "Table '" + this_table_name + + "' must have exactly one geo column. Import aborted!"; caught_exception_messages.emplace_back(exception_message); continue; }