Skip to content

Commit

Permalink
Allow rename of omnisci_geo column during Immerse geo file import
Browse files Browse the repository at this point in the history
  • Loading branch information
simoneves authored and andrewseidl committed Jul 19, 2021
1 parent 88f2451 commit 26128ca
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 23 deletions.
14 changes: 13 additions & 1 deletion ImportExport/Importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2214,6 +2214,12 @@ static ImportStatus import_thread_delimited(
return thread_import_status;
}

class ColumnNotGeoError : public std::runtime_error {
public:
ColumnNotGeoError(const std::string& column_name)
: std::runtime_error("Column '" + column_name + "' is not a geo column") {}
};

static ImportStatus import_thread_shapefile(
int thread_id,
Importer* importer,
Expand Down Expand Up @@ -2477,7 +2483,10 @@ static ImportStatus import_thread_shapefile(
auto const& field_name = cit->second;

auto const fit = fieldNameToIndexMap.find(field_name);
CHECK(fit != fieldNameToIndexMap.end());
if (fit == fieldNameToIndexMap.end()) {
throw ColumnNotGeoError(cd->columnName);
}

auto const& field_index = fit->second;
CHECK(field_index < fieldNameToIndexMap.size());

Expand Down Expand Up @@ -2556,6 +2565,9 @@ static ImportStatus import_thread_shapefile(
if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
throw e;
}
} catch (ColumnNotGeoError& e) {
LOG(ERROR) << "Input exception thrown: " << e.what() << ". Aborting import.";
throw std::runtime_error(e.what());
} catch (const std::exception& e) {
for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
import_buffers[col_idx_to_pop]->pop_value();
Expand Down
17 changes: 17 additions & 0 deletions Tests/ImportGeoTableTest/datafiles/geospatial_poly.geojson
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"type": "FeatureCollection",
"name": "geospatial_poly",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "trip": 0.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 1.0, 0.0 ], [ 0.0, 1.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 2.0, 0.0 ], [ 0.0, 2.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 2.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 3.0, 0.0 ], [ 0.0, 3.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 4.0, 0.0 ], [ 0.0, 4.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 4.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 5.0, 0.0 ], [ 0.0, 5.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 5.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 6.0, 0.0 ], [ 0.0, 6.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 6.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 7.0, 0.0 ], [ 0.0, 7.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 7.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 8.0, 0.0 ], [ 0.0, 8.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 8.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 9.0, 0.0 ], [ 0.0, 9.0 ], [ 0.0, 0.0 ] ] ] } },
{ "type": "Feature", "properties": { "trip": 9.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 0.0, 0.0 ], [ 10.0, 0.0 ], [ 0.0, 10.0 ], [ 0.0, 0.0 ] ] ] } }
]
}
234 changes: 234 additions & 0 deletions Tests/LoadTableTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,240 @@ TEST_F(LoadTableTest, ArrowNoColumns) {
"No columns to insert");
}

class ImportGeoTableTest : public DBHandlerTestFixture {
protected:
void SetUp() override {
DBHandlerTestFixture::SetUp();
sql("DROP TABLE IF EXISTS import_geo_table_test");
}

const std::string getGeoFileName() const {
auto geo_file_name =
boost::filesystem::absolute(
boost::filesystem::path(
"../../Tests/ImportGeoTableTest/datafiles/geospatial_poly.geojson"))
.string();
return geo_file_name;
}

const TCopyParams getCopyParams() const {
TCopyParams copy_params;
return copy_params;
}

const TCreateParams getCreateParams() const {
TCreateParams create_params;
create_params.is_replicated = false;
return create_params;
}

TColumnType getScalarColumnType(const std::string& name,
const TDatumType::type type) const {
TColumnType ct;
ct.col_name = name;
ct.col_type.type = type;
ct.col_type.encoding = TEncodingType::type::NONE;
ct.col_type.nullable = true;
ct.col_type.is_array = false;
ct.col_type.precision = 0;
ct.col_type.scale = 0;
ct.col_type.comp_param = 0;
ct.col_type.size = 0;
ct.is_reserved_keyword = false;
ct.src_name = name;
ct.is_system = false;
ct.is_physical = false;
ct.col_id = 0;
return ct;
};

TColumnType getPolyColumnType(const std::string& name) const {
TColumnType ct;
ct.col_name = name;
ct.col_type.type = TDatumType::type::POLYGON;
ct.col_type.encoding = TEncodingType::type::GEOINT;
ct.col_type.nullable = true;
ct.col_type.is_array = false;
ct.col_type.precision = 23; // aka subtype = SQLTypes::kGEOMETRY (not the same as
// TDatumType::type::GEOMETRY)
ct.col_type.scale = 4326; // aka output_srid = WGS84
ct.col_type.comp_param = 32;
ct.col_type.size = 0;
ct.is_reserved_keyword = false;
ct.src_name = name;
ct.is_system = false;
ct.is_physical = false;
ct.col_id = 0;
return ct;
};

void TearDown() override {
sql("DROP TABLE IF EXISTS import_geo_table_test");
DBHandlerTestFixture::TearDown();
}
};

TEST_F(ImportGeoTableTest, ImportGeoTableAuto) {
// geo import with empty row descriptor
// will automatically create table
// equivalent to COPY FROM WITH (geo='true')
auto* handler = getDbHandlerAndSessionId().first;
auto& session = getDbHandlerAndSessionId().second;
TRowDescriptor row_descriptor;
EXPECT_NO_THROW(handler->import_geo_table(session,
"import_geo_table_test",
getGeoFileName(),
getCopyParams(),
row_descriptor,
getCreateParams()));
sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(10)}});
sqlAndCompareResult("SELECT trip FROM import_geo_table_test WHERE rowid=0", {{0.0f}});
}

TEST_F(ImportGeoTableTest, ImportGeoTableExplicit) {
// geo import with explicit row descriptor (e.g. Immerse import)
// must create table first
// correct types
auto* handler = getDbHandlerAndSessionId().first;
auto& session = getDbHandlerAndSessionId().second;
TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::FLOAT),
getPolyColumnType("omnisci_geo")};
EXPECT_NO_THROW(handler->create_table(session,
"import_geo_table_test",
row_descriptor,
TFileType::type::POLYGON,
getCreateParams()));
EXPECT_NO_THROW(handler->import_geo_table(session,
"import_geo_table_test",
getGeoFileName(),
getCopyParams(),
row_descriptor,
getCreateParams()));
sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(10)}});
sqlAndCompareResult("SELECT trip FROM import_geo_table_test WHERE rowid=0", {{0.0f}});
}

TEST_F(ImportGeoTableTest, ImportGeoTableOverride) {
// geo import with explicit row descriptor (e.g. Immerse import)
// must create table first
// type of column 'trip' overridden from FLOAT to INT (valid)
auto* handler = getDbHandlerAndSessionId().first;
auto& session = getDbHandlerAndSessionId().second;
TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::INT),
getPolyColumnType("omnisci_geo")};
EXPECT_NO_THROW(handler->create_table(session,
"import_geo_table_test",
row_descriptor,
TFileType::type::POLYGON,
getCreateParams()));
EXPECT_NO_THROW(handler->import_geo_table(session,
"import_geo_table_test",
getGeoFileName(),
getCopyParams(),
row_descriptor,
getCreateParams()));
sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(10)}});
sqlAndCompareResult("SELECT trip FROM import_geo_table_test WHERE rowid=0", {{i(0)}});
}

TEST_F(ImportGeoTableTest, ImportGeoTableTypeMismatch1) {
// geo import with explicit row descriptor (e.g. Immerse import)
// must create table first
// types of columns swapped (possible in Immerse for now)
// import will not fail, but should reject all rows
auto* handler = getDbHandlerAndSessionId().first;
auto& session = getDbHandlerAndSessionId().second;
TRowDescriptor row_descriptor{
getPolyColumnType("trip"),
getScalarColumnType("omnisci_geo", TDatumType::type::FLOAT)};
EXPECT_NO_THROW(handler->create_table(session,
"import_geo_table_test",
row_descriptor,
TFileType::type::POLYGON,
getCreateParams()));
EXPECT_THROW(handler->import_geo_table(session,
"import_geo_table_test",
getGeoFileName(),
getCopyParams(),
row_descriptor,
getCreateParams()),
TOmniSciException);
sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}});
}

TEST_F(ImportGeoTableTest, ImportGeoTableFailTypeMismatch2) {
// geo import with explicit row descriptor (e.g. Immerse import)
// must create table first
// column types valid but columns swapped (possible in Immerse for now)
// import will not fail, but should reject all rows
auto* handler = getDbHandlerAndSessionId().first;
auto& session = getDbHandlerAndSessionId().second;
TRowDescriptor row_descriptor{
getScalarColumnType("omnisci_geo", TDatumType::type::FLOAT),
getPolyColumnType("trip")};
EXPECT_NO_THROW(handler->create_table(session,
"import_geo_table_test",
row_descriptor,
TFileType::type::POLYGON,
getCreateParams()));
EXPECT_THROW(handler->import_geo_table(session,
"import_geo_table_test",
getGeoFileName(),
getCopyParams(),
row_descriptor,
getCreateParams()),
TOmniSciException);
sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}});
}

TEST_F(ImportGeoTableTest, ImportGeoTableFailNoGeoColumns) {
// geo import with explicit row descriptor (e.g. Immerse import)
// must create table first
// no geo columns in row descriptor
// import should fail
auto* handler = getDbHandlerAndSessionId().first;
auto& session = getDbHandlerAndSessionId().second;
TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::FLOAT)};
EXPECT_NO_THROW(handler->create_table(session,
"import_geo_table_test",
row_descriptor,
TFileType::type::POLYGON,
getCreateParams()));
EXPECT_THROW(handler->import_geo_table(session,
"import_geo_table_test",
getGeoFileName(),
getCopyParams(),
row_descriptor,
getCreateParams()),
TOmniSciException);
sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}});
}

TEST_F(ImportGeoTableTest, ImportGeoTableFailTooManyGeoColumns) {
// geo import with explicit row descriptor (e.g. Immerse import)
// must create table first
// more than one geo column in row descriptor
// import should fail
auto* handler = getDbHandlerAndSessionId().first;
auto& session = getDbHandlerAndSessionId().second;
TRowDescriptor row_descriptor{getScalarColumnType("trip", TDatumType::type::FLOAT),
getPolyColumnType("omnisci_geo1"),
getPolyColumnType("omnisci_geo2")};
EXPECT_NO_THROW(handler->create_table(session,
"import_geo_table_test",
row_descriptor,
TFileType::type::POLYGON,
getCreateParams()));
EXPECT_THROW(handler->import_geo_table(session,
"import_geo_table_test",
getGeoFileName(),
getCopyParams(),
row_descriptor,
getCreateParams()),
TOmniSciException);
sqlAndCompareResult("SELECT count(*) FROM import_geo_table_test", {{i(0)}});
}

#ifdef HAVE_AWS_S3
class ThriftDetectServerPrivilegeTest : public DBHandlerTestFixture {
protected:
Expand Down
39 changes: 17 additions & 22 deletions ThriftHandler/DBHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5142,30 +5142,25 @@ void DBHandler::import_geo_table(const TSessionId& session,
}

if (is_geo_layer) {
// Final check to ensure that we actually have a geo column
// of the expected name and type before doing the actual import,
// in case the user naively overrode the name or type in Immerse
// Preview (which as of 6/8/18 it still allows you to do).
// This avoids a fatal assert later when it fails to find the
// column. We should make Immerse more robust and disallow this.
bool have_geo_column_with_correct_name = false;
for (const auto& r : rd) {
if (TTypeInfo_IsGeo(r.col_type.type)) {
// TODO(team): allow user to override the geo column name
if (r.col_name == OMNISCI_GEO_PREFIX) {
have_geo_column_with_correct_name = true;
} else if (r.col_name == LEGACY_GEO_PREFIX) {
CHECK(colname_to_src.find(r.col_name) != colname_to_src.end());
// Normalize column names for geo append with legacy column naming scheme
colname_to_src[r.col_name] = r.col_name;
have_geo_column_with_correct_name = true;
}
// Final check to ensure that we have exactly one geo column
// before doing the actual import, in case the user naively
// overrode the types in Immerse Preview (which as of 6/17/21
// it still allows you to do). We should make Immerse more
// robust and disallow re-typing of columns to/from geo types
// completely. Currently, if multiple columns are re-typed
// such that there is still exactly one geo column (but it's
// the wrong one) then this test will pass, but the import
// will then reject some (or more likely all) of the rows.
int num_geo_columns{0};
for (auto const& col : rd) {
if (TTypeInfo_IsGeo(col.col_type.type)) {
num_geo_columns++;
}
}
if (!have_geo_column_with_correct_name) {
std::string exception_message = "Table " + this_table_name +
" does not have a geo column with name '" +
OMNISCI_GEO_PREFIX + "'. Import aborted!";
if (num_geo_columns != 1) {
std::string exception_message =
"Table '" + this_table_name +
"' must have exactly one geo column. Import aborted!";
caught_exception_messages.emplace_back(exception_message);
continue;
}
Expand Down

0 comments on commit 26128ca

Please sign in to comment.