Skip to content

Commit

Permalink
Release 1.0.0 (OHDSI#418)
Browse files Browse the repository at this point in the history
* Create release 1.0.0

* Enforce consistent ordering of the tables in the scan report (solves issue OHDSI#236)

* Snowflake: always use database and schema when accessing table (meta)data. Fixes issue OHDSI#409

* Update Snowflake JDBC version and activate+fix Snowflake integration tests

* Upgrade dependency, testcontainer version and fix MSSqlServer integration test.

* Only run Snowflake integration tests when a Snowflake access configuration is available

* Switch to SQL for obtaining field metadata for Snowflake (default, JDBC can still be used through a system property or env.var)

* Fix for OHDSI#411 (can't process custom models with UTF8 BOM in csv file)

* Better method naming and clearer logging for SnowflakeHandler

* Add UTF BOM handling code for reading of CSV files

* Change to ojdbc8 version 19.23.0.0 (for Oracle). Different (sub)repo, more recently published, solves issue OHDSI#415

* Avoid testing results for integration test with externally loaded BigQuery JDBC jar: makes setup more simple
  • Loading branch information
janblom authored Aug 5, 2024
1 parent c4da141 commit 4aa4253
Show file tree
Hide file tree
Showing 22 changed files with 5,454 additions and 65 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<groupId>org.ohdsi</groupId>
<artifactId>leporidae</artifactId>
<packaging>pom</packaging>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0</version>
<modules>
<module>rabbitinahat</module>
<module>whiterabbit</module>
Expand Down
8 changes: 4 additions & 4 deletions rabbit-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>leporidae</artifactId>
<groupId>org.ohdsi</groupId>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down Expand Up @@ -37,9 +37,9 @@

<dependencies>
<dependency>
<groupId>com.oracle.ojdbc</groupId>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>19.3.0.0</version>
<version>19.23.0.0</version>
</dependency>
<dependency>
<groupId>com.microsoft.sqlserver</groupId>
Expand Down Expand Up @@ -175,7 +175,7 @@
<dependency>
<groupId>net.snowflake</groupId>
<artifactId>snowflake-jdbc</artifactId>
<version>3.14.5</version>
<version>3.15.0</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ public void use(String database, DbType dbType) {
}
}

/**
 * Wraps the given SQL text in a {@link QueryResult} tied to this connection.
 *
 * @param sql the SQL text passed on to the QueryResult
 * @return a new QueryResult built from the SQL text, this connection and the verbose flag
 */
public QueryResult query(String sql) {
    final QueryResult result = new QueryResult(sql, this, verbose);
    return result;
}

/**
 * Executes the given SQL by delegating to the two-argument overload with {@code false}
 * as the second argument.
 *
 * @param sql the SQL text to execute
 */
// NOTE(review): the meaning of the `false` argument is defined by execute(String, boolean), which is not visible here.
public void execute(String sql) {
execute(sql, false);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
*/
public enum SnowflakeHandler implements StorageHandler {
INSTANCE();
public static final String WR_USE_SNOWFLAKE_JDBC_METADATA = "WR_USE_SNOWFLAKE_METADATA";

DBConfiguration configuration = new SnowflakeConfiguration();
private DBConnection snowflakeConnection = null;
Expand Down Expand Up @@ -98,18 +99,57 @@ public DBConnection getDBConnection() {
}

/**
 * Builds the "USE WAREHOUSE" statement from the configured SNOWFLAKE_WAREHOUSE value,
 * logs it at info level and returns it.
 *
 * @param ignoredDatabase not read by this method
 * @return the USE WAREHOUSE statement, terminated by a semicolon
 */
public String getUseQuery(String ignoredDatabase) {
// NOTE(review): two variants are shown together (quoted + upper-cased warehouse name with string concatenation
// in the log call, and the unquoted name as configured with a parameterized log call). This looks like removed
// and added diff lines rendered side by side; presumably the second pair is the current code - confirm.
String useQuery = String.format("USE WAREHOUSE \"%s\";", configuration.getValue(SNOWFLAKE_WAREHOUSE).toUpperCase());
logger.info("SnowFlakeHandler will execute query: " + useQuery);
String useQuery = String.format("USE WAREHOUSE %s;", configuration.getValue(SNOWFLAKE_WAREHOUSE));
logger.info("SnowFlakeHandler will execute query: {}", useQuery);
return useQuery;
}

/**
 * Builds the row-count query ({@code SELECT COUNT(*) FROM <database>.<schema>.<table>;}) for a table.
 *
 * @param tableName the unqualified table name
 * @return the COUNT(*) query text, terminated by a semicolon
 */
@Override
public String getTableSizeQuery(String tableName) {
// NOTE(review): two return statements are shown (removed and added diff lines rendered together). Both yield
// the same text: the first qualifies the table inline, the second delegates the qualification to resolveTableName.
return String.format("SELECT COUNT(*) FROM %s.%s.%s;", this.getDatabase(), this.getSchema(), tableName);
return String.format("SELECT COUNT(*) FROM %s;", resolveTableName(tableName));
}

/**
 * Builds a query that selects up to sampleSize rows of a table in random order
 * (ORDER BY RANDOM() ... LIMIT sampleSize). The rowCount argument is not used in the query.
 */
// NOTE(review): two signature/return pairs are shown (removed and added diff lines rendered together). The first
// uses the table name as given; the second qualifies it through resolveTableName - presumably the current code, confirm.
public String getRowSampleQuery(String table, long rowCount, long sampleSize) {
return String.format("SELECT * FROM %s ORDER BY RANDOM() LIMIT %s", table, sampleSize);
public String getRowSampleQuery(String tableName, long rowCount, long sampleSize) {
return String.format("SELECT * FROM %s ORDER BY RANDOM() LIMIT %s", resolveTableName(tableName), sampleSize);
}

/**
 * Produces the qualified name of a table: database, schema and table name joined by dots.
 *
 * @param tableName the unqualified table name
 * @return {@code <database>.<schema>.<tableName>}
 */
private String resolveTableName(String tableName) {
    final String database = this.getDatabase();
    final String schema = this.getSchema();
    return String.join(".", database, schema, tableName);
}

/**
 * Obtains the column metadata of a table through JDBC ({@code DatabaseMetaData.getColumns}),
 * restricted to the database and schema of this handler.
 *
 * <p>Database, schema and table name are converted to upper or lower case when the driver
 * reports that it stores identifiers that way; otherwise they are passed on unchanged.
 *
 * @param tableName the unqualified table name
 * @return the ResultSet returned by {@code getColumns(database, schema, tableName, null)}
 * @throws RuntimeException when the JDBC metadata call fails; the SQLException is attached as cause
 */
@Override
public ResultSet getFieldsInformation(String tableName) {
    try {
        String database = this.getDatabase();
        String schema = this.getSchema();
        String table = tableName;
        DatabaseMetaData metadata = getDBConnection().getMetaData();
        // Locale.ROOT keeps the case conversion of identifiers independent of the default locale
        // (e.g. in a Turkish locale "i" would otherwise be upper-cased to a dotted capital I).
        if (metadata.storesUpperCaseIdentifiers()) {
            database = database.toUpperCase(java.util.Locale.ROOT);
            schema = schema.toUpperCase(java.util.Locale.ROOT);
            table = table.toUpperCase(java.util.Locale.ROOT);
        } else if (metadata.storesLowerCaseIdentifiers()) {
            database = database.toLowerCase(java.util.Locale.ROOT);
            schema = schema.toLowerCase(java.util.Locale.ROOT);
            table = table.toLowerCase(java.util.Locale.ROOT);
        }

        logger.warn("Obtaining column information from JDBC metadata: metadata.getColumns({}, {}, {}, null)",
                database, schema, table);
        return metadata.getColumns(database, schema, table, null);
    } catch (SQLException e) {
        // keep the original exception as cause so the stack trace of the JDBC failure is not lost
        throw new RuntimeException(e.getMessage(), e);
    }
}

/**
 * Supplies the SQL query that reads column names and data types of a table from the
 * INFORMATION_SCHEMA.COLUMNS view of the handler's database.
 *
 * <p>When the environment variable or system property named by
 * {@code WR_USE_SNOWFLAKE_JDBC_METADATA} is set (to any value), no query is supplied.
 *
 * @param tableName the unqualified table name
 * @return the query text (selecting column_name, then data_type), or {@code null} to force use of JDBC metadata
 */
@Override
public String getFieldsInformationQuery(String tableName) {
    boolean useJdbcMetadata = System.getenv(WR_USE_SNOWFLAKE_JDBC_METADATA) != null
            || System.getProperty(WR_USE_SNOWFLAKE_JDBC_METADATA) != null;
    if (useJdbcMetadata) {
        return null; // not providing a query forces use of JDBC metadata
    }

    // Locale.ROOT keeps the upper-casing independent of the default locale; single quotes are doubled
    // because schema and table name are embedded in SQL string literals.
    String database = this.getDatabase().toUpperCase(java.util.Locale.ROOT);
    String schemaLiteral = this.getSchema().toUpperCase(java.util.Locale.ROOT).replace("'", "''");
    String tableLiteral = tableName.toUpperCase(java.util.Locale.ROOT).replace("'", "''");
    return String.format(
            "SELECT column_name, data_type FROM %s.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'",
            database, schemaLiteral, tableLiteral);
}

public String getTablesQuery(String database) {
Expand Down
35 changes: 26 additions & 9 deletions rabbit-core/src/main/java/org/ohdsi/databases/StorageHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,38 @@ default List<String> getTableNames() {
*/
default List<FieldInfo> fetchTableStructure(String table, ScanParameters scanParameters) {
// NOTE(review): this span shows removed and added diff lines together (rs and fieldInfo are declared twice,
// and a catch clause appears before the else). The comments below describe the branch structure only.
List<FieldInfo> fieldInfos = new ArrayList<>();
ResultSet rs = getFieldNamesFromJDBC(table);
try {
while (rs.next()) {
FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME"));
fieldInfo.type = rs.getString("TYPE_NAME");
// A non-null query selects the SQL route; null selects the JDBC metadata route in the else branch.
String fieldInfoQuery = getFieldsInformationQuery(table);
if (fieldInfoQuery != null) {
logger.warn("Obtaining field metadata through SQL query: {}", fieldInfoQuery);
QueryResult queryResult = getDBConnection().query(fieldInfoQuery);
for (Row row : queryResult) {
// The first cell is used as the field name, the second cell as the field type.
FieldInfo fieldInfo = new FieldInfo(scanParameters, row.getCells().get(0));
fieldInfo.type = row.getCells().get(1);
// NOTE(review): getTableSize(table) is called once per field; the value looks loop-invariant - confirm and consider hoisting.
fieldInfo.rowCount = getTableSize(table);
fieldInfos.add(fieldInfo);
}
} catch (
SQLException e) {
throw new RuntimeException(e.getMessage());
} else {
logger.warn("Obtaining field metadata through JDBC");
ResultSet rs = getFieldsInformation(table);
try {
while (rs.next()) {
// Field name and type are read from the COLUMN_NAME and TYPE_NAME columns of the metadata result.
FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME"));
fieldInfo.type = rs.getString("TYPE_NAME");
fieldInfo.rowCount = getTableSize(table);
fieldInfos.add(fieldInfo);
}
} catch (
SQLException e) {
// NOTE(review): only the message is carried over; the SQLException is not passed on as cause.
throw new RuntimeException(e.getMessage());
}
}
return fieldInfos;
}

/**
 * Supplies an SQL query for reading the field metadata of a table. This default supplies none.
 *
 * <p>fetchTableStructure runs a non-null result as a query and reads the field name from the first
 * cell and the field type from the second cell of each row; on {@code null} it calls
 * getFieldsInformation instead.
 *
 * @param table name of the table to get the field metadata query for
 * @return {@code null} in this default implementation
 */
default String getFieldsInformationQuery(String table) {
return null;
}

/**
* Retrieves column names (fields) for a table.
*
Expand All @@ -179,7 +196,7 @@ default List<FieldInfo> fetchTableStructure(String table, ScanParameters scanPar
* @param table name of the table to get the column names for
* @return java.sql.ResultSet
*/
default ResultSet getFieldNamesFromJDBC(String table) {
default ResultSet getFieldsInformation(String table) {
try {
DatabaseMetaData metadata = getDBConnection().getMetaData();
return metadata.getColumns(null, null, table, null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
import org.ohdsi.utilities.ScanFieldName;
import org.ohdsi.utilities.ScanSheetName;
import org.ohdsi.utilities.files.QuickAndDirtyXlsxReader;
Expand Down Expand Up @@ -78,15 +79,18 @@ public String getDbName() {
return dbName;
}

/**
 * Builds the Database model for a CDM version by opening the class-path resource named by
 * {@code cdmVersion.fileName} and passing it, together with that file name, to generateModelFromCSV.
 */
// NOTE(review): two signatures are shown (without and with `throws IOException`); this looks like removed and
// added diff lines rendered together - presumably the throwing variant is the current one, confirm.
public static Database generateCDMModel(CDMVersion cdmVersion) {
public static Database generateCDMModel(CDMVersion cdmVersion) throws IOException {
return Database.generateModelFromCSV(Database.class.getResourceAsStream(cdmVersion.fileName), cdmVersion.fileName);
}

public static Database generateModelFromCSV(InputStream stream, String dbName) {
public static Database generateModelFromCSV(InputStream stream, String dbName) throws IOException {
Database database = new Database();

database.dbName = dbName.substring(0, dbName.lastIndexOf("."));

// wrap the stream with a BOM handling inputstream
stream = BOMInputStream.builder().setInputStream(stream).get();

Map<String, Table> nameToTable = new HashMap<>();
try {
ConceptsMap conceptIdHintsMap = new ConceptsMap(CONCEPT_ID_HINTS_FILE_NAME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import java.util.ArrayList;
import java.util.List;

public class Table implements MappableItem {
public class Table implements MappableItem, Comparable {

private Database db;
private String name;
Expand Down Expand Up @@ -152,4 +152,9 @@ public static String createSheetNameFromTableName(String tableName) {
name = name.replace('/','_');
return name;
}

/**
 * Orders tables by name, using {@code String.compareTo} on the name fields.
 *
 * @param o the object to compare with; it is cast to Table
 * @return the result of comparing this table's name with the other table's name
 */
@Override
public int compareTo(Object o) {
    final Table other = (Table) o;
    return this.name.compareTo(other.name);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
******************************************************************************/
package org.ohdsi.utilities.files;

import org.apache.commons.io.input.BOMInputStream;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
Expand Down Expand Up @@ -46,11 +48,11 @@ public ReadTextFile(InputStream inputStream) {
public ReadTextFile(String filename) {
this.filename = filename;
try {
FileInputStream inputStream = new FileInputStream(filename);
InputStream inputStream = BOMInputStream.builder().setInputStream(new FileInputStream(filename)).get();
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
} catch (IOException e) {
System.err.println("Computer does not support UTF-8 encoding");
e.printStackTrace();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.ohdsi.rabbitInAHat.dataModel;

import org.apache.commons.io.input.BOMInputStream;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.io.InputStream;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for generating a {@link Database} model from CSV resources.
 */
class TestDatabase {

    /**
     * Confirms that issue #411 is fixed: custom models can be read from (UTF-8) CSV files
     * both with and without a BOM.
     */
    @Test
    void testGenerateModelFromCSV() throws IOException {
        // generate a model from a CSV file without BOM
        assertModelCanBeGenerated("tiny_riah_without_bom.csv");

        // generate a model from a CSV file with BOM
        assertModelCanBeGenerated("tiny_riah_with_bom.csv");
    }

    /**
     * Loads the named test resource, generates a model from it and asserts that a model was produced.
     * The stream is closed afterwards, also when model generation throws.
     */
    private static void assertModelCanBeGenerated(String resourceName) throws IOException {
        try (InputStream in = TestDatabase.class.getResourceAsStream(resourceName)) {
            assertNotNull(in, "test resource not found: " + resourceName);
            Database database = Database.generateModelFromCSV(in, resourceName);
            assertNotNull(database, "no model generated from " + resourceName);
        }
    }
}
Loading

0 comments on commit 4aa4253

Please sign in to comment.