
Commit

Merge branch 'OHDSI:master' into master
janblom authored Aug 6, 2024
2 parents d3a2591 + 4aa4253 commit 93642d7
Showing 23 changed files with 5,460 additions and 66 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -37,6 +37,11 @@ Technology
White Rabbit and Rabbit in a Hat are pure Java applications. Both applications use [Apache's POI Java libraries](http://poi.apache.org/) to read and write Word and Excel files.
White Rabbit uses JDBC to connect to the respective databases.

Intended use
============
White Rabbit and Rabbit In A Hat were designed and implemented for use within a secure and trusted environment. No effort has been made to
encrypt or otherwise protect passwords, parameters, or results. Keep this in mind when deploying these tools.

System Requirements
============
Requires Java 1.8 or higher for running, and read access to the database to be scanned. Java can be downloaded from
2 changes: 1 addition & 1 deletion pom.xml
@@ -6,7 +6,7 @@
<groupId>org.ohdsi</groupId>
<artifactId>leporidae</artifactId>
<packaging>pom</packaging>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0</version>
<modules>
<module>rabbitinahat</module>
<module>whiterabbit</module>
10 changes: 5 additions & 5 deletions rabbit-core/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>leporidae</artifactId>
<groupId>org.ohdsi</groupId>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@@ -37,9 +37,9 @@

<dependencies>
<dependency>
<groupId>com.oracle.ojdbc</groupId>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>19.3.0.0</version>
<version>19.23.0.0</version>
</dependency>
<dependency>
<groupId>com.microsoft.sqlserver</groupId>
@@ -145,7 +145,7 @@
<dependency>
<groupId>com.amazon.redshift</groupId>
<artifactId>redshift-jdbc42</artifactId>
<version>2.1.0.25</version>
<version>2.1.0.28</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.avro/avro -->
<dependency>
@@ -175,7 +175,7 @@
<dependency>
<groupId>net.snowflake</groupId>
<artifactId>snowflake-jdbc</artifactId>
<version>3.14.5</version>
<version>3.15.0</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
DBConnection.java
@@ -115,6 +115,10 @@ public void use(String database, DbType dbType) {
}
}

public QueryResult query(String sql) {
return new QueryResult(sql, this, verbose);
}

public void execute(String sql) {
execute(sql, false);
}
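
The new query(String sql) helper pairs a SQL statement with this connection and returns an iterable QueryResult. A minimal usage sketch, assuming an initialized DBConnection named connection (the SQL text is illustrative; the Row/getCells() access pattern is the one used in StorageHandler.fetchTableStructure below):

// Hypothetical usage of the new DBConnection.query(...) helper.
QueryResult queryResult = connection.query("SELECT column_name, data_type FROM information_schema.columns");
for (Row row : queryResult) {
    // Each Row exposes its values as a list of cell strings.
    System.out.println(row.getCells().get(0) + " -> " + row.getCells().get(1));
}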
SnowflakeHandler.java
@@ -38,6 +38,7 @@
*/
public enum SnowflakeHandler implements StorageHandler {
INSTANCE();
public static final String WR_USE_SNOWFLAKE_JDBC_METADATA = "WR_USE_SNOWFLAKE_METADATA";

DBConfiguration configuration = new SnowflakeConfiguration();
private DBConnection snowflakeConnection = null;
@@ -98,18 +99,57 @@ public DBConnection getDBConnection() {
}

public String getUseQuery(String ignoredDatabase) {
String useQuery = String.format("USE WAREHOUSE \"%s\";", configuration.getValue(SNOWFLAKE_WAREHOUSE).toUpperCase());
logger.info("SnowFlakeHandler will execute query: " + useQuery);
String useQuery = String.format("USE WAREHOUSE %s;", configuration.getValue(SNOWFLAKE_WAREHOUSE));
logger.info("SnowFlakeHandler will execute query: {}", useQuery);
return useQuery;
}

@Override
public String getTableSizeQuery(String tableName) {
return String.format("SELECT COUNT(*) FROM %s.%s.%s;", this.getDatabase(), this.getSchema(), tableName);
return String.format("SELECT COUNT(*) FROM %s;", resolveTableName(tableName));
}

public String getRowSampleQuery(String table, long rowCount, long sampleSize) {
return String.format("SELECT * FROM %s ORDER BY RANDOM() LIMIT %s", table, sampleSize);
public String getRowSampleQuery(String tableName, long rowCount, long sampleSize) {
return String.format("SELECT * FROM %s ORDER BY RANDOM() LIMIT %s", resolveTableName(tableName), sampleSize);
}

private String resolveTableName(String tableName) {
return String.format("%s.%s.%s", this.getDatabase(), this.getSchema(), tableName);
}

@Override
public ResultSet getFieldsInformation(String tableName) {
try {
String database = this.getDatabase();
String schema = this.getSchema();
DatabaseMetaData metadata = getDBConnection().getMetaData();
if (metadata.storesUpperCaseIdentifiers()) {
database = database.toUpperCase();
schema = schema.toUpperCase();
tableName = tableName.toUpperCase();
} else if (metadata.storesLowerCaseIdentifiers()) {
database = database.toLowerCase();
schema = schema.toLowerCase();
tableName = tableName.toLowerCase();
}

logger.warn("Obtaining columnn information from JDBC metadata: metadata.getColumns({}, {}, {}, null)",
database, schema, tableName);
return metadata.getColumns(database, schema, tableName, null);
} catch (SQLException e) {
throw new RuntimeException(e.getMessage());
}
}

@Override
public String getFieldsInformationQuery(String tableName) {
if (System.getenv(WR_USE_SNOWFLAKE_JDBC_METADATA) != null || System.getProperty(WR_USE_SNOWFLAKE_JDBC_METADATA) != null) {
return null; // not providing a query forces use of JDBC metadata
} else {
return String.format(
"SELECT column_name, data_type FROM %s.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'",
this.getDatabase().toUpperCase(), this.getSchema().toUpperCase(), tableName.toUpperCase());
}
}

public String getTablesQuery(String database) {
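
Taken together with the StorageHandler change below, these Snowflake changes make the source of field metadata switchable at runtime: if the WR_USE_SNOWFLAKE_METADATA environment variable or system property is set (to any value), getFieldsInformationQuery() returns null and the scan falls back to JDBC DatabaseMetaData; otherwise the INFORMATION_SCHEMA query is used. A minimal sketch of enabling the fallback programmatically (illustrative only; passing -DWR_USE_SNOWFLAKE_METADATA=true on the java command line has the same effect):

// Illustrative: the constant WR_USE_SNOWFLAKE_JDBC_METADATA holds the name
// "WR_USE_SNOWFLAKE_METADATA"; defining it as a system property (or exporting
// it as an environment variable) forces the JDBC metadata path.
System.setProperty(SnowflakeHandler.WR_USE_SNOWFLAKE_JDBC_METADATA, "true");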
35 changes: 26 additions & 9 deletions rabbit-core/src/main/java/org/ohdsi/databases/StorageHandler.java
@@ -155,21 +155,38 @@ default List<String> getTableNames() {
*/
default List<FieldInfo> fetchTableStructure(String table, ScanParameters scanParameters) {
List<FieldInfo> fieldInfos = new ArrayList<>();
ResultSet rs = getFieldNamesFromJDBC(table);
try {
while (rs.next()) {
FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME"));
fieldInfo.type = rs.getString("TYPE_NAME");
String fieldInfoQuery = getFieldsInformationQuery(table);
if (fieldInfoQuery != null) {
logger.warn("Obtaining field metadata through SQL query: {}", fieldInfoQuery);
QueryResult queryResult = getDBConnection().query(fieldInfoQuery);
for (Row row : queryResult) {
FieldInfo fieldInfo = new FieldInfo(scanParameters, row.getCells().get(0));
fieldInfo.type = row.getCells().get(1);
fieldInfo.rowCount = getTableSize(table);
fieldInfos.add(fieldInfo);
}
} catch (SQLException e) {
throw new RuntimeException(e.getMessage());
} else {
logger.warn("Obtaining field metadata through JDBC");
ResultSet rs = getFieldsInformation(table);
try {
while (rs.next()) {
FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME"));
fieldInfo.type = rs.getString("TYPE_NAME");
fieldInfo.rowCount = getTableSize(table);
fieldInfos.add(fieldInfo);
}
} catch (SQLException e) {
throw new RuntimeException(e.getMessage());
}
}
return fieldInfos;
}

default String getFieldsInformationQuery(String table) {
return null;
}

/**
* Retrieves column names (fields) for a table.
*
@@ -179,7 +196,7 @@ default List<FieldInfo> fetchTableStructure(String table, ScanParameters scanPar
* @param table name of the table to get the column names for
* @return java.sql.ResultSet
*/
default ResultSet getFieldNamesFromJDBC(String table) {
default ResultSet getFieldsInformation(String table) {
try {
DatabaseMetaData metadata = getDBConnection().getMetaData();
return metadata.getColumns(null, null, table, null);
Database.java
@@ -25,6 +25,7 @@

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
import org.ohdsi.utilities.ScanFieldName;
import org.ohdsi.utilities.ScanSheetName;
import org.ohdsi.utilities.files.QuickAndDirtyXlsxReader;
@@ -78,15 +79,18 @@ public String getDbName() {
return dbName;
}

public static Database generateCDMModel(CDMVersion cdmVersion) {
public static Database generateCDMModel(CDMVersion cdmVersion) throws IOException {
return Database.generateModelFromCSV(Database.class.getResourceAsStream(cdmVersion.fileName), cdmVersion.fileName);
}

public static Database generateModelFromCSV(InputStream stream, String dbName) {
public static Database generateModelFromCSV(InputStream stream, String dbName) throws IOException {
Database database = new Database();

database.dbName = dbName.substring(0, dbName.lastIndexOf("."));

// wrap the stream with a BOM handling inputstream
stream = BOMInputStream.builder().setInputStream(stream).get();

Map<String, Table> nameToTable = new HashMap<>();
try {
ConceptsMap conceptIdHintsMap = new ConceptsMap(CONCEPT_ID_HINTS_FILE_NAME);
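
BOMInputStream (Apache Commons IO) consumes a leading UTF-8 byte-order mark, so model CSV files saved by tools that prepend a BOM, such as Excel, now parse the same as plain UTF-8 files. A self-contained sketch of the wrapping pattern used above (the class and sample bytes are illustrative):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.input.BOMInputStream;

class BomDemo {
    public static void main(String[] args) throws IOException {
        // A UTF-8 BOM (EF BB BF) followed by ASCII text, as Excel writes CSV.
        byte[] csv = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 't', 'a', 'b'};
        InputStream in = BOMInputStream.builder()
                .setInputStream(new ByteArrayInputStream(csv))
                .get();
        System.out.println((char) in.read()); // prints 't': the BOM was skipped
    }
}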
Table.java
@@ -20,7 +20,7 @@
import java.util.ArrayList;
import java.util.List;

public class Table implements MappableItem {
public class Table implements MappableItem, Comparable {

private Database db;
private String name;
@@ -152,4 +152,9 @@ public static String createSheetNameFromTableName(String tableName) {
name = name.replace('/','_');
return name;
}

@Override
public int compareTo(Object o) {
return this.name.compareTo(((Table) o).name);
}
}
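
With Table now Comparable, callers can sort tables alphabetically by name using the standard collections API, with no custom Comparator. A minimal sketch (how the list is populated is illustrative):

// Illustrative: Table orders by its name field via compareTo.
List<Table> tables = new ArrayList<>(); // populated from a Database elsewhere
Collections.sort(tables);               // now sorts alphabetically by table name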
ReadTextFile.java
@@ -17,6 +17,8 @@
******************************************************************************/
package org.ohdsi.utilities.files;

import org.apache.commons.io.input.BOMInputStream;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@@ -46,11 +48,11 @@ public ReadTextFile(InputStream inputStream) {
public ReadTextFile(String filename) {
this.filename = filename;
try {
FileInputStream inputStream = new FileInputStream(filename);
InputStream inputStream = BOMInputStream.builder().setInputStream(new FileInputStream(filename)).get();
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
} catch (IOException e) {
System.err.println("Computer does not support UTF-8 encoding");
e.printStackTrace();
}
TestDatabase.java
@@ -0,0 +1,30 @@
package org.ohdsi.rabbitInAHat.dataModel;

import org.apache.commons.io.input.BOMInputStream;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.io.InputStream;

import static org.junit.jupiter.api.Assertions.*;

class TestDatabase {

@Test
void testGenerateModelFromCSV() throws IOException {
// confirm that issue #411 is fixed, can read custom models from (UTF-8) CSV files with and without BOM

// generate a model from a CSV file without BOM
String testFileWithoutBom = "tiny_riah_without_bom.csv";
InputStream inWithoutBom = TestDatabase.class.getResourceAsStream(testFileWithoutBom);
assertNotNull(inWithoutBom);
Database ignoredWithoutBom = Database.generateModelFromCSV(inWithoutBom, testFileWithoutBom);

// generate a model from a CSV file with BOM
String testFileWithBom = "tiny_riah_with_bom.csv";
InputStream inWithBom = TestDatabase.class.getResourceAsStream(testFileWithBom);
assertNotNull(inWithBom);
Database ignoredWithBom = Database.generateModelFromCSV(inWithBom, testFileWithBom);

}
}