
Commit

Merge branch 'OHDSI:master' into master
janblom authored Aug 6, 2024
2 parents d3a2591 + 4aa4253 commit 93642d7
Showing 23 changed files with 5,460 additions and 66 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -37,6 +37,11 @@ Technology
White Rabbit and Rabbit in a Hat are pure Java applications. Both applications use [Apache's POI Java libraries](http://poi.apache.org/) to read and write Word and Excel files.
White Rabbit uses JDBC to connect to the respective databases.

Intended use
============
White Rabbit and Rabbit In A Hat were designed and implemented for use within a secure and trusted environment. No effort has been made to
encrypt or otherwise protect passwords, parameters, or results. Keep this in mind when deploying these tools.

System Requirements
============
Requires Java 1.8 or higher for running, and read access to the database to be scanned. Java can be downloaded from
2 changes: 1 addition & 1 deletion pom.xml
@@ -6,7 +6,7 @@
<groupId>org.ohdsi</groupId>
<artifactId>leporidae</artifactId>
<packaging>pom</packaging>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0</version>
<modules>
<module>rabbitinahat</module>
<module>whiterabbit</module>
10 changes: 5 additions & 5 deletions rabbit-core/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>leporidae</artifactId>
<groupId>org.ohdsi</groupId>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@@ -37,9 +37,9 @@

<dependencies>
<dependency>
<groupId>com.oracle.ojdbc</groupId>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>19.3.0.0</version>
<version>19.23.0.0</version>
</dependency>
<dependency>
<groupId>com.microsoft.sqlserver</groupId>
@@ -145,7 +145,7 @@
<dependency>
<groupId>com.amazon.redshift</groupId>
<artifactId>redshift-jdbc42</artifactId>
<version>2.1.0.25</version>
<version>2.1.0.28</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.avro/avro -->
<dependency>
@@ -175,7 +175,7 @@
<dependency>
<groupId>net.snowflake</groupId>
<artifactId>snowflake-jdbc</artifactId>
<version>3.14.5</version>
<version>3.15.0</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
DBConnection.java
@@ -115,6 +115,10 @@ public void use(String database, DbType dbType) {
}
}

public QueryResult query(String sql) {
return new QueryResult(sql, this, verbose);
}

public void execute(String sql) {
execute(sql, false);
}
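
The new query(String sql) helper pairs a SQL statement with this connection and returns an iterable QueryResult. A minimal usage sketch, assuming an initialized DBConnection named connection (the SQL text is illustrative; the Row/getCells() access pattern is the one used in StorageHandler.fetchTableStructure below):

// Hypothetical usage of the new DBConnection.query(...) helper.
QueryResult queryResult = connection.query("SELECT column_name, data_type FROM information_schema.columns");
for (Row row : queryResult) {
    // Each Row exposes its values as a list of cell strings.
    System.out.println(row.getCells().get(0) + " -> " + row.getCells().get(1));
}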
SnowflakeHandler.java
@@ -38,6 +38,7 @@
*/
public enum SnowflakeHandler implements StorageHandler {
INSTANCE();
public static final String WR_USE_SNOWFLAKE_JDBC_METADATA = "WR_USE_SNOWFLAKE_METADATA";

DBConfiguration configuration = new SnowflakeConfiguration();
private DBConnection snowflakeConnection = null;
@@ -98,18 +99,57 @@ public DBConnection getDBConnection() {
}

public String getUseQuery(String ignoredDatabase) {
String useQuery = String.format("USE WAREHOUSE \"%s\";", configuration.getValue(SNOWFLAKE_WAREHOUSE).toUpperCase());
logger.info("SnowFlakeHandler will execute query: " + useQuery);
String useQuery = String.format("USE WAREHOUSE %s;", configuration.getValue(SNOWFLAKE_WAREHOUSE));
logger.info("SnowFlakeHandler will execute query: {}", useQuery);
return useQuery;
}

@Override
public String getTableSizeQuery(String tableName) {
return String.format("SELECT COUNT(*) FROM %s.%s.%s;", this.getDatabase(), this.getSchema(), tableName);
return String.format("SELECT COUNT(*) FROM %s;", resolveTableName(tableName));
}

public String getRowSampleQuery(String table, long rowCount, long sampleSize) {
return String.format("SELECT * FROM %s ORDER BY RANDOM() LIMIT %s", table, sampleSize);
public String getRowSampleQuery(String tableName, long rowCount, long sampleSize) {
return String.format("SELECT * FROM %s ORDER BY RANDOM() LIMIT %s", resolveTableName(tableName), sampleSize);
}

private String resolveTableName(String tableName) {
return String.format("%s.%s.%s", this.getDatabase(), this.getSchema(), tableName);
}

@Override
public ResultSet getFieldsInformation(String tableName) {
try {
String database = this.getDatabase();
String schema = this.getSchema();
DatabaseMetaData metadata = getDBConnection().getMetaData();
if (metadata.storesUpperCaseIdentifiers()) {
database = database.toUpperCase();
schema = schema.toUpperCase();
tableName = tableName.toUpperCase();
} else if (metadata.storesLowerCaseIdentifiers()) {
database = database.toLowerCase();
schema = schema.toLowerCase();
tableName = tableName.toLowerCase();
}

logger.warn("Obtaining columnn information from JDBC metadata: metadata.getColumns({}, {}, {}, null)",
database, schema, tableName);
return metadata.getColumns(database, schema, tableName, null);
} catch (SQLException e) {
throw new RuntimeException(e.getMessage());
}
}

@Override
public String getFieldsInformationQuery(String tableName) {
if (System.getenv(WR_USE_SNOWFLAKE_JDBC_METADATA) != null || System.getProperty(WR_USE_SNOWFLAKE_JDBC_METADATA) != null) {
return null; // not providing a query forces use of JDBC metadata
} else {
return String.format(
"SELECT column_name, data_type FROM %s.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'",
this.getDatabase().toUpperCase(), this.getSchema().toUpperCase(), tableName.toUpperCase());
}
}

public String getTablesQuery(String database) {
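
Taken together with the StorageHandler change below, these Snowflake changes make the source of field metadata switchable at runtime: if the WR_USE_SNOWFLAKE_METADATA environment variable or system property is set (to any value), getFieldsInformationQuery() returns null and the scan falls back to JDBC DatabaseMetaData; otherwise the INFORMATION_SCHEMA query is used. A minimal sketch of enabling the fallback programmatically (illustrative only; passing -DWR_USE_SNOWFLAKE_METADATA=true on the java command line has the same effect):

// Illustrative: the constant WR_USE_SNOWFLAKE_JDBC_METADATA holds the name
// "WR_USE_SNOWFLAKE_METADATA"; defining it as a system property (or exporting
// it as an environment variable) forces the JDBC metadata path.
System.setProperty(SnowflakeHandler.WR_USE_SNOWFLAKE_JDBC_METADATA, "true");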
35 changes: 26 additions & 9 deletions rabbit-core/src/main/java/org/ohdsi/databases/StorageHandler.java
@@ -155,21 +155,38 @@ default List<String> getTableNames() {
*/
default List<FieldInfo> fetchTableStructure(String table, ScanParameters scanParameters) {
List<FieldInfo> fieldInfos = new ArrayList<>();
ResultSet rs = getFieldNamesFromJDBC(table);
try {
while (rs.next()) {
FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME"));
fieldInfo.type = rs.getString("TYPE_NAME");
String fieldInfoQuery = getFieldsInformationQuery(table);
if (fieldInfoQuery != null) {
logger.warn("Obtaining field metadata through SQL query: {}", fieldInfoQuery);
QueryResult queryResult = getDBConnection().query(fieldInfoQuery);
for (Row row : queryResult) {
FieldInfo fieldInfo = new FieldInfo(scanParameters, row.getCells().get(0));
fieldInfo.type = row.getCells().get(1);
fieldInfo.rowCount = getTableSize(table);
fieldInfos.add(fieldInfo);
}
} catch (SQLException e) {
throw new RuntimeException(e.getMessage());
} else {
logger.warn("Obtaining field metadata through JDBC");
ResultSet rs = getFieldsInformation(table);
try {
while (rs.next()) {
FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME"));
fieldInfo.type = rs.getString("TYPE_NAME");
fieldInfo.rowCount = getTableSize(table);
fieldInfos.add(fieldInfo);
}
} catch (SQLException e) {
throw new RuntimeException(e.getMessage());
}
}
return fieldInfos;
}

default String getFieldsInformationQuery(String table) {
return null;
}

/**
* Retrieves column names (fields) for a table.
*
@@ -179,7 +196,7 @@ default List<FieldInfo> fetchTableStructure(String table, ScanParameters scanPar
* @param table name of the table to get the column names for
* @return java.sql.ResultSet
*/
default ResultSet getFieldNamesFromJDBC(String table) {
default ResultSet getFieldsInformation(String table) {
try {
DatabaseMetaData metadata = getDBConnection().getMetaData();
return metadata.getColumns(null, null, table, null);
Database.java
@@ -25,6 +25,7 @@

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
import org.ohdsi.utilities.ScanFieldName;
import org.ohdsi.utilities.ScanSheetName;
import org.ohdsi.utilities.files.QuickAndDirtyXlsxReader;
@@ -78,15 +79,18 @@ public String getDbName() {
return dbName;
}

public static Database generateCDMModel(CDMVersion cdmVersion) {
public static Database generateCDMModel(CDMVersion cdmVersion) throws IOException {
return Database.generateModelFromCSV(Database.class.getResourceAsStream(cdmVersion.fileName), cdmVersion.fileName);
}

public static Database generateModelFromCSV(InputStream stream, String dbName) {
public static Database generateModelFromCSV(InputStream stream, String dbName) throws IOException {
Database database = new Database();

database.dbName = dbName.substring(0, dbName.lastIndexOf("."));

// wrap the stream with a BOM handling inputstream
stream = BOMInputStream.builder().setInputStream(stream).get();

Map<String, Table> nameToTable = new HashMap<>();
try {
ConceptsMap conceptIdHintsMap = new ConceptsMap(CONCEPT_ID_HINTS_FILE_NAME);
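
BOMInputStream (Apache Commons IO) consumes a leading UTF-8 byte-order mark, so model CSV files saved by tools that prepend a BOM, such as Excel, now parse the same as plain UTF-8 files. A self-contained sketch of the wrapping pattern used above (the class and sample bytes are illustrative):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.input.BOMInputStream;

class BomDemo {
    public static void main(String[] args) throws IOException {
        // A UTF-8 BOM (EF BB BF) followed by ASCII text, as Excel writes CSV.
        byte[] csv = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 't', 'a', 'b'};
        InputStream in = BOMInputStream.builder()
                .setInputStream(new ByteArrayInputStream(csv))
                .get();
        System.out.println((char) in.read()); // prints 't': the BOM was skipped
    }
}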
Table.java
@@ -20,7 +20,7 @@
import java.util.ArrayList;
import java.util.List;

public class Table implements MappableItem {
public class Table implements MappableItem, Comparable {

private Database db;
private String name;
@@ -152,4 +152,9 @@ public static String createSheetNameFromTableName(String tableName) {
name = name.replace('/','_');
return name;
}

@Override
public int compareTo(Object o) {
return this.name.compareTo(((Table) o).name);
}
}
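
With Table now Comparable, callers can sort tables alphabetically by name using the standard collections API, with no custom Comparator. A minimal sketch (how the list is populated is illustrative):

// Illustrative: Table orders by its name field via compareTo.
List<Table> tables = new ArrayList<>(); // populated from a Database elsewhere
Collections.sort(tables);               // now sorts alphabetically by table name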
ReadTextFile.java
@@ -17,6 +17,8 @@
******************************************************************************/
package org.ohdsi.utilities.files;

import org.apache.commons.io.input.BOMInputStream;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@@ -46,11 +48,11 @@ public ReadTextFile(InputStream inputStream) {
public ReadTextFile(String filename) {
this.filename = filename;
try {
FileInputStream inputStream = new FileInputStream(filename);
InputStream inputStream = BOMInputStream.builder().setInputStream(new FileInputStream(filename)).get();
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
} catch (IOException e) {
System.err.println("Computer does not support UTF-8 encoding");
e.printStackTrace();
}
TestDatabase.java
@@ -0,0 +1,30 @@
package org.ohdsi.rabbitInAHat.dataModel;

import org.apache.commons.io.input.BOMInputStream;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.io.InputStream;

import static org.junit.jupiter.api.Assertions.*;

class TestDatabase {

@Test
void testGenerateModelFromCSV() throws IOException {
// confirm that issue #411 is fixed, can read custom models from (UTF-8) CSV files with and without BOM

// generate a model from a CSV file without BOM
String testFileWithoutBom = "tiny_riah_without_bom.csv";
InputStream inWithoutBom = TestDatabase.class.getResourceAsStream(testFileWithoutBom);
assertNotNull(inWithoutBom);
Database ignoredWithoutBom = Database.generateModelFromCSV(inWithoutBom, testFileWithoutBom);

// generate a model from a CSV file with BOM
String testFileWithBom = "tiny_riah_with_bom.csv";
InputStream inWithBom = TestDatabase.class.getResourceAsStream(testFileWithBom);
assertNotNull(inWithBom);
Database ignoredWithBom = Database.generateModelFromCSV(inWithBom, testFileWithBom);

}
}