# test_morphlines.conf

# Shared Solr connection settings; the morphline commands below pull this
# object in through the ${SOLR_LOCATOR} substitution.
SOLR_LOCATOR : {

  # Target Solr collection.
  collection : solrtest

  # ZooKeeper ensemble address -- replace with your cluster's ZK hostname(s).
  zkHost : "localhost:2181/solr"

  # Throughput knob: upper bound on the number of documents sent to Solr in
  # one network batch. Left commented out here.
  # batchSize : 1000
}
 
# Morphline definitions. This file declares a single morphline, "solrTest",
# whose commands read Avro-Parquet input, pull out the `id` and `value`
# fields, and hand the resulting record to Solr.
morphlines : [
  {
    id : solrTest

    # Packages searched for morphline command implementations.
    importCommands : [
      "org.kitesdk.**",
      "com.cloudera.**",
      "org.apache.solr.**"
    ]

    commands : [

      # Step 1: read the Parquet data.
      #
      # Parquet files that were not produced through the parquet.avro package
      # (Impala-written files, for example) carry no Avro write schema in
      # their metadata. For such files, either supply an Avro reader schema
      # (e.g. through the readerSchemaFile parameter) or let a default Avro
      # schema be derived using the standard mapping specification.
      {
        readAvroParquetFile {
          # Optional inline Avro reader schema (JSON) used for projection.
          readerSchemaString:"""{ "type": "record"
             ,"name": "my_record"
             ,"fields": [
                {"name": "id", "type":["null","string"]}
               ,{"name": "value", "type":["null","string"]}
             ]
         }"""
        }
      }

      # Step 2: log each record at debug level.
      {
        logDebug {
          format : "output record {}"
          args : ["@{}"]
        }
      }

      # Step 3: map the Avro paths /id and /value to the record fields
      # `id` and `value`.
      {
        extractAvroPaths {
          flatten : true
          paths : {
            id : /id
            value : /value
          }
        }
      }

      # Step 4: sanitize fields that the Solr schema for SOLR_LOCATOR does not
      # know about (see the Kite Morphlines reference for the exact policy).
      {
        sanitizeUnknownSolrFields {
          solrLocator : ${SOLR_LOCATOR}
        }
      }

      # Step 5: load the record into a Solr server or MapReduce Reducer.
      {
        loadSolr {
          solrLocator : ${SOLR_LOCATOR}
        }
      }

    ]
  }
]