# test_morphlines.conf
# Forked from bkvarda/MorphlineParquetIndexer
# Connection settings for the target Solr collection. The Solr commands in the
# morphline pull this object in through the ${SOLR_LOCATOR} substitution.
SOLR_LOCATOR : {
  # Solr collection that receives the documents
  collection : solrtest

  # ZooKeeper ensemble address -- change this to match your cluster's
  # ZooKeeper hostname(s)
  zkHost : "localhost:2181/solr"

  # Throughput knob: maximum number of documents sent to Solr per network
  # batch. Left commented out, so no explicit value is set here.
  # batchSize : 1000
}
# Morphline definitions. The single morphline here reads Parquet data as Avro
# records and loads the id/value fields into Solr.
morphlines : [
  {
    id : solrTest

    # Java package patterns from which morphline commands are imported
    importCommands : ["org.kitesdk.**", "com.cloudera.**", "org.apache.solr.**"]

    # Commands are listed in pipeline order.
    commands : [
      # Step 1: read the Parquet data.
      {
        readAvroParquetFile {
          # Parquet files that were not written with the parquet.avro package
          # (e.g. Impala Parquet files) carry no Avro write schema in their
          # file metadata. To read such files with readAvroParquetFile, either
          # supply an Avro reader schema via the readerSchemaFile parameter,
          # or let a default Avro schema be derived using the standard mapping
          # specification.
          #
          # Optionally, give an Avro schema inline in JSON format for
          # projection, as done here. The continuation lines stay flush left
          # because whitespace inside the triple quotes is part of the string.
          readerSchemaString : """{ "type": "record"
,"name": "my_record"
,"fields": [
{"name": "id", "type":["null","string"]}
,{"name": "value", "type":["null","string"]}
]
}"""
        }
      }

      # Step 2: log each record at debug level; the {} placeholder in the
      # format string is filled from args.
      {
        logDebug {
          format : "output record {}"
          args : ["@{}"]
        }
      }

      # Step 3: map the Avro paths /id and /value onto the output fields
      # id and value.
      {
        extractAvroPaths {
          flatten : true
          paths : {
            id : /id
            value : /value
          }
        }
      }

      # Step 4: deal with record fields that the Solr collection does not
      # know about (see the Kite Morphlines reference for the exact policy).
      {
        sanitizeUnknownSolrFields {
          solrLocator : ${SOLR_LOCATOR}
        }
      }

      # Step 5: load the record into a Solr server or MapReduce Reducer.
      {
        loadSolr {
          solrLocator : ${SOLR_LOCATOR}
        }
      }
    ]
  }
]