From 1e8511a42c4e4e419bbb5d8c0c60e92c31123631 Mon Sep 17 00:00:00 2001
From: parisni
Date: Tue, 18 Dec 2018 22:04:17 +0100
Subject: [PATCH 1/3] Handle multivalued fields within child documents

---
 .../scala/com/lucidworks/spark/SolrRelation.scala | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/main/scala/com/lucidworks/spark/SolrRelation.scala b/src/main/scala/com/lucidworks/spark/SolrRelation.scala
index 929664ab..196ea5f1 100644
--- a/src/main/scala/com/lucidworks/spark/SolrRelation.scala
+++ b/src/main/scala/com/lucidworks/spark/SolrRelation.scala
@@ -684,7 +684,17 @@ class SolrRelation(
         val elem = it.next()
         val childDoc = new SolrInputDocument
         for (i <- 0 until elem.schema.fields.size) {
-          childDoc.setField(elem.schema.fields(i).name, elem.get(i))
+          val childFname = elem.schema.fields(i).name
+          val childValue = elem.get(i)
+          childValue match {
+            // TODO: Do we need to check explicitly for ArrayBuffer and WrappedArray?
+            case v: Iterable[Any] =>
+              val valueIt = v.iterator
+              while (valueIt.hasNext) childDoc.addField(childFname, valueIt.next())
+            case bd: java.math.BigDecimal =>
+              childDoc.setField(childFname, bd.doubleValue())
+            case _ => childDoc.setField(childFname, childValue)
+          }
         }

         // Generate unique key if the child document doesn't have one

From fe494193cbd1364fa324613c79f3fbfef1b31097 Mon Sep 17 00:00:00 2001
From: parisni
Date: Sat, 5 Jan 2019 19:18:21 +0100
Subject: [PATCH 2/3] Exclude some Scala dependencies so Livy works

See:
- https://community.cloudera.com/t5/Advanced-Analytics-Apache-Spark/Livy-object-Predef-does-not-have-a-member-classOf/td-p/66115
- https://groups.google.com/a/cloudera.org/forum/#!searchin/livy-user/$20Predef$20does$20not$20have$20a$20member$20classOf%7Csort:date/livy-user/bBZMo9LC0S4/zpGXfP9OBQAJ
---
 pom.xml | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pom.xml b/pom.xml
index cfb28e0f..e4b49bb0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -40,7 +40,7 @@
     2.4.0
     7.5.0
     2.4.0
-    2.11.8
+    2.11.12
     2.11
     1.1.1
     2.4.0
@@ -171,13 +171,13 @@
             testCompile
-
+
         ${scala.version}
@@ -279,6 +279,10 @@
               org.eclipse.jetty.orbit:*
               org.slf4j:*
               org.scala-lang:scala-library
+
+              org.scala-lang:scala-reflect
+              org.scala-lang.modules:scala-parser-combinators_2.11
+              commons-httpclient:commons-httpclient
               org.apache.curator:*
               org.apache.commons:commons-lang3

From f5762565abdc7580d747888f4114192b3e8eb74e Mon Sep 17 00:00:00 2001
From: parisni
Date: Mon, 19 Aug 2019 22:04:58 +0200
Subject: [PATCH 3/3] Add `add_new_fields` option to control Solr schema updates

This is related to #246
---
 README.adoc                                            | 8 ++++++++
 src/main/scala/com/lucidworks/spark/SolrConf.scala     | 6 ++++++
 src/main/scala/com/lucidworks/spark/SolrRelation.scala | 2 +-
 .../lucidworks/spark/util/ConfigurationConstants.scala | 1 +
 4 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/README.adoc b/README.adoc
index d382afda..a5917175 100644
--- a/README.adoc
+++ b/README.adoc
@@ -393,6 +393,14 @@ Usage: `option("max_active_partitions", "100")`

 Default: null

+===== add_new_fields
+
+Whether fields that exist in the dataframe but are not yet defined in the Solr schema are added automatically (via the Schema API) before writing. Set to `false` to prevent the connector from modifying the Solr schema.
+
+Usage: `option("add_new_fields", "false")`
+
+Default: true
+
 //end::tuning[]

 //tag::spark-troubleshooting[]

diff --git a/src/main/scala/com/lucidworks/spark/SolrConf.scala b/src/main/scala/com/lucidworks/spark/SolrConf.scala
index f60386f9..3083476c 100644
--- a/src/main/scala/com/lucidworks/spark/SolrConf.scala
+++ b/src/main/scala/com/lucidworks/spark/SolrConf.scala
@@ -80,6 +80,9 @@ class SolrConf(config: Map[String, String]) extends Serializable with LazyLoggin
   def splits: Option[Boolean] =
     if (config.get(SOLR_DO_SPLITS).isDefined) Some(config(SOLR_DO_SPLITS).toBoolean) else None

+  def addNewFields: Option[Boolean] =
+    if (config.get(ADD_NEW_FIELDS).isDefined) Some(config(ADD_NEW_FIELDS).toBoolean) else None
+
   def docValues: Option[Boolean] =
     if (config.get(SOLR_DOC_VALUES).isDefined) Some(config(SOLR_DOC_VALUES).toBoolean) else None

@@ -283,6 +286,9 @@ class SolrConf(config: Map[String, String]) extends Serializable with LazyLoggin
     if (getAccumulatorName.isDefined) {
       sb ++= s", ${ACCUMULATOR_NAME}=${getAccumulatorName.get}"
     }
+    if (addNewFields.isDefined) {
+      sb ++= s", ${ADD_NEW_FIELDS}=${addNewFields.get}"
+    }
     if (getSolrFieldTypes.isDefined) {
       sb ++= s", ${SOLR_FIELD_TYPES}=${getSolrFieldTypes.get}"
     }

diff --git a/src/main/scala/com/lucidworks/spark/SolrRelation.scala b/src/main/scala/com/lucidworks/spark/SolrRelation.scala
index e130c165..ee6bffcf 100644
--- a/src/main/scala/com/lucidworks/spark/SolrRelation.scala
+++ b/src/main/scala/com/lucidworks/spark/SolrRelation.scala
@@ -659,7 +659,7 @@ class SolrRelation(

     // build up a list of updates to send to the Solr Schema API
     val fieldsToAddToSolr = getFieldsToAdd(dfSchema)
-    if (fieldsToAddToSolr.nonEmpty) {
+    if (fieldsToAddToSolr.nonEmpty && conf.addNewFields.getOrElse(true)) {
       SolrRelation.addFieldsForInsert(fieldsToAddToSolr, collectionId, cloudClient)
     }

diff --git a/src/main/scala/com/lucidworks/spark/util/ConfigurationConstants.scala b/src/main/scala/com/lucidworks/spark/util/ConfigurationConstants.scala
index 4d33a0aa..5d28f210 100644
--- a/src/main/scala/com/lucidworks/spark/util/ConfigurationConstants.scala
+++ b/src/main/scala/com/lucidworks/spark/util/ConfigurationConstants.scala
@@ -53,6 +53,7 @@ object ConfigurationConstants {
   val SOLR_SQL_SCHEMA: String = "sql_schema"
   val EXCLUDE_FIELDS: String = "exclude_fields"
   val MAX_ROWS: String = "max_rows"
+  val ADD_NEW_FIELDS: String = "add_new_fields"
   val ACCUMULATOR_NAME: String = "acc_name"
 }
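
To illustrate how the new `add_new_fields` option from PATCH 3/3 is meant to be used, here is a minimal write sketch. It assumes the connector's usual `solr` data source with its `zkhost` and `collection` options; the ZooKeeper address, collection name, and column names are placeholders, not values taken from this patch series.

[source,scala]
----
import org.apache.spark.sql.{SaveMode, SparkSession}

object AddNewFieldsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("add-new-fields-sketch").getOrCreate()
    import spark.implicits._

    // Example dataframe; tags_ss is an array column, i.e. multivalued data.
    val df = Seq(
      ("doc-1", Seq("tag-a", "tag-b")),
      ("doc-2", Seq("tag-c"))
    ).toDF("id", "tags_ss")

    df.write
      .format("solr")
      .option("zkhost", "localhost:9983")    // placeholder ZooKeeper connect string
      .option("collection", "my_collection") // placeholder collection name
      .option("add_new_fields", "false")     // do not add missing fields to the Solr schema
      .mode(SaveMode.Overwrite)
      .save()

    spark.stop()
  }
}
----

With `add_new_fields` set to `false`, the write only succeeds if the target fields are already covered by the collection's schema (for example via a `*_ss` dynamic field); with the default of `true`, the connector keeps its existing behaviour and first adds the missing fields through the Schema API.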