Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic field optional #274

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,14 @@ Usage: `option("max_active_partitions", "100")`

Default: null

===== add_new_fields

Controls whether Spark adds columns that are not yet defined in the Solr schema; set to `false` to skip creating them.

Usage: `option("add_new_fields", "false")`

Default: true

//end::tuning[]

//tag::spark-troubleshooting[]
Expand Down
8 changes: 6 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,13 @@
<goal>testCompile</goal>
</goals>
</execution>
<execution>
<!-- <execution>
<id>attach-scaladocs</id>
<phase>verify</phase>
<goals>
<goal>doc-jar</goal>
</goals>
</execution>
</execution> -->
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
Expand Down Expand Up @@ -280,6 +280,10 @@
<exclude>org.eclipse.jetty.orbit:*</exclude>
<exclude>org.slf4j:*</exclude>
<exclude>org.scala-lang:scala-library</exclude>
<!-- begin removed for livy-->
<exclude>org.scala-lang:scala-reflect</exclude>
<exclude>org.scala-lang.modules:scala-parser-combinators_2.11</exclude>
<!-- end removed for livy-->
<exclude>commons-httpclient:commons-httpclient</exclude>
<exclude>org.apache.curator:*</exclude>
<exclude>org.apache.commons:commons-lang3</exclude>
Expand Down
6 changes: 6 additions & 0 deletions src/main/scala/com/lucidworks/spark/SolrConf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ class SolrConf(config: Map[String, String]) extends Serializable with LazyLoggin
/** Whether to split queries across shards ("splits" option); None when unset.
  * `Option.map` avoids the double lookup (`isDefined` + `apply`) on the config map.
  * Note: `toBoolean` throws IllegalArgumentException for non-boolean strings, matching
  * the original behavior. */
def splits: Option[Boolean] = config.get(SOLR_DO_SPLITS).map(_.toBoolean)

/** Whether Spark should add undefined columns to the Solr schema on write
  * ("add_new_fields" option); None when unset (callers default to true).
  * `Option.map` avoids the double lookup (`isDefined` + `apply`) on the config map. */
def addNewFields: Option[Boolean] = config.get(ADD_NEW_FIELDS).map(_.toBoolean)

/** Whether to use Solr docValues for export ("doc_values" option); None when unset.
  * `Option.map` avoids the double lookup (`isDefined` + `apply`) on the config map. */
def docValues: Option[Boolean] = config.get(SOLR_DOC_VALUES).map(_.toBoolean)

Expand Down Expand Up @@ -283,6 +286,9 @@ class SolrConf(config: Map[String, String]) extends Serializable with LazyLoggin
if (getAccumulatorName.isDefined) {
sb ++= s", ${ACCUMULATOR_NAME}=${getAccumulatorName.get}"
}
if (addNewFields.isDefined) {
sb ++= s", ${ADD_NEW_FIELDS}=${addNewFields.get}"
}
if (getSolrFieldTypes.isDefined) {
sb ++= s", ${SOLR_FIELD_TYPES}=${getSolrFieldTypes.get}"
}
Expand Down
14 changes: 12 additions & 2 deletions src/main/scala/com/lucidworks/spark/SolrRelation.scala
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ class SolrRelation(
// build up a list of updates to send to the Solr Schema API
val fieldsToAddToSolr = getFieldsToAdd(dfSchema)

if (fieldsToAddToSolr.nonEmpty) {
if (fieldsToAddToSolr.nonEmpty && conf.addNewFields.getOrElse(true)) {
SolrRelation.addFieldsForInsert(fieldsToAddToSolr, collectionId, cloudClient)
}

Expand Down Expand Up @@ -694,7 +694,17 @@ class SolrRelation(
val elem = it.next()
val childDoc = new SolrInputDocument
for (i <- 0 until elem.schema.fields.size) {
childDoc.setField(elem.schema.fields(i).name, elem.get(i))
val childFname = elem.schema.fields(i).name
val childValue = elem.get(i)
childValue match {
//TODO: Do we need to check explicitly for ArrayBuffer and WrappedArray
case v: Iterable[Any] =>
val it = v.iterator
while (it.hasNext) childDoc.addField(childFname, it.next())
case bd: java.math.BigDecimal =>
childDoc.setField(childFname, bd.doubleValue())
case _ => childDoc.setField(childFname, childValue)
}
}

// Generate unique key if the child document doesn't have one
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ object ConfigurationConstants {
// Option key: user-supplied SQL schema override for the relation.
val SOLR_SQL_SCHEMA: String = "sql_schema"
// Option key: comma-separated field names to exclude from reads.
val EXCLUDE_FIELDS: String = "exclude_fields"
// Option key: cap on the number of rows returned by a query.
val MAX_ROWS: String = "max_rows"
// Option key: whether to add columns missing from the Solr schema on write
// (read via SolrConf.addNewFields; treated as true when unset).
val ADD_NEW_FIELDS: String = "add_new_fields"

// Option key: name of the Spark accumulator used to count processed documents.
val ACCUMULATOR_NAME: String = "acc_name"
}