From 8196fcb29898c341fceb50367ff6b6ec06eaa479 Mon Sep 17 00:00:00 2001 From: Oliver Kennedy Date: Sat, 30 Dec 2023 21:39:50 -0500 Subject: [PATCH] Infer clean names in load dataset (closes #297). --- vizier/backend/src/info/vizierdb/Vizier.scala | 8 -------- vizier/backend/src/info/vizierdb/spark/DataFrameOps.scala | 7 ++++--- .../src/info/vizierdb/spark/load/LoadSparkDataset.scala | 1 + .../spreadsheet/SpreadsheetDatasetConstructor.scala | 2 +- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/vizier/backend/src/info/vizierdb/Vizier.scala b/vizier/backend/src/info/vizierdb/Vizier.scala index 23e1dcc4..6434dce1 100644 --- a/vizier/backend/src/info/vizierdb/Vizier.scala +++ b/vizier/backend/src/info/vizierdb/Vizier.scala @@ -167,14 +167,6 @@ object Vizier } } - def setWorkingDirectory(): Unit = - { - if(config.workingDirectory.isDefined){ - val path = new File(config.workingDirectory()).getAbsolutePath() - System.setProperty("user.dir", path) - } - } - def main(args: Array[String]) { config = new Config(args) diff --git a/vizier/backend/src/info/vizierdb/spark/DataFrameOps.scala b/vizier/backend/src/info/vizierdb/spark/DataFrameOps.scala index cfe1616c..fe62bef6 100644 --- a/vizier/backend/src/info/vizierdb/spark/DataFrameOps.scala +++ b/vizier/backend/src/info/vizierdb/spark/DataFrameOps.scala @@ -3,6 +3,7 @@ package info.vizierdb.spark import org.apache.spark.sql.DataFrame import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.NamedExpression +import info.vizierdb.VizierException object DataFrameOps { @@ -17,16 +18,16 @@ object DataFrameOps df.queryExecution.logical.output def safeColumnLookup(df: DataFrame, col: String): Column = - safeColumnLookup.getOrElse { + safeColumnLookupOpt(df, col).getOrElse { throw new VizierException(s"Expected to find $col in ${df.columns.mkString(", ")}") } def safeColumnLookupOpt(df: DataFrame, col: String): Option[Column] = - safeOutputLookup(df, col) + safeOutputLookupOpt(df, col) .map { new Column(_) } def safeOutputLookup(df: DataFrame, col: String): NamedExpression = - safeOutputLookup.getOrElse { + safeOutputLookupOpt(df, col).getOrElse { throw new VizierException(s"Expected to find $col in ${df.columns.mkString(", ")}") } diff --git a/vizier/backend/src/info/vizierdb/spark/load/LoadSparkDataset.scala b/vizier/backend/src/info/vizierdb/spark/load/LoadSparkDataset.scala index 0ceda94a..621822d0 100644 --- a/vizier/backend/src/info/vizierdb/spark/load/LoadSparkDataset.scala +++ b/vizier/backend/src/info/vizierdb/spark/load/LoadSparkDataset.scala @@ -89,6 +89,7 @@ object LoadSparkDataset )) .load(url.getPath(projectId, noRelativePaths = true)._1) .schema + .map { s => s.copy(name = cleanColumnName(s.name)) } }, sparkOptions, projectId diff --git a/vizier/backend/src/info/vizierdb/spreadsheet/SpreadsheetDatasetConstructor.scala b/vizier/backend/src/info/vizierdb/spreadsheet/SpreadsheetDatasetConstructor.scala index 4e30e92a..d5a2deb6 100644 --- a/vizier/backend/src/info/vizierdb/spreadsheet/SpreadsheetDatasetConstructor.scala +++ b/vizier/backend/src/info/vizierdb/spreadsheet/SpreadsheetDatasetConstructor.scala @@ -250,7 +250,7 @@ case class SpreadsheetDatasetConstructor( { colDefaults.get(col) match { case None => Literal(null) - case Some(None) => DataFrameOps.safeOutputLookup(colNames(col)) + case Some(None) => DataFrameOps.safeOutputLookup(df, colNames(col)) case Some(Some(pattern)) => pattern.expression.transform { // Note: Any changes to this block MUST be reflected in the