Infer clean names in load dataset (closes #297).
okennedy committed Dec 31, 2023
1 parent c5e39c1 commit 8196fcb
Showing 4 changed files with 6 additions and 12 deletions.
8 changes: 0 additions & 8 deletions vizier/backend/src/info/vizierdb/Vizier.scala
@@ -167,14 +167,6 @@ object Vizier
     }
   }
 
-  def setWorkingDirectory(): Unit =
-  {
-    if(config.workingDirectory.isDefined){
-      val path = new File(config.workingDirectory()).getAbsolutePath()
-      System.setProperty("user.dir", path)
-    }
-  }
-
   def main(args: Array[String])
   {
     config = new Config(args)
Expand Down
7 changes: 4 additions & 3 deletions vizier/backend/src/info/vizierdb/spark/DataFrameOps.scala
@@ -3,6 +3,7 @@ package info.vizierdb.spark
 import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.catalyst.expressions.NamedExpression
+import info.vizierdb.VizierException
 
 object DataFrameOps
 {
@@ -17,16 +18,16 @@ object DataFrameOps
     df.queryExecution.logical.output
 
   def safeColumnLookup(df: DataFrame, col: String): Column =
-    safeColumnLookup.getOrElse {
+    safeColumnLookupOpt(df, col).getOrElse {
       throw new VizierException(s"Expected to find $col in ${df.columns.mkString(", ")}")
     }
 
   def safeColumnLookupOpt(df: DataFrame, col: String): Option[Column] =
-    safeOutputLookup(df, col)
+    safeOutputLookupOpt(df, col)
       .map { new Column(_) }
 
   def safeOutputLookup(df: DataFrame, col: String): NamedExpression =
-    safeOutputLookup.getOrElse {
+    safeOutputLookupOpt(df, col).getOrElse {
       throw new VizierException(s"Expected to find $col in ${df.columns.mkString(", ")}")
     }
 
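For context, a minimal usage sketch of the fixed helpers, written against a hypothetical caller and DataFrame rather than code from this commit: the non-Opt variants now delegate to their Opt counterparts and throw a VizierException naming the available columns when the lookup fails.

import org.apache.spark.sql.DataFrame
import info.vizierdb.spark.DataFrameOps

// Hypothetical caller: select a column only if it is actually present.
def selectIfPresent(df: DataFrame, name: String): DataFrame =
  DataFrameOps.safeColumnLookupOpt(df, name) match {
    case Some(column) => df.select(column)  // resolved against the frame's logical output
    case None         => df                 // fall back to the unmodified frame
  }

// By contrast, DataFrameOps.safeColumnLookup(df, name) throws a VizierException
// listing df.columns when the name cannot be resolved.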
@@ -89,6 +89,7 @@ object LoadSparkDataset
       ))
       .load(url.getPath(projectId, noRelativePaths = true)._1)
       .schema
+      .map { s => s.copy(name = cleanColumnName(s.name)) }
     },
     sparkOptions,
     projectId
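The cleanColumnName helper applied above is not shown in this hunk; as a rough assumption, a cleaner for this purpose might normalize inferred schema names along these lines (the commit's actual implementation may differ):

// Hypothetical sketch only, not the commit's cleanColumnName.
def cleanColumnName(name: String): String =
  name.trim
    .replaceAll("[^a-zA-Z0-9_]+", "_")  // collapse characters that Spark/SQL identifiers reject
    .replaceAll("^_+|_+$", "")          // strip leading and trailing underscores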
@@ -250,7 +250,7 @@ case class SpreadsheetDatasetConstructor(
     {
       colDefaults.get(col) match {
         case None => Literal(null)
-        case Some(None) => DataFrameOps.safeOutputLookup(colNames(col))
+        case Some(None) => DataFrameOps.safeOutputLookup(df, colNames(col))
         case Some(Some(pattern)) =>
           pattern.expression.transform {
             // Note: Any changes to this block MUST be reflected in the
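The one-line fix above supplies the source DataFrame to DataFrameOps.safeOutputLookup, which needs it to resolve the column name against the frame's output; a small illustration with a hypothetical column index (df and colNames come from the surrounding constructor):

// Hypothetical: resolve the NamedExpression backing spreadsheet column 2.
val sourceExpr = DataFrameOps.safeOutputLookup(df, colNames(2))
// Throws a VizierException if colNames(2) is not among df's output columns.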
