forked from microsoft/Mobius
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
754 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDataFrameWriterProxy.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for full license information. | ||
|
||
using System.Collections.Generic; | ||
|
||
namespace Microsoft.Spark.CSharp.Proxy | ||
{ | ||
internal interface IDataFrameWriterProxy | ||
{ | ||
void Mode(string saveMode); | ||
void Format(string source); | ||
void Options(Dictionary<string, string> options); | ||
void PartitionBy(params string[] colNames); | ||
void Save(); | ||
void InsertInto(string tableName); | ||
void SaveAsTable(string tableName); | ||
void Jdbc(string url, string table, Dictionary<string, string> properties); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DataFrameWriterIpcProxy.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for full license information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.Spark.CSharp.Interop.Ipc; | ||
|
||
namespace Microsoft.Spark.CSharp.Proxy.Ipc | ||
{ | ||
internal class DataFrameWriterIpcProxy : IDataFrameWriterProxy | ||
{ | ||
private readonly JvmObjectReference jvmDataFrameWriterReference; | ||
|
||
internal DataFrameWriterIpcProxy(JvmObjectReference jvmDataFrameWriterReference) | ||
{ | ||
this.jvmDataFrameWriterReference = jvmDataFrameWriterReference; | ||
} | ||
|
||
public void Mode(string saveMode) | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( | ||
jvmDataFrameWriterReference, "mode", new object[] { saveMode }); | ||
} | ||
|
||
public void Format(string source) | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( | ||
jvmDataFrameWriterReference, "format", new object[] { source }); | ||
} | ||
|
||
public void Options(Dictionary<string, string> options) | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( | ||
jvmDataFrameWriterReference, "options", new object[] { options }); | ||
} | ||
|
||
public void PartitionBy(params string[] colNames) | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( | ||
jvmDataFrameWriterReference, "partitionBy", new object[] { colNames }); | ||
} | ||
|
||
public void Save() | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDataFrameWriterReference, "save"); | ||
} | ||
|
||
public void InsertInto(string tableName) | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( | ||
jvmDataFrameWriterReference, "insertInto", new object[] { tableName }); | ||
} | ||
|
||
public void SaveAsTable(string tableName) | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( | ||
jvmDataFrameWriterReference, "saveAsTable", new object[] { tableName }); | ||
} | ||
|
||
public void Jdbc(string url, string table, Dictionary<string, string> properties) | ||
{ | ||
SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod( | ||
jvmDataFrameWriterReference, "jdbc", new object[] { url, table, properties }); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
170 changes: 170 additions & 0 deletions
170
csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrameWriter.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for full license information. | ||
|
||
using System.Collections.Generic; | ||
using Microsoft.Spark.CSharp.Proxy; | ||
|
||
namespace Microsoft.Spark.CSharp.Sql | ||
{ | ||
/// <summary> | ||
/// Interface used to write a DataFrame to external storage systems (e.g. file systems, | ||
/// key-value stores, etc). Use DataFrame.Write to access this. | ||
/// | ||
/// See also http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.sql.DataFrameWriter | ||
/// </summary> | ||
public class DataFrameWriter | ||
{ | ||
internal IDataFrameWriterProxy DataFrameWriterProxy | ||
{ | ||
get { return dataFrameWriterProxy; } | ||
} | ||
|
||
private readonly IDataFrameWriterProxy dataFrameWriterProxy; | ||
|
||
internal DataFrameWriter(IDataFrameWriterProxy dataFrameWriterProxy) | ||
{ | ||
this.dataFrameWriterProxy = dataFrameWriterProxy; | ||
} | ||
|
||
/// <summary> | ||
/// Specifies the behavior when data or table already exists. Options include: | ||
/// - `SaveMode.Overwrite`: overwrite the existing data. | ||
/// - `SaveMode.Append`: append the data. | ||
/// - `SaveMode.Ignore`: ignore the operation (i.e. no-op). | ||
/// - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. | ||
/// </summary> | ||
public DataFrameWriter Mode(SaveMode saveMode) | ||
{ | ||
return Mode(saveMode.GetStringValue()); | ||
} | ||
|
||
/// <summary> | ||
/// Specifies the behavior when data or table already exists. Options include: | ||
/// - `SaveMode.Overwrite`: overwrite the existing data. | ||
/// - `SaveMode.Append`: append the data. | ||
/// - `SaveMode.Ignore`: ignore the operation (i.e. no-op). | ||
/// - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. | ||
/// </summary> | ||
public DataFrameWriter Mode(string saveMode) | ||
{ | ||
dataFrameWriterProxy.Mode(saveMode); | ||
return this; | ||
} | ||
|
||
/// <summary> | ||
/// Specifies the underlying output data source. Built-in options include "parquet", "json", etc. | ||
/// </summary> | ||
public DataFrameWriter Format(string source) | ||
{ | ||
dataFrameWriterProxy.Format(source); | ||
return this; | ||
} | ||
|
||
/// <summary> | ||
/// Adds an output option for the underlying data source. | ||
/// </summary> | ||
public DataFrameWriter Option(string key, string value) | ||
{ | ||
var options = new Dictionary<string, string>() { { key, value } }; | ||
return Options(options); | ||
} | ||
|
||
/// <summary> | ||
/// Adds output options for the underlying data source. | ||
/// </summary> | ||
public DataFrameWriter Options(Dictionary<string,string> options) | ||
{ | ||
dataFrameWriterProxy.Options(options); | ||
return this; | ||
} | ||
|
||
/// <summary> | ||
/// Partitions the output by the given columns on the file system. If specified, the output is | ||
/// laid out on the file system similar to Hive's partitioning scheme. | ||
/// | ||
/// This is only applicable for Parquet at the moment. | ||
/// </summary> | ||
public DataFrameWriter PartitionBy(params string[] colNames) | ||
{ | ||
dataFrameWriterProxy.PartitionBy(colNames); | ||
return this; | ||
} | ||
|
||
/// <summary> | ||
/// Saves the content of the DataFrame at the specified path. | ||
/// </summary> | ||
public void Save(string path) | ||
{ | ||
Option("path", path).Save(); | ||
} | ||
|
||
/// <summary> | ||
/// Saves the content of the DataFrame as the specified table. | ||
/// </summary> | ||
public void Save() | ||
{ | ||
dataFrameWriterProxy.Save(); | ||
} | ||
|
||
/// <summary> | ||
/// Inserts the content of the DataFrame to the specified table. It requires that | ||
/// the schema of the DataFrame is the same as the schema of the table. | ||
/// Because it inserts data to an existing table, format or options will be ignored. | ||
/// </summary> | ||
public void InsertInto(string tableName) | ||
{ | ||
dataFrameWriterProxy.InsertInto(tableName); | ||
} | ||
|
||
/// <summary> | ||
/// Saves the content of the DataFrame as the specified table. | ||
/// In the case the table already exists, behavior of this function depends on the | ||
/// save mode, specified by the `mode` function (default to throwing an exception). | ||
/// When `mode` is `Overwrite`, the schema of the DataFrame does not need to be | ||
/// the same as that of the existing table. | ||
/// When `mode` is `Append`, the schema of the DataFrame need to be | ||
/// the same as that of the existing table, and format or options will be ignored. | ||
/// </summary> | ||
public void SaveAsTable(string tableName) | ||
{ | ||
dataFrameWriterProxy.SaveAsTable(tableName); | ||
} | ||
|
||
/// <summary> | ||
/// Saves the content of the DataFrame to a external database table via JDBC. In the case the | ||
/// table already exists in the external database, behavior of this function depends on the | ||
/// save mode, specified by the `mode` function (default to throwing an exception). | ||
/// | ||
/// Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash | ||
/// your external database systems. | ||
/// </summary> | ||
/// <param name="url">JDBC database url of the form `jdbc:subprotocol:subname`</param> | ||
/// <param name="table">Name of the table in the external database.</param> | ||
/// <param name="properties">JDBC database connection arguments, a list of arbitrary string tag/value. | ||
/// Normally at least a "user" and "password" property should be included.</param> | ||
public void Jdbc(string url, string table, Dictionary<string, string> properties) | ||
{ | ||
dataFrameWriterProxy.Jdbc(url, table, properties); | ||
} | ||
|
||
/// <summary> | ||
/// Saves the content of the DataFrame in JSON format at the specified path. | ||
/// This is equivalent to: | ||
/// Format("json").Save(path) | ||
/// </summary> | ||
public void Json(string path) | ||
{ | ||
Format("json").Save(path); | ||
} | ||
|
||
/// <summary> | ||
/// Saves the content of the DataFrame in JSON format at the specified path. | ||
/// This is equivalent to: | ||
/// Format("parquet").Save(path) | ||
/// </summary> | ||
public void Parquet(string path) | ||
{ | ||
Format("parquet").Save(path); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for full license information. | ||
|
||
namespace Microsoft.Spark.CSharp.Sql | ||
{ | ||
/// <summary> | ||
/// SaveMode is used to specify the expected behavior of saving a DataFrame to a data source. | ||
/// </summary> | ||
public enum SaveMode | ||
{ | ||
/// <summary> | ||
/// Append mode means that when saving a DataFrame to a data source, if data/table already exists, | ||
/// contents of the DataFrame are expected to be appended to existing data. | ||
/// </summary> | ||
Append, | ||
|
||
/// <summary> | ||
/// Overwrite mode means that when saving a DataFrame to a data source, | ||
/// if data/table already exists, existing data is expected to be overwritten by the contents of | ||
/// the DataFrame. | ||
/// </summary> | ||
Overwrite, | ||
|
||
/// <summary> | ||
/// ErrorIfExists mode means that when saving a DataFrame to a data source, if data already exists, | ||
/// an exception is expected to be thrown. | ||
/// </summary> | ||
ErrorIfExists, | ||
|
||
/// <summary> | ||
/// Ignore mode means that when saving a DataFrame to a data source, if data already exists, | ||
/// the save operation is expected to not save the contents of the DataFrame and to not | ||
/// change the existing data. | ||
/// </summary> | ||
Ignore | ||
} | ||
|
||
/// <summary> | ||
/// For SaveMode.ErrorIfExists, the corresponding literal string in spark is "error" or "default". | ||
/// </summary> | ||
public static class SaveModeExtensions | ||
{ | ||
public static string GetStringValue(this SaveMode mode) | ||
{ | ||
switch (mode) | ||
{ | ||
case SaveMode.ErrorIfExists: | ||
return "error"; | ||
default: | ||
return mode.ToString(); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.