Name | Description |
---|---|
Cache | Persist this RDD with the default storage level. |
Persist | Set this RDD's storage level to persist its values across operations after the first time it is computed. This can only be used to assign a new storage level if the RDD does not have a storage level set yet. If no storage level is specified, the default storage level is used. sc.Parallelize(new string[] {"b", "a", "c"}).Persist().isCached true |
Unpersist | Mark the RDD as non-persistent, and remove all blocks for it from memory and disk. |
Checkpoint | Mark this RDD for checkpointing. It will be saved to a file inside the checkpoint directory set with SetCheckpointDir() and all references to its parent RDDs will be removed. This function must be called before any job has been executed on this RDD. It is strongly recommended that this RDD is persisted in memory, otherwise saving it to a file will require recomputation. |
GetNumPartitions | Returns the number of partitions of this RDD. |
Map``1 | Return a new RDD by applying a function to each element of this RDD. sc.Parallelize(new string[]{"b", "a", "c"}, 1).Map(x => new KeyValuePair<string, int>(x, 1)).Collect() [('a', 1), ('b', 1), ('c', 1)] |
FlatMap``1 | Return a new RDD by first applying a function to all elements of this RDD, and then flattening the results. sc.Parallelize(new int[] {2, 3, 4}, 1).FlatMap(x => Enumerable.Range(1, x - 1)).Collect() [1, 1, 1, 2, 2, 3] |
MapPartitions``1 | Return a new RDD by applying a function to each partition of this RDD. sc.Parallelize(new int[] {1, 2, 3, 4}, 2).MapPartitions(iter => new[] { iter.Sum(x => (decimal?)x) }).Collect() [3, 7] |
MapPartitionsWithIndex``1 | Return a new RDD by applying a function to each partition of this RDD, while tracking the index of the original partition. sc.Parallelize(new int[]{1, 2, 3, 4}, 4).MapPartitionsWithIndex<double>((pid, iter) => (double)pid).Sum() 6 |
Filter | Return a new RDD containing only the elements that satisfy a predicate. sc.Parallelize(new int[]{1, 2, 3, 4, 5}, 1).Filter(x => x % 2 == 0).Collect() [2, 4] |
Distinct | Return a new RDD containing the distinct elements in this RDD. sc.Parallelize(new int[] {1, 1, 2, 3}, 1).Distinct().Collect() [1, 2, 3] |
Sample | Return a sampled subset of this RDD. var rdd = sc.Parallelize(Enumerable.Range(0, 100), 4) 6 <= rdd.Sample(false, 0.1, 81).Count() <= 14 true |
RandomSplit | Randomly splits this RDD with the provided weights. var rdd = sc.Parallelize(Enumerable.Range(0, 500), 1) var rdds = rdd.RandomSplit(new double[] {2, 3}, 17) 150 < rdds[0].Count() < 250 250 < rdds[1].Count() < 350 |
TakeSample | Return a fixed-size sampled subset of this RDD. var rdd = sc.Parallelize(Enumerable.Range(0, 10), 2) rdd.TakeSample(true, 20, 1).Length 20 rdd.TakeSample(false, 5, 2).Length 5 rdd.TakeSample(false, 15, 3).Length 10 |
ComputeFractionForSampleSize | Returns a sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time. How the sampling rate is determined: Let p = num / total, where num is the sample size and total is the total number of data points in the RDD. We're trying to compute q > p such that - when sampling with replacement, we're drawing each data point with prob_i ~ Pois(q), where we want to guarantee Pr[s < num] < 0.0001 for s = sum(prob_i for i from 0 to total), i.e. the failure rate of not having a sufficiently large sample < 0.0001. Setting q = p + 5 * sqrt(p/total) is sufficient to guarantee 0.9999 success rate for num > 12, but we need a slightly larger q (9 empirically determined). - when sampling without replacement, we're drawing each data point with prob_i ~ Binomial(total, fraction) and our choice of q guarantees 1-delta, or 0.9999 success rate, where success rate is defined the same as in sampling with replacement. |
Union | Return the union of this RDD and another one. var rdd = sc.Parallelize(new int[] { 1, 1, 2, 3 }, 1) rdd.Union(rdd).Collect() [1, 1, 2, 3, 1, 1, 2, 3] |
Intersection | Return the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did. Note that this method performs a shuffle internally. var rdd1 = sc.Parallelize(new int[] { 1, 10, 2, 3, 4, 5 }, 1) var rdd2 = sc.Parallelize(new int[] { 1, 6, 2, 3, 7, 8 }, 1) rdd1.Intersection(rdd2).Collect() [1, 2, 3] |
Glom | Return an RDD created by coalescing all elements within each partition into a list. var rdd = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 2) rdd.Glom().Collect() [[1, 2], [3, 4]] |
Cartesian``1 | Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of elements (a, b) where a is in this RDD and b is in the other. var rdd = sc.Parallelize(new int[] { 1, 2 }, 1) rdd.Cartesian(rdd).Collect() [(1, 1), (1, 2), (2, 1), (2, 2)] |
GroupBy``1 | Return an RDD of grouped items. Each group consists of a key and a sequence of elements mapping to that key. The ordering of elements within each group is not guaranteed, and may even differ each time the resulting RDD is evaluated. Note: This operation may be very expensive. If you are grouping in order to perform an aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] or [[PairRDDFunctions.reduceByKey]] will provide much better performance. var rdd = sc.Parallelize(new int[] { 1, 1, 2, 3, 5, 8 }, 1) rdd.GroupBy(x => x % 2).Collect() [(0, [2, 8]), (1, [1, 1, 3, 5])] |
Pipe | Return an RDD created by piping elements to a forked external process. sc.Parallelize(new char[] { '1', '2', '3', '4' }, 1).Pipe("cat").Collect() ['1', '2', '3', '4'] |
Foreach | Applies a function to all elements of this RDD. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Foreach(x => Console.Write(x)) |
ForeachPartition | Applies a function to each partition of this RDD. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).ForeachPartition(iter => { foreach (var x in iter) Console.Write(x + " "); }) |
Collect | Return a list that contains all of the elements in this RDD. |
Reduce | Reduces the elements of this RDD using the specified commutative and associative binary operator. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Reduce((x, y) => x + y) 15 |
TreeReduce | Reduces the elements of this RDD in a multi-level tree pattern. var rdd = sc.Parallelize(new int[] { -5, -4, -3, -2, -1, 1, 2, 3, 4 }, 10) rdd.TreeReduce((x, y) => x + y) -5 rdd.TreeReduce((x, y) => x + y, 2) -5 rdd.TreeReduce((x, y) => x + y, 10) -5 |
Fold | Aggregate the elements of each partition, and then the results for all the partitions, using a given associative and commutative function and a neutral "zero value." The function op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object allocation; however, it should not modify t2. This behaves somewhat differently from fold operations implemented for non-distributed collections in functional languages like Scala. This fold operation may be applied to partitions individually, and then fold those results into the final result, rather than applying the fold to each element sequentially in some defined ordering. For functions that are not commutative, the result may differ from that of a fold applied to a non-distributed collection. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 1).Fold(0, (x, y) => x + y) 15 |
Aggregate``1 | Aggregate the elements of each partition, and then the results for all the partitions, using the given combine functions and a neutral "zero value." The function op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object allocation; however, it should not modify t2. The first function (seqOp) can return a different result type, U, than the type of this RDD. Thus, we need one operation for merging a T into a U and one operation for merging two Us. sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1).Aggregate(0, (x, y) => x + y, (x, y) => x + y) 10 |
TreeAggregate``1 | Aggregates the elements of this RDD in a multi-level tree pattern. sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1).TreeAggregate(0, (x, y) => x + y, (x, y) => x + y) 10 |
Count | Return the number of elements in this RDD. |
CountByValue | Return the count of each unique value in this RDD as a dictionary of (value, count) pairs. sc.Parallelize(new int[] { 1, 2, 1, 2, 2 }, 2).CountByValue() [(1, 2), (2, 3)] |
Take | Take the first num elements of the RDD. It works by first scanning one partition, and then uses the results from that partition to estimate the number of additional partitions needed to satisfy the limit. Translated from the Scala implementation in RDD#take(). sc.Parallelize(new int[] { 2, 3, 4, 5, 6 }, 2).Cache().Take(2) [2, 3] sc.Parallelize(new int[] { 2, 3, 4, 5, 6 }, 2).Take(10) [2, 3, 4, 5, 6] sc.Parallelize(Enumerable.Range(0, 100), 100).Filter(x => x > 90).Take(3) [91, 92, 93] |
First | Return the first element in this RDD. sc.Parallelize(new int[] { 2, 3, 4 }, 2).First() 2 |
IsEmpty | Returns true if and only if the RDD contains no elements at all. Note that an RDD may be empty even when it has at least 1 partition. sc.Parallelize(new int[0], 1).IsEmpty() true sc.Parallelize(new int[] {1}).IsEmpty() false |
Subtract | Return each value in this RDD that is not contained in the other RDD. var x = sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1) var y = sc.Parallelize(new int[] { 3 }, 1) x.Subtract(y).Collect() [1, 2, 4] |
KeyBy``1 | Creates tuples of the elements in this RDD by applying the given function, using the function's result as the key. sc.Parallelize(new int[] { 1, 2, 3, 4 }, 1).KeyBy(x => x * x).Collect() [(1, 1), (4, 2), (9, 3), (16, 4)] |
Repartition | Return a new RDD that has exactly numPartitions partitions. Can increase or decrease the level of parallelism in this RDD. Internally, this uses a shuffle to redistribute data. If you are decreasing the number of partitions in this RDD, consider using `Coalesce`, which can avoid performing a shuffle. var rdd = sc.Parallelize(new int[] { 1, 2, 3, 4, 5, 6, 7 }, 4) rdd.Glom().Collect().Length 4 rdd.Repartition(2).Glom().Collect().Length 2 |
Coalesce | Return a new RDD that is reduced into `numPartitions` partitions. sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 3).Glom().Collect().Length 3 sc.Parallelize(new int[] { 1, 2, 3, 4, 5 }, 3).Coalesce(1).Glom().Collect().Length 1 |
Zip``1 | Zips this RDD with another one, returning key-value pairs that pair the first element in each RDD, the second element in each RDD, etc. Assumes that the two RDDs have the same number of partitions and the same number of elements in each partition (e.g. one was made through a map on the other). var x = sc.Parallelize(Enumerable.Range(0, 5), 1) var y = sc.Parallelize(Enumerable.Range(1000, 5), 1) x.Zip(y).Collect() [(0, 1000), (1, 1001), (2, 1002), (3, 1003), (4, 1004)] |
ZipWithIndex | Zips this RDD with its element indices. The ordering is first based on the partition index and then the ordering of items within each partition. So the first item in the first partition gets index 0, and the last item in the last partition receives the largest index. This method needs to trigger a Spark job when this RDD contains more than one partition. sc.Parallelize(new string[] { "a", "b", "c", "d" }, 3).ZipWithIndex().Collect() [('a', 0), ('b', 1), ('c', 2), ('d', 3)] |
ZipWithUniqueId | Zips this RDD with generated unique Long ids. Items in the kth partition will get ids k, n+k, 2*n+k, ..., where n is the number of partitions. So there may exist gaps, but this method won't trigger a Spark job, which is different from ZipWithIndex. sc.Parallelize(new string[] { "a", "b", "c", "d", "e" }, 3).ZipWithUniqueId().Collect() [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)] |
SetName | Assign a name to this RDD. var rdd1 = sc.Parallelize(new int[] { 1, 2 }, 1) rdd1.SetName("RDD1").Name 'RDD1' |
ToDebugString | A description of this RDD and its recursive dependencies for debugging. |
GetStorageLevel | Get the RDD's current storage level. var rdd1 = sc.Parallelize(new int[] { 1, 2 }, 1) rdd1.GetStorageLevel() Serialized 1x Replicated |
ToLocalIterator | Return an iterator that contains all of the elements in this RDD. The iterator will consume as much memory as the largest partition in this RDD. sc.Parallelize(Enumerable.Range(0, 10), 1).ToLocalIterator() [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] |
RandomSampleWithRange | Internal method exposed for Random Splits in DataFrames. Samples an RDD given a probability range. |
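
The operators above compose into driver-side pipelines. Below is a minimal sketch of chaining a few of them (Parallelize, Filter, Map, Reduce, Collect). It assumes an already-created Mobius SparkContext named `sc`, and the variable names are illustrative rather than part of the API.

```csharp
using System;
using System.Linq;

// Minimal sketch; assumes an existing Mobius SparkContext `sc` (creation not shown).
var numbers = sc.Parallelize(Enumerable.Range(1, 10), 2);

// Filter keeps the even values, Map squares them, Reduce sums the squares on the driver.
var sumOfEvenSquares = numbers
    .Filter(x => x % 2 == 0)
    .Map(x => x * x)
    .Reduce((x, y) => x + y);          // 4 + 16 + 36 + 64 + 100 = 220

// Collect materializes the transformed elements back on the driver.
var evenSquares = numbers.Filter(x => x % 2 == 0).Map(x => x * x).Collect();
Console.WriteLine(sumOfEvenSquares);   // 220
```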

---

###SparkContext
Name | Description |
---|---|
GetActiveSparkContext | Get the existing SparkContext. |
GetConf | Return a copy of this JavaSparkContext's configuration. The configuration ''cannot'' be changed at runtime. |
TextFile | Read a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings. |
Parallelize``1 | Distribute a local collection to form an RDD. sc.Parallelize(new int[] {0, 2, 3, 4, 6}, 5).Glom().Collect() [[0], [2], [3], [4], [6]] |
EmptyRDD | Create an RDD that has no partitions or elements. |
WholeTextFiles | Read a directory of text files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file and the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do {{{ RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") }}} then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} Small files are preferred; large files are also allowed but may cause bad performance. minPartitions is a suggested minimum number of splits for the input data. |
BinaryFiles | Read a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI as byte arrays. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file and the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do {{{ RDD<KeyValuePair<string, byte[]>> rdd = sparkContext.BinaryFiles("hdfs://a-hdfs-path") }}} then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} Note: Small files are preferred; very large files may cause bad performance. minPartitions is a suggested minimum number of splits for the input data. |
SequenceFile | Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is as follows: 1. A Java RDD is created from the SequenceFile or other InputFormat, and the key and value Writable classes 2. Serialization is attempted via Pyrolite pickling 3. If this fails, the fallback is to call 'toString' on each key and value 4. PickleSerializer is used to deserialize pickled objects on the Python side |
NewAPIHadoopFile | Read a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java |
NewAPIHadoopRDD | Read a 'new API' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile. |
HadoopFile | Read an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java. |
HadoopRDD | Read an 'old' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile. |
Union``1 | Build the union of a list of RDDs. This supports unions() of RDDs with different serialized formats, although this forces them to be reserialized using the default serializer: >>> path = os.path.join(tempdir, "union-text.txt") >>> with open(path, "w") as testFile: ... _ = testFile.write("Hello") >>> textFile = sc.textFile(path) >>> textFile.collect() [u'Hello'] >>> parallelized = sc.parallelize(["World!"]) >>> sorted(sc.union([textFile, parallelized]).collect()) [u'Hello', 'World!'] |
Broadcast``1 | Broadcast a read-only variable to the cluster, returning a Broadcast object for reading it in distributed functions. The variable will be sent to each cluster only once. |
Accumulator``1 | Create an Accumulator with the given initial value, using a given helper object to define how to add values of the data type if provided. Default AccumulatorParams are used for integers and floating-point numbers if you do not provide one. For other types, a custom AccumulatorParam can be used. |
Stop | Shut down the SparkContext. |
AddFile | Add a file to be downloaded with this Spark job on every node. The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, use `SparkFiles.get(fileName)` to find its download location. |
SetCheckpointDir | Set the directory under which RDDs are going to be checkpointed. The directory must be an HDFS path if running on a cluster. |
SetJobGroup | Assigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared. Often, a unit of execution in an application consists of multiple Spark actions or jobs. Application programmers can use this method to group all those jobs together and give a group description. Once set, the Spark web UI will associate such jobs with this group. The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]] to cancel all running jobs in this group. For example, {{{ // In the main thread: sc.setJobGroup("some_job_to_cancel", "some job description"); rdd.map(...).count(); // In a separate thread: sc.cancelJobGroup("some_job_to_cancel"); }}} If interruptOnCancel is set to true for the job group, then job cancellation will result in Thread.interrupt() being called on the job's executor threads. This is useful to help ensure that the tasks are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead. |
SetLocalProperty | Set a local property that affects jobs submitted from this thread, such as the Spark fair scheduler pool. |
GetLocalProperty | Get a local property set in this thread, or null if it is missing. See [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]]. |
SetLogLevel | Control our logLevel. This overrides any user-defined log settings. @param logLevel The desired log level as a string. Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN |
CancelJobGroup | Cancel active jobs for the specified group. See SetJobGroup for more information. |
CancelAllJobs | Cancel all jobs that have been scheduled or are running. |
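
As a rough illustration of how the driver-side calls above fit together, here is a minimal sketch. It assumes an existing Mobius SparkContext named `sc`; the paths, group id, and description are placeholders rather than values taken from this documentation, and SetJobGroup is shown with the (group id, description) form used in the example under its entry above.

```csharp
using System;

// Minimal sketch; assumes an existing Mobius SparkContext `sc`. Paths and ids are illustrative.
sc.SetLogLevel("WARN");                                 // valid levels are listed under SetLogLevel
sc.SetCheckpointDir("hdfs://example/checkpoints");      // hypothetical checkpoint directory
sc.SetJobGroup("example-group", "count words in a text file");

var lines = sc.TextFile("hdfs://example/input.txt");    // hypothetical input file
var wordCount = lines.FlatMap(line => line.Split(' ')).Count();
Console.WriteLine(wordCount);

sc.Stop();                                              // shut down the context when done
```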

---

###SqlContext
Name | Description |
---|---|
GetOrCreate | Get the existing SQLContext or create a new one with given SparkContext. |
NewSession | Returns a new SQLContext as new session, that has separate SQLConf, registered temporary tables and UDFs, but shared SparkContext and table cache. |
GetConf | Returns the value of Spark SQL configuration property for the given key. If the key is not set, returns defaultValue. |
SetConf | Sets the given Spark SQL configuration property. |
Read | Returns a DataFrameReader that can be used to read data in as a DataFrame. |
ReadDataFrame | Loads a DataFrame from the given source path using the given schema and options. |
CreateDataFrame | Creates a DataFrame from an RDD containing arrays of objects, using the given schema. |
RegisterDataFrameAsTable | Registers the given DataFrame as a temporary table in the catalog. Temporary tables exist only during the lifetime of this instance of SqlContext. |
DropTempTable | Removes the temp table from the catalog. |
Table | Returns the specified table as a DataFrame. |
Tables | Returns a DataFrame containing the names of tables in the given database. If no database is specified, the current database will be used. The returned DataFrame has two columns: 'tableName' and 'isTemporary' (a column with bool type indicating if a table is a temporary one or not). |
TableNames | Returns a list of names of tables in the database. |
CacheTable | Caches the specified table in-memory. |
UncacheTable | Removes the specified table from the in-memory cache. |
ClearCache | Removes all cached tables from the in-memory cache. |
IsCached | Returns true if the table is currently cached in-memory. |
Sql | Executes a SQL query using Spark, returning the result as a DataFrame. The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect' |
JsonFile | Loads a JSON file (one object per line), returning the result as a DataFrame. It goes through the entire dataset once to determine the schema. |
JsonFile | Loads a JSON file (one object per line) and applies the given schema. |
TextFile | Loads a delimited text file with the specified column delimiter, applying the given schema. |
TextFile | Loads a text file (one object per line), returning the result as a DataFrame. |
RegisterFunction``1 | Register UDF with no input argument, e.g: SqlContext.RegisterFunction<bool>("MyFilter", () => true); sqlContext.Sql("SELECT * FROM MyTable where MyFilter()"); |
RegisterFunction``2 | Register UDF with 1 input argument, e.g: SqlContext.RegisterFunction<bool, string>("MyFilter", (arg1) => arg1 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1)"); |
RegisterFunction``3 | Register UDF with 2 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string>("MyFilter", (arg1, arg2) => arg1 != null && arg2 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2)"); |
RegisterFunction``4 | Register UDF with 3 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, string>("MyFilter", (arg1, arg2, arg3) => arg1 != null && arg2 != null && arg3 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, columnName3)"); |
RegisterFunction``5 | Register UDF with 4 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg4) => arg1 != null && arg2 != null && ... && arg4 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName4)"); |
RegisterFunction``6 | Register UDF with 5 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg5) => arg1 != null && arg2 != null && ... && arg5 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName5)"); |
RegisterFunction``7 | Register UDF with 6 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg6) => arg1 != null && arg2 != null && ... && arg6 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName6)"); |
RegisterFunction``8 | Register UDF with 7 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg7) => arg1 != null && arg2 != null && ... && arg7 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName7)"); |
RegisterFunction``9 | Register UDF with 8 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg8) => arg1 != null && arg2 != null && ... && arg8 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName8)"); |
RegisterFunction``10 | Register UDF with 9 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg9) => arg1 != null && arg2 != null && ... && arg9 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName9)"); |
RegisterFunction``11 | Register UDF with 10 input arguments, e.g: SqlContext.RegisterFunction<bool, string, string, ..., string>("MyFilter", (arg1, arg2, ..., arg10) => arg1 != null && arg2 != null && ... && arg10 != null); sqlContext.Sql("SELECT * FROM MyTable where MyFilter(columnName1, columnName2, ..., columnName10)"); |
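
To show how these pieces are typically combined, here is a minimal sketch. It assumes an existing SqlContext named `sqlContext`, that RegisterDataFrameAsTable takes the DataFrame followed by the table name, and that the file path, table name, and column names are purely illustrative.

```csharp
// Minimal sketch; assumes an existing SqlContext `sqlContext`. Path, table and column names are illustrative.
var people = sqlContext.JsonFile("hdfs://example/people.json");   // hypothetical JSON file
sqlContext.RegisterDataFrameAsTable(people, "people");            // assumed argument order: (dataFrame, tableName)
sqlContext.CacheTable("people");                                  // keep the table in memory across queries

// Register a one-argument UDF and call it from SQL, following the RegisterFunction``2 pattern above.
sqlContext.RegisterFunction<bool, string>("NotNull", name => name != null);
var adults = sqlContext.Sql("SELECT name FROM people WHERE NotNull(name) AND age >= 18");

sqlContext.UncacheTable("people");
sqlContext.DropTempTable("people");                               // clean up the temporary table
```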

---

###