diff --git a/aws/README.md b/aws/README.md
index 001875c..efaf292 100644
--- a/aws/README.md
+++ b/aws/README.md
@@ -12,7 +12,7 @@ ThisBuild / credentials += Credentials(
System.getenv("GITHUB_TOKEN")
)
-ThisBuild / libraryDependencies += "com.clairvoyant.data.scalaxy" %% "writer-aws" % "1.0.0"
+ThisBuild / libraryDependencies += "com.clairvoyant.data.scalaxy" %% "writer-aws" % "2.0.0"
```
Make sure you add `GITHUB_USERNAME` and `GITHUB_TOKEN` to the environment variables.
@@ -22,6 +22,33 @@ Make sure you add `GITHUB_USERNAME` and `GITHUB_TOKEN` to the environment variab
## S3 BUCKET
User can use this library to write/persist spark dataframe to s3 buckets in various file formats.
+
+### API
+
+The library provides the following `write` API in the type class `DataFrameToS3BucketWriter` to write a spark
+dataframe to an S3 bucket.
+
+```scala
+def write(
+ dataFrame: DataFrame,
+ fileFormat: T,
+ bucketName: String,
+ path: String,
+ saveMode: SaveMode = SaveMode.Overwrite
+ ): Unit
+```
+
+The `write` method takes the following arguments:
+
+| Argument Name | Mandatory | Default Value | Description |
+|:--------------|:---------:|:-------------:|:------------------------------------------------|
+| dataFrame | Yes | - | Dataframe to write to s3 bucket. |
+| fileFormat | Yes | - | `FileFormat` to use while writing to s3 bucket. |
+| bucketName | Yes | - | S3 bucket name. |
+| path | Yes | - | S3 path to write the dataframe. |
+| saveMode | No | overwrite | Save mode to use while writing to s3 bucket. |
+
+
Supported file formats are:
* CSV
@@ -29,12 +56,25 @@ Supported file formats are:
* XML
* Parquet
+
### CSV
Suppose user wants to write the dataframe `df` to s3 bucket `mybucket` under the path `outputPath` in the `csv` format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToCSVFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.CSVFileFormat
@@ -66,16 +106,11 @@ User can provide below options to the `CSVFileFormat` instance:
| timestampFormat | yyyy-MM-dd HH:mm:ss | Sets the string that indicates a timestamp format. |
| timestampNTZFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS] | Sets the string that indicates a timestamp without timezone format. |
-#### 2. Import type class instance
-```scala
-import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToCSVFileWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-DataFrameToS3BucketWriter
+DataFrameToS3BucketWriter[CSVFileFormat]
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -90,7 +125,19 @@ Suppose user wants to write the dataframe `df` to the s3 bucket `myBucket` under
format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToJSONFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.JSONFileFormat
@@ -113,16 +160,10 @@ User can provide below options to the `JSONFileFormat` instance:
| timestampNTZFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS] | Sets the string that indicates a timestamp without timezone format. |
| timezone | UTC | Sets the string that indicates a time zone ID to be used to format timestamps in the JSON datasources or partition values. |
-#### 2. Import type class instance
+#### 4. Call API
```scala
-import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToJSONFileWriter
-``````
-
-#### 3. Call API
-
-```scala
-DataFrameToS3BucketWriter
+DataFrameToS3BucketWriter[JSONFileFormat]
.write(
dataFrame = df,
fileFormat = jsonFileFormat,
@@ -136,7 +177,19 @@ DataFrameToS3BucketWriter
Suppose user wants to write the dataframe `df` to s3 bucket `myBucket` under the path `outputPath` in the `xml` format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToXMLFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.XMLFileFormat
@@ -161,16 +214,10 @@ User can provide below options to the `XMLFileFormat` instance:
| timestampFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX] | Controls the format used to write TimestampType format columns. |
| valueTag | _VALUE | The tag used for the value when there are attributes in the element having no child. |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToXMLFileWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-DataFrameToS3BucketWriter
+DataFrameToS3BucketWriter[XMLFileFormat]
.write(
dataFrame = df,
fileFormat = xmlFileFormat,
@@ -185,7 +232,19 @@ Suppose user wants to write the dataframe `df` to s3 bucket `myBucket` under the
format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToParquetFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.ParquetFileFormat
@@ -202,16 +261,10 @@ User can provide below options to the `ParquetFileFormat` instance:
| mergeSchema | false | Sets whether we should merge schemas collected from all Parquet part-files. |
| compression | snappy | Compression codec to use when saving to file. This can be one of the known case-insensitive shorten names (none, uncompressed, snappy, gzip, lzo, brotli, lz4, and zstd). |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToParquetFileWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-DataFrameToS3BucketWriter
+DataFrameToS3BucketWriter[ParquetFileFormat]
.write(
dataFrame = df,
fileFormat = parquetFileFormat,
diff --git a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToS3BucketWriter.scala b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToS3BucketWriter.scala
index 28597e7..72de4ac 100644
--- a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToS3BucketWriter.scala
+++ b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToS3BucketWriter.scala
@@ -1,18 +1,20 @@
package com.clairvoyant.data.scalaxy.writer.aws.s3
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.FileFormat
-import com.clairvoyant.data.scalaxy.writer.aws.s3.instances.DataFrameToS3FileWriter
import org.apache.spark.sql.{DataFrame, SaveMode}
-object DataFrameToS3BucketWriter {
+trait DataFrameToS3BucketWriter[T]:
- def write[T <: FileFormat](
+ def write(
dataFrame: DataFrame,
fileFormat: T,
bucketName: String,
path: String,
saveMode: SaveMode = SaveMode.Overwrite
- )(using dataFrameToS3FileWriter: DataFrameToS3FileWriter[T]): Unit =
- dataFrameToS3FileWriter.write(dataFrame, fileFormat, bucketName, path, saveMode)
+ ): Unit
-}
+object DataFrameToS3BucketWriter:
+
+ def apply[T <: FileFormat](
+ using dataFrameToS3BucketWriter: DataFrameToS3BucketWriter[T]
+ ): DataFrameToS3BucketWriter[T] = dataFrameToS3BucketWriter
diff --git a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToCSVFileWriter.scala b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToCSVFileWriter.scala
index cce7128..2261445 100644
--- a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToCSVFileWriter.scala
+++ b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToCSVFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.aws.s3.instances
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.CSVFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToCSVFileWriter extends DataFrameToS3FileWriter[CSVFileFormat] {
+implicit object DataFrameToCSVFileWriter extends DataFrameToS3BucketWriter[CSVFileFormat] {
import org.apache.spark.sql.catalyst.csv.CSVOptions.*
diff --git a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToJSONFileWriter.scala b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToJSONFileWriter.scala
index f733fc7..5a10dd1 100644
--- a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToJSONFileWriter.scala
+++ b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToJSONFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.aws.s3.instances
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.JSONFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToJSONFileWriter extends DataFrameToS3FileWriter[JSONFileFormat] {
+implicit object DataFrameToJSONFileWriter extends DataFrameToS3BucketWriter[JSONFileFormat] {
import org.apache.spark.sql.catalyst.json.JSONOptions.*
diff --git a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToParquetFileWriter.scala b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToParquetFileWriter.scala
index 6629c89..cc95bb9 100644
--- a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToParquetFileWriter.scala
+++ b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToParquetFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.aws.s3.instances
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.ParquetFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToParquetFileWriter extends DataFrameToS3FileWriter[ParquetFileFormat] {
+implicit object DataFrameToParquetFileWriter extends DataFrameToS3BucketWriter[ParquetFileFormat] {
override def write(
dataFrame: DataFrame,
diff --git a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToS3FileWriter.scala b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToS3FileWriter.scala
deleted file mode 100644
index 5c83e7f..0000000
--- a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToS3FileWriter.scala
+++ /dev/null
@@ -1,13 +0,0 @@
-package com.clairvoyant.data.scalaxy.writer.aws.s3.instances
-
-import org.apache.spark.sql.{DataFrame, SaveMode}
-
-trait DataFrameToS3FileWriter[T]:
-
- def write(
- dataFrame: DataFrame,
- fileFormat: T,
- bucketName: String,
- path: String,
- saveMode: SaveMode
- ): Unit
diff --git a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToXMLFileWriter.scala b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToXMLFileWriter.scala
index e4edae0..873de00 100644
--- a/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToXMLFileWriter.scala
+++ b/aws/src/main/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/instances/DataFrameToXMLFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.aws.s3.instances
+import com.clairvoyant.data.scalaxy.writer.aws.s3.DataFrameToS3BucketWriter
import com.clairvoyant.data.scalaxy.writer.aws.s3.formats.XMLFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToXMLFileWriter extends DataFrameToS3FileWriter[XMLFileFormat] {
+implicit object DataFrameToXMLFileWriter extends DataFrameToS3BucketWriter[XMLFileFormat] {
import com.databricks.spark.xml.*
diff --git a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToCSVS3BucketWriterSpec.scala b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToCSVS3BucketWriterSpec.scala
index 71ea671..e9ab0d4 100644
--- a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToCSVS3BucketWriterSpec.scala
+++ b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToCSVS3BucketWriterSpec.scala
@@ -10,6 +10,8 @@ class DataFrameToCSVS3BucketWriterSpec extends DataFrameReader with DataFrameMat
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToS3BucketWriter = DataFrameToS3BucketWriter[CSVFileFormat]
+
"write()" should "write a dataframe to the provided s3 path" in {
val df = readJSONFromText(
"""|{
@@ -25,8 +27,8 @@ class DataFrameToCSVS3BucketWriterSpec extends DataFrameReader with DataFrameMat
s3Client.createBucket(bucketName)
- DataFrameToS3BucketWriter
- .write[CSVFileFormat](
+ dataFrameToS3BucketWriter
+ .write(
dataFrame = df,
fileFormat = csvFileFormat,
bucketName = bucketName,
diff --git a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToJSONS3BucketWriterSpec.scala b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToJSONS3BucketWriterSpec.scala
index e8bca17..ffb11c7 100644
--- a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToJSONS3BucketWriterSpec.scala
+++ b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToJSONS3BucketWriterSpec.scala
@@ -10,6 +10,8 @@ class DataFrameToJSONS3BucketWriterSpec extends DataFrameReader with DataFrameMa
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToS3BucketWriter = DataFrameToS3BucketWriter[JSONFileFormat]
+
"write()" should "write a dataframe to the provided s3 path" in {
val df = readJSONFromText(
"""|{
@@ -25,8 +27,8 @@ class DataFrameToJSONS3BucketWriterSpec extends DataFrameReader with DataFrameMa
s3Client.createBucket(bucketName)
- DataFrameToS3BucketWriter
- .write[JSONFileFormat](
+ dataFrameToS3BucketWriter
+ .write(
dataFrame = df,
fileFormat = jsonFileFormat,
bucketName = bucketName,
diff --git a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToParquetS3BucketWriterSpec.scala b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToParquetS3BucketWriterSpec.scala
index 417d6fb..ffe2c80 100644
--- a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToParquetS3BucketWriterSpec.scala
+++ b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToParquetS3BucketWriterSpec.scala
@@ -10,6 +10,8 @@ class DataFrameToParquetS3BucketWriterSpec extends DataFrameReader with DataFram
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToS3BucketWriter = DataFrameToS3BucketWriter[ParquetFileFormat]
+
"write()" should "write a dataframe to the provided s3 path" in {
val df = readJSONFromText(
"""|{
@@ -25,8 +27,8 @@ class DataFrameToParquetS3BucketWriterSpec extends DataFrameReader with DataFram
s3Client.createBucket(bucketName)
- DataFrameToS3BucketWriter
- .write[ParquetFileFormat](
+ dataFrameToS3BucketWriter
+ .write(
dataFrame = df,
fileFormat = parquetFileFormat,
bucketName = bucketName,
diff --git a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToXMLS3BucketWriterSpec.scala b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToXMLS3BucketWriterSpec.scala
index 1406e58..620b1e6 100644
--- a/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToXMLS3BucketWriterSpec.scala
+++ b/aws/src/test/scala/com/clairvoyant/data/scalaxy/writer/aws/s3/DataFrameToXMLS3BucketWriterSpec.scala
@@ -10,6 +10,8 @@ class DataFrameToXMLS3BucketWriterSpec extends DataFrameReader with DataFrameMat
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToS3BucketWriter = DataFrameToS3BucketWriter[XMLFileFormat]
+
"write()" should "write a dataframe to the provided s3 path" in {
val df = readJSONFromText(
"""|{
@@ -25,8 +27,8 @@ class DataFrameToXMLS3BucketWriterSpec extends DataFrameReader with DataFrameMat
s3Client.createBucket(bucketName)
- DataFrameToS3BucketWriter
- .write[XMLFileFormat](
+ dataFrameToS3BucketWriter
+ .write(
dataFrame = df,
fileFormat = xmlFileFormat,
bucketName = bucketName,
diff --git a/gcp/README.md b/gcp/README.md
index a63f4cb..a64d603 100644
--- a/gcp/README.md
+++ b/gcp/README.md
@@ -12,7 +12,7 @@ ThisBuild / credentials += Credentials(
System.getenv("GITHUB_TOKEN")
)
-ThisBuild / libraryDependencies += "com.clairvoyant.data.scalaxy" %% "writer-gcp" % "1.0.0"
+ThisBuild / libraryDependencies += "com.clairvoyant.data.scalaxy" %% "writer-gcp" % "2.0.0"
```
Make sure you add `GITHUB_USERNAME` and `GITHUB_TOKEN` to the environment variables.
@@ -22,6 +22,31 @@ Make sure you add `GITHUB_USERNAME` and `GITHUB_TOKEN` to the environment variab
## GCS Bucket
User can use this library to write/persist spark dataframe to gcs buckets in various file formats.
+
+### API
+
+The library provides the following `write` method to write a dataframe to a gcs bucket:
+
+```scala
+def write(
+ dataFrame: DataFrame,
+ fileFormat: T,
+ bucketName: String,
+ path: String,
+ saveMode: SaveMode = SaveMode.Overwrite
+ ): Unit
+```
+
+The `write` method takes the following parameters:
+
+| Parameter Name | Mandatory | Default Value | Description |
+|:---------------|:---------:|:-------------:|:--------------------------------------------------------------------------------------------------------------|
+| dataFrame | Yes | None | Spark dataframe to be written to gcs bucket. |
+| fileFormat | Yes | None | The instance of file format type class. |
+| bucketName | Yes | None | The name of gcs bucket where dataframe needs to be persisted. |
+| path | Yes | None | The path inside the gcs bucket where dataframe needs to be persisted. |
+| saveMode | No | Overwrite | Mode of writing; default is overwrite; can be avoided if writeDisposition/ createDisposition has been defined |
+
Supported file formats are:
* CSV
@@ -34,7 +59,19 @@ Supported file formats are:
Suppose user wants to write the dataframe `df` to gcs bucket `mybucket` under the path `outputPath` in the `csv` format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToCSVFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.CSVFileFormat
@@ -66,23 +103,17 @@ User can provide below options to the `CSVFileFormat` instance:
| timestampFormat | yyyy-MM-dd HH:mm:ss | Sets the string that indicates a timestamp format. |
| timestampNTZFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS] | Sets the string that indicates a timestamp without timezone format. |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToCSVFileWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-DataFrameToGCSBucketWriter
+DataFrameToGCSBucketWriter[CSVFileFormat]
.write(
dataFrame = df,
fileFormat = csvFileFormat,
bucketName = mybucket,
path = outputPath
)
-``````
+```
### JSON
@@ -90,7 +121,19 @@ Suppose user wants to write the dataframe `df` to the gcs bucket `myBucket` unde
format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToJSONFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.JSONFileFormat
@@ -113,30 +156,36 @@ User can provide below options to the `JSONFileFormat` instance:
| timestampNTZFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS] | Sets the string that indicates a timestamp without timezone format. |
| timezone | UTC | Sets the string that indicates a time zone ID to be used to format timestamps in the JSON datasources or partition values. |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToJSONFileWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-DataFrameToGCSBucketWriter
+DataFrameToGCSBucketWriter[JSONFileFormat]
.write(
dataFrame = df,
fileFormat = jsonFileFormat,
bucketName = myBucket,
path = outputPath
)
-``````
+```
### XML
Suppose user wants to write the dataframe `df` to gcs bucket `myBucket` under the path `outputPath` in the `xml` format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToXMLFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.XMLFileFormat
@@ -161,23 +210,17 @@ User can provide below options to the `XMLFileFormat` instance:
| timestampFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX] | Controls the format used to write TimestampType format columns. |
| valueTag | _VALUE | The tag used for the value when there are attributes in the element having no child. |
-#### 2. Import type class instance
+#### 4. Call API
```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToXMLFileWriter
-``````
-
-#### 3. Call API
-
-```scala
-DataFrameToGCSBucketWriter
+DataFrameToGCSBucketWriter[XMLFileFormat]
.write(
dataFrame = df,
fileFormat = xmlFileFormat,
bucketName = myBucket,
path = outputPath
)
-``````
+```
### PARQUET
@@ -185,12 +228,26 @@ Suppose user wants to write the dataframe `df` to gcs bucket `myBucket` under th
format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToParquetFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.ParquetFileFormat
-val parquetFileFormat = ParquetFileFormat()
+val parquetFileFormat = ParquetFileFormat(
+ mergeSchema = true
+)
```
User can provide below options to the `ParquetFileFormat` instance:
@@ -202,28 +259,50 @@ User can provide below options to the `ParquetFileFormat` instance:
| mergeSchema | false | Sets whether we should merge schemas collected from all Parquet part-files. |
| compression | snappy | Compression codec to use when saving to file. This can be one of the known case-insensitive shorten names (none, uncompressed, snappy, gzip, lzo, brotli, lz4, and zstd). |
-#### 2. Import type class instance
+#### 4. Call API
```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToParquetFileWriter
-``````
-
-#### 3. Call API
-
-```scala
-DataFrameToGCSBucketWriter
+DataFrameToGCSBucketWriter[ParquetFileFormat]
.write(
dataFrame = df,
fileFormat = parquetFileFormat,
bucketName = myBucket,
path = outputPath
)
-``````
+```
## BigQuery
User can use this library to write/persist spark dataframe to google cloud BigQuery table.
+### API
+
+The library provides the following `write` method to write a dataframe to a bigquery table:
+
+```scala
+def write(
+ dataFrame: DataFrame,
+ table: String,
+ dataset: Option[String] = None,
+ project: Option[String] = None,
+ parentProject: Option[String] = None,
+ saveMode: SaveMode = SaveMode.Overwrite,
+ writerType: T
+ ): Unit
+```
+
+The `write` method takes the following parameters:
+
+| Parameter Name | Mandatory | Default Value | Description |
+|:---------------|:---------:|:-------------:|:--------------------------------------------------------------------------------------------------------------------------------------------|
+| dataFrame | Yes | None | Spark dataframe to be written to BigQuery table. |
+| table | Yes | None | The name of bigquery table where dataframe needs to be persisted. |
+| dataset | No | None | The dataset containing the table. If you are providing fully qualified name in `table` parameter, then you can ignore this option. |
+| project | No | None | The Google Cloud Project ID of the table.<br/>(Optional. Defaults to the project of the Service Account being used) |
+| parentProject | No | None | The Google Cloud Project ID of the table to bill for the export.<br/>(Optional. Defaults to the project of the Service Account being used). |
+| saveMode | No | Overwrite | Mode of writing; default is overwrite; can be avoided if writeDisposition/ createDisposition has been defined |
+| writerType | Yes | None | The instance of direct or indirect big query writer type. |
+
There are two ways to write the dataframe to BigQuery table:
* Direct Write
@@ -238,7 +317,19 @@ Suppose user wants to write the dataframe `df` to the bigQuery table named `myBQ
dataset `myBQDataset`.
Then user need to perform below steps:
-#### 1. Define BigQuery writer type
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.DataFrameToBigQueryWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances.DataFrameToDirectBQWriter
+```
+
+#### 3. Define BigQuery writer type
```scala
import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.types.DirectBigQueryWriterType
@@ -246,7 +337,7 @@ import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.types.DirectBigQueryWrit
val bigQueryWriterType = DirectBigQueryWriterType(
createDisposition = "CREATE_IF_NEEDED"
)
-``````
+```
Apart from `createDisposition`, user can pass below parameters to the `DirectBigQueryWriterType` instance:
@@ -267,33 +358,37 @@ Apart from `createDisposition`, user can pass below parameters to the `DirectBig
| queryJobPriority | INTERACTIVE | Priority levels set for the job while reading data from BigQuery query. The permitted values are:
BATCH - Query is queued and started as soon as idle resources are available, usually within a few minutes. If the query hasn't started within 3 hours, its priority is changed to INTERACTIVE.
INTERACTIVE - Query is executed as soon as possible and count towards the concurrent rate limit and the daily rate limit.
For WRITE, this option will be effective when DIRECT write is used with OVERWRITE mode, where the connector overwrites the destination table using MERGE statement. |
| writeAtLeastOnce | false | Guarantees that data is written to BigQuery at least once. This is a lesser guarantee than exactly once. This is suitable for streaming scenarios in which data is continuously being written in small batches.
Supported only by the `DIRECT` write method and mode is NOT `Overwrite`. |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances.DataFrameToDirectBQWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.DataFrameToBigQueryWriter
-
-DataFrameToBigQueryWriter
- .write[DirectBigQueryWriterType](
+DataFrameToBigQueryWriter[DirectBigQueryWriterType]
+ .write(
dataFrame = df,
table = myBQTable,
dataset = myBQDataset,
writerType = bigQueryWriterType
)
-``````
+```
-### Direct Write
+### Indirect Write
Suppose user wants to write the dataframe `df` to the bigQuery table named `myBQTable` present under the
dataset `myBQDataset`.
Then user need to perform below steps:
-#### 1. Define BigQuery writer type
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.DataFrameToBigQueryWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances.DataFrameToIndirectBQWriter
+```
+
+#### 3. Define BigQuery writer type
```scala
import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.types.IndirectBigQueryWriterType
@@ -301,7 +396,7 @@ import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.types.IndirectBigQueryWr
val bigQueryWriterType = IndirectBigQueryWriterType(
createDisposition = "CREATE_IF_NEEDED"
)
-``````
+```
Apart from `createDisposition`, user can pass below parameters to the `DirectBigQueryWriterType` instance:
@@ -330,37 +425,17 @@ Apart from `createDisposition`, user can pass below parameters to the `DirectBig
| temporaryGcsBucket | None | The GCS bucket that temporarily holds the data before it is loaded to BigQuery. Required unless set in the Spark configuration (spark.conf.set(...)). |
| useAvroLogicalTypes | false | When loading from Avro (`.option("intermediateFormat", "avro")`), BigQuery uses the underlying Avro types instead of the logical types [by default]. Supplying this option converts Avro logical types to their corresponding BigQuery data types. |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances.DataFrameToIndirectBQWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.DataFrameToBigQueryWriter
-
-DataFrameToBigQueryWriter
- .write[IndirectBigQueryWriterType](
+DataFrameToBigQueryWriter[IndirectBigQueryWriterType]
+ .write(
dataFrame = df,
table = myBQTable,
dataset = myBQDataset,
writerType = bigQueryWriterType
)
-``````
-
-User can provide below parameters to the `write` method:
-
-| Parameter Name | Mandatory | Default Value | Description |
-|:---------------|:---------:|:-------------:|:--------------------------------------------------------------------------------------------------------------------------------------------|
-| dataFrame | Yes | None | Spark dataframe to be written to BigQuery table. |
-| table | Yes | None | The name of big query table where dataframe needs to be persisted. |
-| dataset | No | None | The dataset containing the table. If you are providing fully qualified name in `table` parameter, then you can ignore this option. |
-| project | No | None | The Google Cloud Project ID of the table.
(Optional. Defaults to the project of the Service Account being used) |
-| parentProject | No | None | The Google Cloud Project ID of the table to bill for the export.
(Optional. Defaults to the project of the Service Account being used). |
-| saveMode | No | Overwrite | Mode of writing; default is overwrite; can be avoided if writeDisposition/ createDisposition has been defined |
-| writerType | Yes | None | The instance of direct or indirect big query writer type. |
+```
Also, note that for writing to the BigQuery it is necessary to have below privileges to the user:
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/DataFrameToBigQueryWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/DataFrameToBigQueryWriter.scala
index 33d22ba..d4f5612 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/DataFrameToBigQueryWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/DataFrameToBigQueryWriter.scala
@@ -1,12 +1,11 @@
package com.clairvoyant.data.scalaxy.writer.gcp.bigquery
-import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances.DataFrameToBQWriter
import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.types.BigQueryWriterType
import org.apache.spark.sql.{DataFrame, SaveMode}
-object DataFrameToBigQueryWriter {
+trait DataFrameToBigQueryWriter[T]:
- def write[T <: BigQueryWriterType](
+ def write(
dataFrame: DataFrame,
table: String,
dataset: Option[String] = None,
@@ -14,15 +13,10 @@ object DataFrameToBigQueryWriter {
parentProject: Option[String] = None,
saveMode: SaveMode = SaveMode.Overwrite,
writerType: T
- )(using dataFrameToBQWriter: DataFrameToBQWriter[T]): Unit =
- dataFrameToBQWriter.write(
- dataFrame,
- table,
- dataset,
- project,
- parentProject,
- saveMode,
- writerType
- )
+ ): Unit
-}
+object DataFrameToBigQueryWriter:
+
+ def apply[T <: BigQueryWriterType](
+ using dataFrameToBigQueryWriter: DataFrameToBigQueryWriter[T]
+ ): DataFrameToBigQueryWriter[T] = dataFrameToBigQueryWriter
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToBQWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToBQWriter.scala
deleted file mode 100644
index f9cb3a2..0000000
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToBQWriter.scala
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances
-
-import org.apache.spark.sql.{DataFrame, SaveMode}
-
-trait DataFrameToBQWriter[T]:
-
- def write(
- dataFrame: DataFrame,
- table: String,
- dataset: Option[String],
- project: Option[String],
- parentProject: Option[String],
- saveMode: SaveMode = SaveMode.Overwrite,
- writerType: T
- ): Unit
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToDirectBQWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToDirectBQWriter.scala
index 49390d9..fe346b3 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToDirectBQWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToDirectBQWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances
+import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.DataFrameToBigQueryWriter
import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.types.DirectBigQueryWriterType
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToDirectBQWriter extends DataFrameToBQWriter[DirectBigQueryWriterType] {
+implicit object DataFrameToDirectBQWriter extends DataFrameToBigQueryWriter[DirectBigQueryWriterType] {
override def write(
dataFrame: DataFrame,
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToIndirectBQWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToIndirectBQWriter.scala
index bc13272..cf089be 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToIndirectBQWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/bigquery/instances/DataFrameToIndirectBQWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.gcp.bigquery.instances
+import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.DataFrameToBigQueryWriter
import com.clairvoyant.data.scalaxy.writer.gcp.bigquery.types.IndirectBigQueryWriterType
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToIndirectBQWriter extends DataFrameToBQWriter[IndirectBigQueryWriterType] {
+implicit object DataFrameToIndirectBQWriter extends DataFrameToBigQueryWriter[IndirectBigQueryWriterType] {
override def write(
dataFrame: DataFrame,
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/DataFrameToGCSBucketWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/DataFrameToGCSBucketWriter.scala
index ce94cee..31d08fe 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/DataFrameToGCSBucketWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/DataFrameToGCSBucketWriter.scala
@@ -1,18 +1,20 @@
package com.clairvoyant.data.scalaxy.writer.gcp.gcs
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.FileFormat
-import com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances.DataFrameToGCSFileWriter
import org.apache.spark.sql.{DataFrame, SaveMode}
-object DataFrameToGCSBucketWriter {
+trait DataFrameToGCSBucketWriter[T]:
- def write[T <: FileFormat](
+ def write(
dataFrame: DataFrame,
fileFormat: T,
bucketName: String,
path: String,
saveMode: SaveMode = SaveMode.Overwrite
- )(using dataFrameToGCSFileWriter: DataFrameToGCSFileWriter[T]): Unit =
- dataFrameToGCSFileWriter.write(dataFrame, fileFormat, bucketName, path, saveMode)
+ ): Unit
-}
+object DataFrameToGCSBucketWriter:
+
+ def apply[T <: FileFormat](
+ using dataFrameToGCSBucketWriter: DataFrameToGCSBucketWriter[T]
+ ): DataFrameToGCSBucketWriter[T] = dataFrameToGCSBucketWriter
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToCSVFileWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToCSVFileWriter.scala
index b1029c5..b7ec4aa 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToCSVFileWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToCSVFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.CSVFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToCSVFileWriter extends DataFrameToGCSFileWriter[CSVFileFormat] {
+implicit object DataFrameToCSVFileWriter extends DataFrameToGCSBucketWriter[CSVFileFormat] {
import org.apache.spark.sql.catalyst.csv.CSVOptions.*
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToGCSFileWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToGCSFileWriter.scala
deleted file mode 100644
index e270718..0000000
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToGCSFileWriter.scala
+++ /dev/null
@@ -1,13 +0,0 @@
-package com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances
-
-import org.apache.spark.sql.{DataFrame, SaveMode}
-
-trait DataFrameToGCSFileWriter[T]:
-
- def write(
- dataFrame: DataFrame,
- fileFormat: T,
- bucketName: String,
- path: String,
- saveMode: SaveMode
- ): Unit
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToJSONFileWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToJSONFileWriter.scala
index d44b747..79254fd 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToJSONFileWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToJSONFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.JSONFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToJSONFileWriter extends DataFrameToGCSFileWriter[JSONFileFormat] {
+implicit object DataFrameToJSONFileWriter extends DataFrameToGCSBucketWriter[JSONFileFormat] {
import org.apache.spark.sql.catalyst.json.JSONOptions.*
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToParquetFileWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToParquetFileWriter.scala
index a527f47..dc277b4 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToParquetFileWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToParquetFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.ParquetFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToParquetFileWriter extends DataFrameToGCSFileWriter[ParquetFileFormat] {
+implicit object DataFrameToParquetFileWriter extends DataFrameToGCSBucketWriter[ParquetFileFormat] {
override def write(
dataFrame: DataFrame,
diff --git a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToXMLFileWriter.scala b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToXMLFileWriter.scala
index d1ac893..a7a0ad4 100644
--- a/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToXMLFileWriter.scala
+++ b/gcp/src/main/scala/com/clairvoyant/data/scalaxy/writer/gcp/gcs/instances/DataFrameToXMLFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.gcp.gcs.instances
+import com.clairvoyant.data.scalaxy.writer.gcp.gcs.DataFrameToGCSBucketWriter
import com.clairvoyant.data.scalaxy.writer.gcp.gcs.formats.XMLFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToXMLFileWriter extends DataFrameToGCSFileWriter[XMLFileFormat] {
+implicit object DataFrameToXMLFileWriter extends DataFrameToGCSBucketWriter[XMLFileFormat] {
import com.databricks.spark.xml.*
diff --git a/local-file-system/README.md b/local-file-system/README.md
index 6b474f9..38d04ab 100644
--- a/local-file-system/README.md
+++ b/local-file-system/README.md
@@ -12,19 +12,60 @@ ThisBuild / credentials += Credentials(
System.getenv("GITHUB_TOKEN")
)
-ThisBuild / libraryDependencies += "com.clairvoyant.data.scalaxy" %% "writer-local-file-system" % "1.0.0"
+ThisBuild / libraryDependencies += "com.clairvoyant.data.scalaxy" %% "writer-local-file-system" % "2.0.0"
```
Make sure you add `GITHUB_USERNAME` and `GITHUB_TOKEN` to the environment variables.
`GITHUB_TOKEN` is the Personal Access Token with the permission to read packages.
+### API
+
+The API is very simple. The user needs to call the `write` method of the `DataFrameToLocalFileSystemWriter` type class instance for the desired file format.
+
+```scala
+def write(
+ dataFrame: DataFrame,
+ fileFormat: T,
+ path: String,
+ saveMode: SaveMode = SaveMode.Overwrite
+ ): Unit
+```
+
+The `write` method takes the following parameters:
+
+| Parameter Name | Mandatory | Default Value | Description |
+|:---------------|:---------:|:-------------:|:-------------------------------------------------|
+| dataFrame | Yes | | The dataframe to write to the local file system. |
+| fileFormat | Yes | | The file format to use to write the dataframe. |
+| path | Yes | | The path to write the dataframe. |
+| saveMode | No | Overwrite | The save mode to use to write the dataframe. |
+
+Supported file formats are:
+
+* CSV
+* JSON
+* XML
+* Parquet
+
### CSV
Suppose user wants to write the dataframe `df` to the local file system under the path `outputPath` in the `csv` format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToCSVFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.local.file.formats.CSVFileFormat
@@ -37,7 +78,7 @@ val csvFileFormat = CSVFileFormat(
User can provide below options to the `CSVFileFormat` instance:
| Parameter Name | Default Value | Description |
-| :------------------------ | :-------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+|:--------------------------|:---------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| charToEscapeQuoteEscaping | \ | Sets a single character used for escaping the escape for the quote character. |
| compression | none | Compression codec to use when saving to file. This can be one of the known case-insensitive shorten names (none, bzip2, gzip, lz4, snappy and deflate). |
| dateFormat | yyyy-MM-dd | Sets the string that indicates a date format. |
@@ -56,42 +97,49 @@ User can provide below options to the `CSVFileFormat` instance:
| timestampFormat | yyyy-MM-dd HH:mm:ss | Sets the string that indicates a timestamp format. |
| timestampNTZFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS] | Sets the string that indicates a timestamp without timezone format. |
-#### 2. Import type class instance
+#### 4. Call API
```scala
-import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToCSVFileWriter
-``````
+DataFrameToLocalFileSystemWriter[CSVFileFormat]
+ .write(
+ dataFrame = df,
+ fileFormat = csvFileFormat,
+ path = outputPath
+ )
+```
-#### 3. Call API
+### JSON
+
+Suppose the user wants to write the dataframe `df` to the local file system under the path `outputPath` in the `json`
+format.
+Then the user needs to perform the steps below:
+
+#### 1. Import type class
```scala
-DataFrameToLocalFileSystemWriter
- .write(
- dataFrame = df,
- fileFormat = csvFileFormat,
- path = outputPath
- )
-``````
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
+```
-### JSON
+#### 2. Import type class instance
-Suppose user wants to write the dataframe `df` to the local file system under the path `outputPath` in the `json` format.
-Then user need to perform below steps:
+```scala
+import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToJSONFileWriter
+```
-#### 1. Define file format
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.local.file.formats.JSONFileFormat
val jsonFileFormat = JSONFileFormat(
- ignoreNullFields = true
- )
+ ignoreNullFields = true
+)
```
User can provide below options to the `JSONFileFormat` instance:
| Parameter Name | Default Value | Description |
-| :----------------- | :-------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------ |
+|:-------------------|:---------------------------:|:--------------------------------------------------------------------------------------------------------------------------------------------------------|
| compression | none | Compression codec to use when saving to file. This can be one of the known case-insensitive shorten names (none, bzip2, gzip, lz4, snappy and deflate). |
| dateFormat | yyyy-MM-dd | Sets the string that indicates a date format. |
| encoding | UTF-8 | Specifies encoding (charset) of saved CSV files. |
@@ -101,42 +149,48 @@ User can provide below options to the `JSONFileFormat` instance:
| timestampNTZFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS] | Sets the string that indicates a timestamp without timezone format. |
| timezone | UTC | Sets the string that indicates a time zone ID to be used to format timestamps in the JSON datasources or partition values. |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToJSONFileWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-DataFrameToLocalFileSystemWriter
- .write(
- dataFrame = df,
- fileFormat = jsonFileFormat,
- path = outputPath
- )
-``````
+DataFrameToLocalFileSystemWriter[JSONFileFormat]
+ .write(
+ dataFrame = df,
+ fileFormat = jsonFileFormat,
+ path = outputPath
+ )
+```
### XML
Suppose user wants to write the dataframe `df` to the local file system under the path `outputPath` in the `xml` format.
Then user need to perform below steps:
-#### 1. Define file format
+#### 1. Import type class
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
+```
+
+#### 2. Import type class instance
+
+```scala
+import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToXMLFileWriter
+```
+
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.local.file.formats.XMLFileFormat
val xmlFileFormat = XMLFileFormat(
- attributePrefix = "attr_"
- )
+ attributePrefix = "attr_"
+)
```
User can provide below options to the `XMLFileFormat` instance:
| Parameter Name | Default Value | Description |
-| :--------------- | :---------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+|:-----------------|:-----------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| arrayElementName | item | Name of XML element that encloses each element of an array-valued column when writing. |
| attributePrefix | _ | The prefix for attributes so that we can differentiating attributes and elements. This will be the prefix for field names. |
| compression | None | Compression codec to use when saving to file.
Should be the fully qualified name of a class implementing org.apache.hadoop.io.compress.CompressionCodec or one of case-insensitive shorten names (bzip2, gzip, lz4, and snappy).
Defaults to no compression when a codec is not specified. |
@@ -148,29 +202,36 @@ User can provide below options to the `XMLFileFormat` instance:
| timestampFormat | yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX] | Controls the format used to write TimestampType format columns. |
| valueTag | _VALUE | The tag used for the value when there are attributes in the element having no child. |
-#### 2. Import type class instance
+#### 4. Call API
```scala
-import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToXMLFileWriter
-``````
+DataFrameToLocalFileSystemWriter[XMLFileFormat]
+ .write(
+ dataFrame = df,
+ fileFormat = xmlFileFormat,
+ path = outputPath
+ )
+```
+
+### PARQUET
+
+Suppose the user wants to write the dataframe `df` to the local file system under the path `outputPath` in the `parquet`
+format.
+Then the user needs to perform the steps below:
-#### 3. Call API
+#### 1. Import type class
```scala
-DataFrameToLocalFileSystemWriter
- .write(
- dataFrame = df,
- fileFormat = xmlFileFormat,
- path = outputPath
- )
-``````
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
+```
-### PARQUET
+#### 2. Import type class instance
-Suppose user wants to write the dataframe `df` to the local file system under the path `outputPath` in the `parquet` format.
-Then user need to perform below steps:
+```scala
+import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToParquetFileWriter
+```
-#### 1. Define file format
+#### 3. Define file format
```scala
import com.clairvoyant.data.scalaxy.writer.local.file.formats.ParquetFileFormat
@@ -181,25 +242,19 @@ val parquetFileFormat = ParquetFileFormat()
User can provide below options to the `ParquetFileFormat` instance:
| Parameter Name | Default Value | Description |
-| :----------------- | :-----------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+|:-------------------|:-------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| datetimeRebaseMode | EXCEPTION | The datetimeRebaseMode option allows to specify the rebasing mode for the values of the DATE, TIMESTAMP_MILLIS, TIMESTAMP_MICROS logical types from the Julian to Proleptic Gregorian calendar.
Currently supported modes are:
EXCEPTION: fails in reads of ancient dates/timestamps that are ambiguous between the two calendars.
CORRECTED: loads dates/timestamps without rebasing.
LEGACY: performs rebasing of ancient dates/timestamps from the Julian to Proleptic Gregorian calendar. |
| int96RebaseMode | EXCEPTION | The int96RebaseMode option allows to specify the rebasing mode for INT96 timestamps from the Julian to Proleptic Gregorian calendar. Currently supported modes are:
EXCEPTION: fails in reads of ancient INT96 timestamps that are ambiguous between the two calendars.
CORRECTED: loads INT96 timestamps without rebasing.
LEGACY: performs rebasing of ancient timestamps from the Julian to Proleptic Gregorian calendar. |
| mergeSchema | false | Sets whether we should merge schemas collected from all Parquet part-files. |
| compression | snappy | Compression codec to use when saving to file. This can be one of the known case-insensitive shorten names (none, uncompressed, snappy, gzip, lzo, brotli, lz4, and zstd). |
-#### 2. Import type class instance
-
-```scala
-import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToParquetFileWriter
-``````
-
-#### 3. Call API
+#### 4. Call API
```scala
-DataFrameToLocalFileSystemWriter
- .write(
- dataFrame = df,
- fileFormat = parquetFileFormat,
- path = outputPath
- )
+DataFrameToLocalFileSystemWriter[ParquetFileFormat]
+ .write(
+ dataFrame = df,
+ fileFormat = parquetFileFormat,
+ path = outputPath
+ )
``````
\ No newline at end of file
diff --git a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToLocalFileSystemWriter.scala b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToLocalFileSystemWriter.scala
index 7eab569..14d132a 100644
--- a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToLocalFileSystemWriter.scala
+++ b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToLocalFileSystemWriter.scala
@@ -1,17 +1,19 @@
package com.clairvoyant.data.scalaxy.writer.local.file
import com.clairvoyant.data.scalaxy.writer.local.file.formats.FileFormat
-import com.clairvoyant.data.scalaxy.writer.local.file.instances.DataFrameToFileWriter
import org.apache.spark.sql.{DataFrame, SaveMode}
-object DataFrameToLocalFileSystemWriter {
+trait DataFrameToLocalFileSystemWriter[T]:
- def write[T <: FileFormat](
+ def write(
dataFrame: DataFrame,
fileFormat: T,
path: String,
saveMode: SaveMode = SaveMode.Overwrite
- )(using dataFrameToFileWriter: DataFrameToFileWriter[T]): Unit =
- dataFrameToFileWriter.write(dataFrame, fileFormat, path, saveMode)
+ ): Unit
-}
+object DataFrameToLocalFileSystemWriter:
+
+ def apply[T <: FileFormat](
+ using dataFrameToLocalFileSystemWriter: DataFrameToLocalFileSystemWriter[T]
+ ): DataFrameToLocalFileSystemWriter[T] = dataFrameToLocalFileSystemWriter
diff --git a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToCSVFileWriter.scala b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToCSVFileWriter.scala
index 11a11bb..061f302 100644
--- a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToCSVFileWriter.scala
+++ b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToCSVFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.local.file.instances
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
import com.clairvoyant.data.scalaxy.writer.local.file.formats.CSVFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToCSVFileWriter extends DataFrameToFileWriter[CSVFileFormat] {
+implicit object DataFrameToCSVFileWriter extends DataFrameToLocalFileSystemWriter[CSVFileFormat] {
import org.apache.spark.sql.catalyst.csv.CSVOptions.*
diff --git a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToFileWriter.scala b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToFileWriter.scala
deleted file mode 100644
index 3dc6231..0000000
--- a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToFileWriter.scala
+++ /dev/null
@@ -1,7 +0,0 @@
-package com.clairvoyant.data.scalaxy.writer.local.file.instances
-
-import org.apache.spark.sql.{DataFrame, SaveMode}
-
-trait DataFrameToFileWriter[T]:
-
- def write(dataFrame: DataFrame, fileFormat: T, path: String, saveMode: SaveMode): Unit
diff --git a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToJSONFileWriter.scala b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToJSONFileWriter.scala
index be9d7ee..3e553a8 100644
--- a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToJSONFileWriter.scala
+++ b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToJSONFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.local.file.instances
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
import com.clairvoyant.data.scalaxy.writer.local.file.formats.JSONFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToJSONFileWriter extends DataFrameToFileWriter[JSONFileFormat] {
+implicit object DataFrameToJSONFileWriter extends DataFrameToLocalFileSystemWriter[JSONFileFormat] {
import org.apache.spark.sql.catalyst.json.JSONOptions.*
diff --git a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToParquetFileWriter.scala b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToParquetFileWriter.scala
index 0b6016b..cdd9fde 100644
--- a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToParquetFileWriter.scala
+++ b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToParquetFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.local.file.instances
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
import com.clairvoyant.data.scalaxy.writer.local.file.formats.ParquetFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToParquetFileWriter extends DataFrameToFileWriter[ParquetFileFormat] {
+implicit object DataFrameToParquetFileWriter extends DataFrameToLocalFileSystemWriter[ParquetFileFormat] {
override def write(dataFrame: DataFrame, fileFormat: ParquetFileFormat, path: String, saveMode: SaveMode): Unit =
dataFrame.write
diff --git a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToXMLFileWriter.scala b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToXMLFileWriter.scala
index 2b92d27..deb7628 100644
--- a/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToXMLFileWriter.scala
+++ b/local-file-system/src/main/scala/com/clairvoyant/data/scalaxy/writer/local/file/instances/DataFrameToXMLFileWriter.scala
@@ -1,9 +1,10 @@
package com.clairvoyant.data.scalaxy.writer.local.file.instances
+import com.clairvoyant.data.scalaxy.writer.local.file.DataFrameToLocalFileSystemWriter
import com.clairvoyant.data.scalaxy.writer.local.file.formats.XMLFileFormat
import org.apache.spark.sql.{DataFrame, SaveMode}
-implicit object DataFrameToXMLFileWriter extends DataFrameToFileWriter[XMLFileFormat] {
+implicit object DataFrameToXMLFileWriter extends DataFrameToLocalFileSystemWriter[XMLFileFormat] {
import com.databricks.spark.xml.*
diff --git a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToCSVLocalFileSystemWriterSpec.scala b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToCSVLocalFileSystemWriterSpec.scala
index f3e918c..fd74587 100644
--- a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToCSVLocalFileSystemWriterSpec.scala
+++ b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToCSVLocalFileSystemWriterSpec.scala
@@ -13,6 +13,8 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToLocalFileSystemWriter = DataFrameToLocalFileSystemWriter[CSVFileFormat]
+
"write()" should "write a dataframe to the provided path" in {
val df = readJSONFromText(
"""|{
@@ -24,7 +26,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
val csvFileFormat = CSVFileFormat()
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -54,7 +56,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
emptyValue = "NA"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -84,7 +86,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
header = false
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -117,7 +119,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
ignoreLeadingWhiteSpace = false
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -147,7 +149,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
ignoreLeadingWhiteSpace = true
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -177,7 +179,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
ignoreTrailingWhiteSpace = false
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -207,7 +209,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
ignoreTrailingWhiteSpace = true
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -237,7 +239,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
lineSep = "#"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -270,7 +272,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
nullValue = "Invalid"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
@@ -300,7 +302,7 @@ class DataFrameToCSVLocalFileSystemWriterSpec extends DataFrameReader with DataF
sep = ";"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = csvFileFormat,
diff --git a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToJSONLocalFileSystemWriterSpec.scala b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToJSONLocalFileSystemWriterSpec.scala
index a612c29..0250907 100644
--- a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToJSONLocalFileSystemWriterSpec.scala
+++ b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToJSONLocalFileSystemWriterSpec.scala
@@ -13,6 +13,8 @@ class DataFrameToJSONLocalFileSystemWriterSpec extends DataFrameReader with Data
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToLocalFileSystemWriter = DataFrameToLocalFileSystemWriter[JSONFileFormat]
+
"write()" should "write a dataframe to the provided path" in {
val df = readJSONFromText(
"""|{
@@ -24,7 +26,7 @@ class DataFrameToJSONLocalFileSystemWriterSpec extends DataFrameReader with Data
val jsonFileFormat = JSONFileFormat()
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = jsonFileFormat,
@@ -50,7 +52,7 @@ class DataFrameToJSONLocalFileSystemWriterSpec extends DataFrameReader with Data
ignoreNullFields = true
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = jsonFileFormat,
diff --git a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToParquetLocalFileSystemWriterSpec.scala b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToParquetLocalFileSystemWriterSpec.scala
index 0e14d5d..163aba7 100644
--- a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToParquetLocalFileSystemWriterSpec.scala
+++ b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToParquetLocalFileSystemWriterSpec.scala
@@ -13,6 +13,8 @@ class DataFrameToParquetLocalFileSystemWriterSpec
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToLocalFileSystemWriter = DataFrameToLocalFileSystemWriter[ParquetFileFormat]
+
"write()" should "write a dataframe to the provided path" in {
val df = readJSONFromText(
"""|{
@@ -24,7 +26,7 @@ class DataFrameToParquetLocalFileSystemWriterSpec
val parquetFileFormat = ParquetFileFormat()
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = parquetFileFormat,
diff --git a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToXMLLocalFileSystemWriterSpec.scala b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToXMLLocalFileSystemWriterSpec.scala
index ec21b2a..04e0b85 100644
--- a/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToXMLLocalFileSystemWriterSpec.scala
+++ b/local-file-system/src/test/scala/com/clairvoyant/data/scalaxy/writer/local/file/DataFrameToXMLLocalFileSystemWriterSpec.scala
@@ -10,6 +10,8 @@ class DataFrameToXMLLocalFileSystemWriterSpec extends DataFrameReader with DataF
val outputDirPath = s"/tmp/out_${System.currentTimeMillis()}"
+ val dataFrameToLocalFileSystemWriter = DataFrameToLocalFileSystemWriter[XMLFileFormat]
+
"write()" should "write a dataframe to the provided path" in {
val df = readJSONFromText(
"""|{
@@ -21,7 +23,7 @@ class DataFrameToXMLLocalFileSystemWriterSpec extends DataFrameReader with DataF
val xmlFileFormat = XMLFileFormat()
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = xmlFileFormat,
@@ -58,7 +60,7 @@ class DataFrameToXMLLocalFileSystemWriterSpec extends DataFrameReader with DataF
attributePrefix = "attr_"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = xmlFileFormat,
@@ -99,7 +101,7 @@ class DataFrameToXMLLocalFileSystemWriterSpec extends DataFrameReader with DataF
declaration = "custom_declaration"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = xmlFileFormat,
@@ -135,7 +137,7 @@ class DataFrameToXMLLocalFileSystemWriterSpec extends DataFrameReader with DataF
rowTag = "item"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = xmlFileFormat,
@@ -176,7 +178,7 @@ class DataFrameToXMLLocalFileSystemWriterSpec extends DataFrameReader with DataF
rootTag = "items"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = xmlFileFormat,
@@ -223,7 +225,7 @@ class DataFrameToXMLLocalFileSystemWriterSpec extends DataFrameReader with DataF
valueTag = "#VALUE"
)
- DataFrameToLocalFileSystemWriter
+ dataFrameToLocalFileSystemWriter
.write(
dataFrame = df,
fileFormat = xmlFileFormat,