-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #328 from marklogic/release/2.4.2
Merge 2.4.2 into master
- Loading branch information
Showing
22 changed files
with
306 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 85 additions & 0 deletions
85
src/main/java/com/marklogic/spark/reader/file/JsonLinesFileReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* Copyright © 2024 MarkLogic Corporation. All Rights Reserved. | ||
*/ | ||
package com.marklogic.spark.reader.file; | ||
|
||
import org.apache.commons.io.IOUtils; | ||
import org.apache.spark.sql.catalyst.InternalRow; | ||
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; | ||
import org.apache.spark.sql.connector.read.PartitionReader; | ||
import org.apache.spark.unsafe.types.ByteArray; | ||
import org.apache.spark.unsafe.types.UTF8String; | ||
|
||
import java.io.BufferedReader; | ||
import java.util.Iterator; | ||
|
||
class JsonLinesFileReader implements PartitionReader<InternalRow> { | ||
|
||
private final FilePartition filePartition; | ||
private final FileContext fileContext; | ||
|
||
private BufferedReader bufferedReader; | ||
private Iterator<String> bufferedLines; | ||
|
||
private InternalRow nextRowToReturn; | ||
private String currentFilePath; | ||
private int lineCounter; | ||
private int filePathIndex; | ||
|
||
JsonLinesFileReader(FilePartition filePartition, FileContext fileContext) { | ||
this.filePartition = filePartition; | ||
this.fileContext = fileContext; | ||
} | ||
|
||
@Override | ||
public boolean next() { | ||
if (bufferedLines != null && bufferedLines.hasNext()) { | ||
this.nextRowToReturn = createRowFromNextJsonLine(); | ||
return true; | ||
} | ||
|
||
if (bufferedReader != null) { | ||
IOUtils.closeQuietly(bufferedReader); | ||
} | ||
|
||
if (filePathIndex >= filePartition.getPaths().size()) { | ||
return false; | ||
} | ||
|
||
openNextFile(); | ||
return next(); | ||
} | ||
|
||
@Override | ||
public InternalRow get() { | ||
return nextRowToReturn; | ||
} | ||
|
||
@Override | ||
public void close() { | ||
IOUtils.closeQuietly(bufferedReader); | ||
} | ||
|
||
private void openNextFile() { | ||
final String originalFilePath = filePartition.getPaths().get(filePathIndex); | ||
this.currentFilePath = fileContext.decodeFilePath(originalFilePath); | ||
this.lineCounter = 1; | ||
this.filePathIndex++; | ||
// To mimic the behavior of the Spark JSON data source, this will guess if the file is gzipped based on its | ||
// file extension. This allows for .gz/.gzip files to be supported without the user having to specify the | ||
// compression option, which is the same behavior as Spark JSON provides. | ||
this.bufferedReader = fileContext.openFileReader(currentFilePath, true); | ||
this.bufferedLines = bufferedReader.lines().iterator(); | ||
} | ||
|
||
private InternalRow createRowFromNextJsonLine() { | ||
String line = bufferedLines.next(); | ||
String uri = String.format("%s-%d.json", UTF8String.fromString(currentFilePath), lineCounter); | ||
lineCounter++; | ||
return new GenericInternalRow(new Object[]{ | ||
UTF8String.fromString(uri), | ||
ByteArray.concat(line.getBytes()), | ||
null, null, null, null, null, null | ||
}); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.