-
Notifications
You must be signed in to change notification settings - Fork 482
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix microsoft OOOM: don't stream into memory
I'm fixing a bug here where the implementation of the microsoft adapter was reading an arbitrary stream into memory (using the DataChunk class); instead, we now "just stream directly to the endpoint". It's not _quite_ that simple though: we actually stream twice, for the very reason that the old implementation was streaming into memory: we need to know the file size. While we're here I'm doing some "leave the campground better" tasks, so here's a more nitty-gritty breakdown of this PR: - microsoft adapter: marking duplicated code Microsoft{Photo,Video}* as needing to rely on the newly refactored MicrosoftMedia (so these kinds of bug fixes are easier to maintain, for example). - DTP: make testability (via DI) easier for java.net.URL streamers (this has already become a pattern, so I just formalized it and dropped a TODO in the places that are doing this locally; this way, in the future it'll be easier/more obvious what "the pattern" is and how to stop maintaining disparate copies of it) - microsoft adapter: the remainder of the DataChunk code is now just a POJO, so switching to autovalue and letting the test live in the primary user: the new `StreamChunker` (as a mini "integrated" test). - microsoft adapter: all size-related operations are a `long` now
- Loading branch information
Showing
20 changed files
with
518 additions
and
254 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
80 changes: 24 additions & 56 deletions
80
...ransfer-microsoft/src/main/java/org/datatransferproject/transfer/microsoft/DataChunk.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,70 +1,38 @@ | ||
package org.datatransferproject.transfer.microsoft; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import com.google.auto.value.AutoValue; | ||
|
||
/** | ||
This utility class allows us to break up an InputStream into multiple chunks | ||
for part-by-part upload to a service, for example to be consumed in an upload session. | ||
*/ | ||
public class DataChunk { | ||
private static final int CHUNK_SIZE = 32000 * 1024; // 32000KiB | ||
/** Describe small buffers of bytes captured from a large java.io Stream. */ | ||
@AutoValue | ||
public abstract class DataChunk { | ||
/** Bytes being held in this buffer. */ | ||
public abstract byte[] chunk(); | ||
|
||
private final byte[] data; | ||
private final int size; | ||
private final int rangeStart; | ||
public DataChunk(byte[] data, int size, int rangeStart) { | ||
this.data = data; | ||
this.size = size; | ||
this.rangeStart = rangeStart; | ||
/** Byte count of {@link #chunk()}. */ | ||
public int size() { | ||
return chunk().length; | ||
} | ||
|
||
public int getSize() { | ||
return size; | ||
} | ||
/** Index-offset within the original java.io Stream at which {@link #chunk()} had started. */ | ||
public abstract long streamByteOffset(); | ||
|
||
public byte[] getData() { | ||
return data; | ||
/** | ||
* Index-offset within the original java.io Stream at which the final byte of {@link #chunk()} lived. | ||
*/ | ||
public long finalByteOffset() { | ||
return streamByteOffset() + size() - 1; | ||
} | ||
|
||
public int getStart() { | ||
return rangeStart; | ||
public static Builder builder() { | ||
return new org.datatransferproject.transfer.microsoft.AutoValue_DataChunk.Builder(); | ||
} | ||
|
||
public int getEnd() { | ||
return rangeStart + size - 1; | ||
} | ||
@AutoValue.Builder | ||
public abstract static class Builder { | ||
public abstract Builder setChunk(byte[] value); | ||
|
||
public static List<DataChunk> splitData(InputStream inputStream) throws IOException { | ||
ArrayList<DataChunk> chunksToSend = new ArrayList(); | ||
byte[] data = new byte[CHUNK_SIZE]; | ||
int totalFileSize = 0; | ||
int quantityToSend; | ||
int roomLeft = CHUNK_SIZE; | ||
int offset = 0; | ||
int chunksRead = 0; | ||
public abstract Builder setStreamByteOffset(long value); | ||
|
||
// start timing | ||
while ((quantityToSend = inputStream.read(data, offset, roomLeft)) != -1) { | ||
offset += quantityToSend; | ||
roomLeft -= quantityToSend; | ||
if (roomLeft == 0) { | ||
chunksToSend.add(new DataChunk(data, CHUNK_SIZE, chunksRead * CHUNK_SIZE)); | ||
chunksRead++; | ||
roomLeft = CHUNK_SIZE; | ||
offset = 0; | ||
totalFileSize += CHUNK_SIZE; | ||
data = new byte[CHUNK_SIZE]; | ||
} | ||
} | ||
if (offset != 0) { | ||
chunksToSend.add(new DataChunk(data, offset, chunksRead * CHUNK_SIZE)); | ||
totalFileSize += offset; | ||
chunksRead++; | ||
} | ||
return chunksToSend; | ||
public abstract DataChunk build(); | ||
} | ||
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
...fer-microsoft/src/main/java/org/datatransferproject/transfer/microsoft/StreamChunker.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package org.datatransferproject.transfer.microsoft; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.Optional; | ||
|
||
/** | ||
* Splits an {@link InputStream} into consecutive {@link DataChunk}s, tracking how many bytes | ||
* have been handed out so far. | ||
* | ||
* <p>Each call to {@link #nextChunk()} requests at most {@code chunkSizeBytes} bytes from the | ||
* stream and tags the resulting chunk with the stream offset at which it started. | ||
* | ||
* <p>Does not close the held input stream. | ||
*/ | ||
public class StreamChunker { | ||
// Upper bound on the number of bytes requested from the stream for a single chunk. | ||
private final int chunkSizeBytes; | ||
// Stream the chunks are read from; nothing in this class closes it. | ||
private final InputStream inputStream; | ||
|
||
// Total bytes returned in chunks so far; doubles as the start offset of the next chunk. | ||
private long streamByteOffset = 0; | ||
|
||
/** | ||
* Creates a chunker over {@code inputStream}, starting at offset zero. | ||
* | ||
* @param chunkSizeBytes maximum number of bytes to place in each chunk | ||
* @param inputStream stream to read from; it is left open by this class | ||
*/ | ||
public StreamChunker(int chunkSizeBytes, InputStream inputStream) { | ||
this.inputStream = inputStream; | ||
this.chunkSizeBytes = chunkSizeBytes; | ||
} | ||
|
||
/** | ||
* Constructs a new DataChunk from just {@code chunkSizeBytes} bytes of the stream. | ||
* | ||
* <p>Returned chunk will be less than or equal to chunkSizeBytes, or absent if no bytes were | ||
* remaining in the stream. | ||
* | ||
* @return the next chunk, carrying the bytes read and the offset at which they started, or | ||
*     {@link Optional#empty()} when the read produced zero bytes | ||
* @throws IOException propagated from {@link InputStream#readNBytes(int)} | ||
*/ | ||
public Optional<DataChunk> nextChunk() throws IOException { | ||
byte[] chunkOfData = inputStream.readNBytes(chunkSizeBytes); | ||
// A zero-length read is reported as "no chunk" rather than as an empty DataChunk. | ||
Optional<DataChunk> resp = | ||
chunkOfData.length == 0 | ||
? Optional.empty() | ||
: Optional.of( | ||
DataChunk.builder() | ||
.setChunk(chunkOfData) | ||
.setStreamByteOffset(streamByteOffset) | ||
.build()); | ||
// Advance only after the chunk is built, so the chunk records where it started. | ||
streamByteOffset += chunkOfData.length; | ||
return resp; | ||
} | ||
} |
Oops, something went wrong.