Skip to content

Commit

Permalink
[DynamoDB] Avoid OOMing from snapshotting (#455)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tang8330 authored Aug 1, 2024
1 parent 45e49c3 commit 0142796
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions sources/dynamodb/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ func (s *SnapshotStore) streamAndPublish(ctx context.Context, writer writers.Wri
}

slog.Info("Processing file...", logFields...)
// We're using an unbuffered channel, this will block sender if the receiver is not ready.
ch := make(chan dynamodb.ItemResponse)
go func() {
if err := s.s3Client.StreamJsonGzipFile(file, ch); err != nil {
if err = s.s3Client.StreamJsonGzipFile(file, ch); err != nil {
logger.Panic("Failed to read file", slog.Any("err", err))
}
}()
Expand All @@ -91,12 +92,23 @@ func (s *SnapshotStore) streamAndPublish(ctx context.Context, writer writers.Wri
if err != nil {
return fmt.Errorf("failed to cast message from DynamoDB, msg: %v, err: %w", msg, err)
}

messages = append(messages, dynamoMsg.RawMessage())
// If there are more than 500k messages, we don't need to wait until the whole file is read.
// We can write what we have and continue reading the file. This is done to prevent OOM errors.
if len(messages) > 500_000 {
if _, err = writer.Write(ctx, iterator.Once(messages)); err != nil {
return fmt.Errorf("failed to write messages: %w", err)
}

// Clear messages
messages = []lib.RawMessage{}
}
}

// TODO: Create an actual iterator over the files that is passed to the writer.
if _, err = writer.Write(ctx, iterator.Once(messages)); err != nil {
return fmt.Errorf("failed to publish messages: %w", err)
return fmt.Errorf("failed to write messages: %w", err)
}

slog.Info("Successfully processed file...", logFields...)
Expand Down

0 comments on commit 0142796

Please sign in to comment.