Skip to content

Commit

Permalink
New backfill strategy to support Microsoft SQL Server (#382)
Browse files Browse the repository at this point in the history
Co-authored-by: Nathan <[email protected]>
  • Loading branch information
Tang8330 and nathan-artie authored May 15, 2024
1 parent 0650f53 commit 01eecb0
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 14 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.22

require (
github.com/DataDog/datadog-go/v5 v5.5.0
github.com/artie-labs/transfer v1.25.11
github.com/artie-labs/transfer v1.25.13
github.com/aws/aws-sdk-go v1.44.327
github.com/aws/aws-sdk-go-v2 v1.18.1
github.com/aws/aws-sdk-go-v2/config v1.18.19
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlE
github.com/apache/thrift v0.14.2/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY=
github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU=
github.com/artie-labs/transfer v1.25.11 h1:1Olw8l7lQWowro23KFRXka20HYz1zSThJF2N94UMf1s=
github.com/artie-labs/transfer v1.25.11/go.mod h1:ihAnX9/V+7KcAYYiLP5m1nrPAnsltkoePc4I0d1Ake8=
github.com/artie-labs/transfer v1.25.13 h1:PEYzoWZSYVbbmZT64ahZjHi3Dy0FtrhyhCxNWxD3kRM=
github.com/artie-labs/transfer v1.25.13/go.mod h1:+jq98Vfn66LVB0WgPxoPB1U3gWWbZs62CzeuDgm/02s=
github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/aws/aws-sdk-go v1.44.327 h1:ZS8oO4+7MOBLhkdwIhgtVeDzCeWOlTfKJS7EgggbIEY=
github.com/aws/aws-sdk-go v1.44.327/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
Expand Down
49 changes: 38 additions & 11 deletions writers/transfer/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"log/slog"
"time"

"github.com/artie-labs/transfer/clients/mssql/dialect"
"github.com/artie-labs/transfer/lib/artie"
"github.com/artie-labs/transfer/lib/cdc/mongo"
"github.com/artie-labs/transfer/lib/config"
Expand Down Expand Up @@ -81,7 +82,14 @@ func (w *Writer) messageToEvent(message lib.RawMessage) (event.Event, error) {
return event.ToMemoryEvent(evt, partitionKey, w.tc, config.Replication)
}

return event.ToMemoryEvent(evt, message.PartitionKey(), w.tc, config.Replication)
memoryEvent, err := event.ToMemoryEvent(evt, message.PartitionKey(), w.tc, config.Replication)
if err != nil {
return event.Event{}, err
}

// Setting the deleted column flag.
memoryEvent.Data[constants.DeleteColumnMarker] = false
return memoryEvent, nil
}

func (w *Writer) Write(_ context.Context, messages []lib.RawMessage) error {
Expand Down Expand Up @@ -174,18 +182,27 @@ func (w *Writer) flush(reason string) error {
}
}()

if !w.tc.SoftDelete {
columns := tableData.ReadOnlyInMemoryCols()
columns.DeleteColumn(constants.DeleteColumnMarker)
tableData.SetInMemoryColumns(columns)
}

tableData.ResetTempTableSuffix()
if err = w.destination.Append(tableData.TableData); err != nil {
tags["what"] = "merge_fail"
tags["retryable"] = fmt.Sprint(w.destination.IsRetryableError(err))
return fmt.Errorf("failed to append data to destination: %w", err)
if isMicrosoftSQLServer(w.destination) {
// Microsoft SQL Server uses MERGE not append
if err = w.destination.Merge(tableData.TableData); err != nil {
tags["what"] = "merge_fail"
tags["retryable"] = fmt.Sprint(w.destination.IsRetryableError(err))
return fmt.Errorf("failed to merge data to destination: %w", err)
}
} else {
// We should hide this column from getting added
if !tableData.TopicConfig().SoftDelete {
tableData.InMemoryColumns().DeleteColumn(constants.DeleteColumnMarker)
}

if err = w.destination.Append(tableData.TableData); err != nil {
tags["what"] = "merge_fail"
tags["retryable"] = fmt.Sprint(w.destination.IsRetryableError(err))
return fmt.Errorf("failed to append data to destination: %w", err)
}
}

w.inMemDB.ClearTableConfig(tableName)
return nil
}
Expand All @@ -204,6 +221,11 @@ func (w *Writer) OnComplete() error {
return err
}

if isMicrosoftSQLServer(w.destination) {
// We don't need to run dedupe because it's just merging.
return nil
}

slog.Info("Running dedupe...", slog.String("table", tableName))
tableID := w.destination.IdentifierFor(*w.tc, tableName)
start := time.Now()
Expand All @@ -213,3 +235,8 @@ func (w *Writer) OnComplete() error {
slog.Info("Dedupe complete", slog.String("table", tableName), slog.Duration("duration", time.Since(start)))
return nil
}

func isMicrosoftSQLServer(dwh destination.DataWarehouse) bool {
_, isOk := dwh.Dialect().(dialect.MSSQLDialect)
return isOk
}

0 comments on commit 01eecb0

Please sign in to comment.