Skip to content

Commit

Permalink
Support postgres loads (#44)
Browse files Browse the repository at this point in the history
  • Loading branch information
nathan-artie authored Jan 26, 2024
1 parent 574973c commit d0bd081
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 6 deletions.
8 changes: 5 additions & 3 deletions lib/postgres/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ const DefaultErrorRetries = 10
type TableIterator struct {
db *sql.DB
limit uint
statsD mtr.Client
statsD *mtr.Client
maxRowSize uint64
postgresTable *Table
firstRow bool
lastRow bool
done bool
}

func LoadTable(db *sql.DB, table *config.PostgreSQLTable, statsD mtr.Client, maxRowSize uint64) (TableIterator, error) {
func LoadTable(db *sql.DB, table *config.PostgreSQLTable, statsD *mtr.Client, maxRowSize uint64) (TableIterator, error) {
slog.Info("Loading configuration for table", slog.String("table", table.Name), slog.Any("limitSize", table.GetLimit()))

postgresTable := NewTable(table)
Expand Down Expand Up @@ -109,7 +109,9 @@ func (i *TableIterator) Next() ([]kafkalib.RawMessage, error) {
PartitionKey: partitionKeyMap,
Payload: payload,
})
i.statsD.Timing("scanned_and_parsed", time.Since(start), i.statsDTags())
if i.statsD != nil {
(*i.statsD).Timing("scanned_and_parsed", time.Since(start), i.statsDTags())
}
}

// TODO: This should really be re-written and tested thoroughly
Expand Down
59 changes: 59 additions & 0 deletions lib/postgres/load.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package postgres

import (
"context"
"database/sql"
"log/slog"
"time"

"github.com/artie-labs/reader/config"
"github.com/artie-labs/reader/lib/kafkalib"
"github.com/artie-labs/reader/lib/logger"
"github.com/artie-labs/reader/lib/mtr"
)

// Run snapshots every table listed in cfg.PostgreSQL.Tables and publishes the
// scanned rows to Kafka.
//
// For each table it builds an iterator with LoadTable, drains it batch by
// batch, and writes every non-empty batch through a Kafka batch writer created
// from cfg.Kafka. Progress is logged after each batch and a summary is logged
// once the table has been fully scanned.
//
// cfg.Kafka is dereferenced and must therefore be non-nil. statsD is handed to
// LoadTable unchanged and may be nil when metrics are not configured.
//
// Run does not return errors: every failure is reported through logger.Fatal.
// NOTE(review): the code below assumes logger.Fatal terminates the process —
// confirm against lib/logger.
func Run(ctx context.Context, cfg config.Settings, statsD *mtr.Client) {
	slog.Info("Kafka config",
		slog.Bool("aws", cfg.Kafka.AwsEnabled),
		slog.String("kafkaBootstrapServer", cfg.Kafka.BootstrapServers),
		slog.Any("publishSize", cfg.Kafka.GetPublishSize()),
		slog.Uint64("maxRequestSize", cfg.Kafka.MaxRequestSize),
	)
	kafkaWriter, err := kafkalib.NewBatchWriter(ctx, *cfg.Kafka)
	if err != nil {
		logger.Fatal("Failed to create kafka writer", slog.Any("err", err))
	}

	db, err := sql.Open("postgres", NewConnection(cfg.PostgreSQL).String())
	if err != nil {
		logger.Fatal("Failed to connect to postgres", slog.Any("err", err))
	}
	defer func() {
		// Surface close failures instead of silently dropping them.
		if closeErr := db.Close(); closeErr != nil {
			slog.Warn("Failed to close postgres connection", slog.Any("err", closeErr))
		}
	}()

	// sql.Open may only validate its arguments without dialing the server, so
	// ping explicitly to fail fast with a clear message when the database is
	// unreachable or the credentials are wrong.
	if err = db.PingContext(ctx); err != nil {
		logger.Fatal("Failed to ping postgres", slog.Any("err", err))
	}

	for _, table := range cfg.PostgreSQL.Tables {
		snapshotStartTime := time.Now()
		iter, err := LoadTable(db, table, statsD, cfg.Kafka.MaxRequestSize)
		if err != nil {
			logger.Fatal("Failed to create table iterator", slog.Any("err", err), slog.String("table", table.Name))
		}

		// count is the number of messages written to Kafka for this table.
		var count int
		for iter.HasNext() {
			msgs, err := iter.Next()
			if err != nil {
				logger.Fatal("Failed to iterate over table", slog.Any("err", err), slog.String("table", table.Name))
			}
			if len(msgs) == 0 {
				// Nothing to publish for this batch; keep scanning.
				continue
			}

			if err = kafkaWriter.Write(msgs); err != nil {
				logger.Fatal("Failed to write messages to kafka", slog.Any("err", err), slog.String("table", table.Name))
			}
			count += len(msgs)
			slog.Info("Scanning progress",
				slog.String("table", table.Name),
				slog.Duration("timing", time.Since(snapshotStartTime)),
				slog.Int("count", count),
			)
		}

		slog.Info("Finished snapshotting",
			slog.String("table", table.Name),
			slog.Int("scannedTotal", count),
			slog.Duration("totalDuration", time.Since(snapshotStartTime)),
		)
	}
}
16 changes: 13 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/artie-labs/reader/lib/kafkalib"
"github.com/artie-labs/reader/lib/logger"
"github.com/artie-labs/reader/lib/mtr"
"github.com/artie-labs/reader/lib/postgres"
"github.com/artie-labs/reader/sources/dynamodb"
"github.com/getsentry/sentry-go"
)
Expand All @@ -32,12 +33,21 @@ func main() {
}

ctx := config.InjectIntoContext(context.Background(), cfg)
ctx = kafkalib.InjectIntoContext(ctx)
var statsD *mtr.Client
if cfg.Metrics != nil {
slog.Info("Injecting datadog")
ctx = mtr.InjectDatadogIntoCtx(ctx, cfg.Metrics.Namespace, cfg.Metrics.Tags, 0.5)
client := mtr.FromContext(ctx)
statsD = &client
}

ddb := dynamodb.Load(*cfg)
ddb.Run(ctx)
switch cfg.Source {
case "", config.SourceDynamo:
// TODO: pull kafkalib out of context
ctx = kafkalib.InjectIntoContext(ctx)
ddb := dynamodb.Load(*cfg)
ddb.Run(ctx)
case config.SourcePostgreSQL:
postgres.Run(ctx, *cfg, statsD)
}
}

0 comments on commit d0bd081

Please sign in to comment.