Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No buffer mode for parquet files which can not be appended to #771

Merged
merged 2 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/formats/parquet/parquet.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func (f *ParquetFormat) To(msgs []*kt.JCHF, serBuf []byte) (*kt.Output, error) {
}
fw.Close()

return kt.NewOutput(fw.Bytes()), nil
return kt.NewOutputNoBuffer(fw.Bytes()), nil
}

func (f *ParquetFormat) From(raw *kt.Output) ([]map[string]interface{}, error) {
Expand Down Expand Up @@ -194,7 +194,7 @@ func (f *ParquetFormat) Rollup(rolls []rollup.Rollup) (*kt.Output, error) {
}
fw.Close()

return kt.NewOutput(fw.Bytes()), nil
return kt.NewOutputNoBuffer(fw.Bytes()), nil
}

func (f *ParquetFormat) toParquetMetricRollup(in []rollup.Rollup, ts int64) []ParquetMetric {
Expand Down
11 changes: 8 additions & 3 deletions pkg/kt/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,9 +353,10 @@ type OutputContext struct {
}

type Output struct {
Body []byte
Ctx OutputContext
CB func(error) // Called when this is sent to a sink.
Body []byte
Ctx OutputContext
CB func(error) // Called when this is sent to a sink.
NoBuffer bool // If true, write out right away.
}

func NewOutput(body []byte) *Output {
Expand All @@ -370,6 +371,10 @@ func NewOutputWithProviderAndCompanySender(body []byte, prov Provider, cid Cid,
return &Output{Body: body, Ctx: OutputContext{Provider: prov, Type: stype, CompanyId: cid, SenderId: senderid}}
}

func NewOutputNoBuffer(body []byte) *Output {
return &Output{Body: body, NoBuffer: true}
}

func (o *Output) IsEvent() bool {
return o.Ctx.Type == EventOutput
}
Expand Down
20 changes: 20 additions & 0 deletions pkg/sinks/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ type FileSink struct {
config *ktranslate.FileSinkConfig
}

const (
filePerms = os.FileMode(0664)
)

func NewSink(log logger.Underlying, registry go_metrics.Registry, cfg *ktranslate.FileSinkConfig) (*FileSink, error) {
rand.Seed(time.Now().UnixNano())
fs := &FileSink{
Expand Down Expand Up @@ -84,6 +88,12 @@ func (s *FileSink) Init(ctx context.Context, format formats.Format, compression
}

func (s *FileSink) Send(ctx context.Context, payload *kt.Output) {
// In the un-buffered case, write this out right away.
if payload.NoBuffer && len(payload.Body) > 0 {
go s.writeFileNow(ctx, payload.Body)
return
}

s.mux.Lock()
defer s.mux.Unlock()
if s.doWrite && s.fd != nil {
Expand Down Expand Up @@ -115,3 +125,13 @@ func (s *FileSink) HttpInfo() map[string]float64 {
"Write": doWrite,
}
}

func (s *FileSink) writeFileNow(ctx context.Context, data []byte) {
if s.doWrite {
name := s.getName()
err := os.WriteFile(name, data, filePerms)
if err != nil {
s.Errorf("There was an error when writing the %s file: %v.", name, err)
}
}
}
6 changes: 6 additions & 0 deletions pkg/sinks/gcloud/gcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ func (s *GCloudSink) Init(ctx context.Context, format formats.Format, compressio
}

func (s *GCloudSink) Send(ctx context.Context, payload *kt.Output) {
// In the un-buffered case, write this out right away.
if payload.NoBuffer && len(payload.Body) > 0 {
go s.send(ctx, payload.Body)
return
}

s.mux.Lock()
defer s.mux.Unlock()
s.buf.Write(payload.Body)
Expand Down
7 changes: 7 additions & 0 deletions pkg/sinks/s3/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ func (s *S3Sink) Init(ctx context.Context, format formats.Format, compression kt
}

func (s *S3Sink) Send(ctx context.Context, payload *kt.Output) {
// In the un-buffered case, write this out right away.
if payload.NoBuffer && len(payload.Body) > 0 {
go s.send(ctx, payload.Body)
return
}

// Else queue up for more efficient sending.
s.mux.Lock()
defer s.mux.Unlock()
s.buf.Write(payload.Body)
Expand Down
Loading