forked from streamingfast/substreams-sink-files
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'streamingfast:feature/parquet' into feature/parquet
- Loading branch information
Showing
24 changed files
with
938 additions
and
406 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package writer | ||
|
||
import (
	"fmt"
	"sort"
	"strings"

	"github.com/bobg/go-generics/v2/maps"
	"github.com/bobg/go-generics/v2/slices"
	pbparquet "github.com/streamingfast/substreams-sink-files/pb/parquet"
)
|
||
// ParquetWriterOptions holds the configuration options for the Parquet writer. | ||
// It's the fully resolved, well typed version of ParquetWriterUserOptions which | ||
// is the user facing configuration. | ||
type ParquetWriterOptions struct { | ||
DefaultColumnCompression *pbparquet.Compression | ||
} | ||
|
||
// ParquetWriterUserOptions is the user facing configuration of the Parquet
// writer, holding values exactly as they were provided (not yet validated).
type ParquetWriterUserOptions struct {
	// DefaultColumnCompression is the compression name as typed by the user;
	// the empty string means no default compression was requested.
	DefaultColumnCompression string
}
|
||
func NewParquetWriterOptions(opts []ParquetWriterOption) (*ParquetWriterOptions, error) { | ||
userOptions := &ParquetWriterUserOptions{} | ||
for _, opt := range opts { | ||
opt.apply(userOptions) | ||
} | ||
|
||
options := &ParquetWriterOptions{} | ||
if userOptions.DefaultColumnCompression != "" { | ||
compression, found := pbparquet.Compression_value[strings.ToUpper(userOptions.DefaultColumnCompression)] | ||
if !found { | ||
return nil, fmt.Errorf("invalid compression type %q, accepted compression values are %v", userOptions.DefaultColumnCompression, slices.Map(maps.Keys(pbparquet.Compression_value), strings.ToLower)) | ||
} | ||
|
||
options.DefaultColumnCompression = ptr(pbparquet.Compression(compression)) | ||
} | ||
|
||
return options, nil | ||
} | ||
|
||
// Option is a function that configures a ParquetWriterUserOptions. | ||
type ParquetWriterOption interface { | ||
apply(*ParquetWriterUserOptions) | ||
} | ||
|
||
type optionFunc func(*ParquetWriterUserOptions) | ||
|
||
func (f optionFunc) apply(o *ParquetWriterUserOptions) { | ||
f(o) | ||
} | ||
|
||
// ParquetDefaultColumnCompression sets the default column compression for the Parquet writer. | ||
func ParquetDefaultColumnCompression(compression string) ParquetWriterOption { | ||
return optionFunc(func(o *ParquetWriterUserOptions) { | ||
o.DefaultColumnCompression = compression | ||
}) | ||
} | ||
|
||
// ptr returns a pointer to a newly allocated copy of v.
func ptr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package main | ||
|
||
import ( | ||
"github.com/spf13/cobra" | ||
"github.com/spf13/pflag" | ||
"github.com/streamingfast/cli" | ||
"github.com/streamingfast/cli/sflags" | ||
"github.com/streamingfast/substreams-sink-files/bundler/writer" | ||
) | ||
|
||
// addCommonParquetFlags adds common flags for Parquet encoder. The list of flags added by this function are: | ||
// - parquet-default-column-compression | ||
func addCommonParquetFlags(flags *pflag.FlagSet) { | ||
flags.String("parquet-default-column-compression", "", cli.FlagDescription(` | ||
The default column compression to use for all tables that is going to be created that doesn't have a specific column | ||
compression set. | ||
If set, if a Protobuf field doesn't have a column specific compression extension, the default compression will be used | ||
for that column. If the field has a specific compression set, the field column specific compression will be used. | ||
Available values are: | ||
- uncompressed | ||
- snappy | ||
- gzip | ||
- lz4_raw | ||
- brotli | ||
- zstd | ||
Note that this setting is only used when the encoder is set to 'parquet'. | ||
`)) | ||
} | ||
|
||
// parquetCommonFlagValues holds the values read from the common Parquet flags.
type parquetCommonFlagValues struct {
	// DefaultColumnCompression is the raw value of the
	// parquet-default-column-compression flag; empty when the flag was not set.
	DefaultColumnCompression string
}
|
||
func (f parquetCommonFlagValues) AsParquetWriterOptions() []writer.ParquetWriterOption { | ||
writerOptions := []writer.ParquetWriterOption{} | ||
if f.DefaultColumnCompression != "" { | ||
writerOptions = append(writerOptions, writer.ParquetDefaultColumnCompression(f.DefaultColumnCompression)) | ||
} | ||
|
||
return writerOptions | ||
} | ||
|
||
func readCommonParquetFlags(cmd *cobra.Command) parquetCommonFlagValues { | ||
return parquetCommonFlagValues{ | ||
DefaultColumnCompression: sflags.MustGetString(cmd, "parquet-default-column-compression"), | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.