Skip to content

Commit

Permalink
remove pgzip usage because of klauspost/pgzip#56
Browse files Browse the repository at this point in the history
  • Loading branch information
CorentinB committed Aug 12, 2024
1 parent 15f75ef commit 0ce89f2
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 39 deletions.
21 changes: 10 additions & 11 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,31 +1,30 @@
module github.com/CorentinB/warc

go 1.21
go 1.22

require (
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5
github.com/klauspost/compress v1.17.7
github.com/klauspost/pgzip v1.2.6
github.com/klauspost/compress v1.17.9
github.com/paulbellamy/ratecounter v0.2.0
github.com/refraction-networking/utls v1.6.3
github.com/refraction-networking/utls v1.6.7
github.com/remeh/sizedwaitgroup v1.0.0
github.com/satori/go.uuid v1.2.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.8.0
github.com/spf13/cobra v1.8.1
github.com/ulikunitz/xz v0.5.12
go.uber.org/goleak v1.3.0
golang.org/x/net v0.22.0
golang.org/x/sync v0.6.0
golang.org/x/net v0.28.0
golang.org/x/sync v0.8.0
)

require (
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/cloudflare/circl v1.3.7 // indirect
github.com/cloudflare/circl v1.3.9 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/kr/pretty v0.3.0 // indirect
github.com/quic-go/quic-go v0.41.0 // indirect
github.com/quic-go/quic-go v0.46.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/crypto v0.21.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/crypto v0.26.0 // indirect
golang.org/x/sys v0.24.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
)
22 changes: 20 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU=
github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA=
github.com/cloudflare/circl v1.3.9 h1:QFrlgFYf2Qpi8bSpVPK1HBvWpx16v/1TZivyo7pGuBE=
github.com/cloudflare/circl v1.3.9/go.mod h1:PDRU+oXvdD7KCtgKxW95M5Z8BpSCJXQORiZFnBQS5QU=
github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
Expand All @@ -21,8 +24,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg=
github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
Expand All @@ -41,8 +44,12 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/quic-go/quic-go v0.41.0 h1:aD8MmHfgqTURWNJy48IYFg2OnxwHT3JL7ahGs73lb4k=
github.com/quic-go/quic-go v0.41.0/go.mod h1:qCkNjqczPEvgsOnxZ0eCD14lv+B2LHlFAB++CNOh9hA=
github.com/quic-go/quic-go v0.46.0 h1:uuwLClEEyk1DNvchH8uCByQVjo3yKL9opKulExNDs7Y=
github.com/quic-go/quic-go v0.46.0/go.mod h1:1dLehS7TIR64+vxGR70GDcatWTOtMX2PUtnKsjbTurI=
github.com/refraction-networking/utls v1.6.3 h1:MFOfRN35sSx6K5AZNIoESsBuBxS2LCgRilRIdHb6fDc=
github.com/refraction-networking/utls v1.6.3/go.mod h1:yil9+7qSl+gBwJqztoQseO6Pr3h62pQoY1lXiNR/FPs=
github.com/refraction-networking/utls v1.6.7 h1:zVJ7sP1dJx/WtVuITug3qYUq034cDq9B2MR1K67ULZM=
github.com/refraction-networking/utls v1.6.7/go.mod h1:BC3O4vQzye5hqpmDTWUqi4P5DDhzJfkV1tdqtawQIH0=
github.com/remeh/sizedwaitgroup v1.0.0 h1:VNGGFwNo/R5+MJBf6yrsr110p0m4/OX4S3DCy7Kyl5E=
github.com/remeh/sizedwaitgroup v1.0.0/go.mod h1:3j2R4OIe/SeS6YDhICBy22RWjJC5eNCJ1V+9+NVNYlo=
github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k=
Expand All @@ -54,6 +61,8 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
Expand All @@ -66,15 +75,24 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
25 changes: 3 additions & 22 deletions utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"time"

gzip "github.com/klauspost/compress/gzip"
"github.com/klauspost/pgzip"

"github.com/klauspost/compress/zstd"
)
Expand Down Expand Up @@ -117,26 +116,8 @@ func isLineStartingWithHTTPMethod(line string) bool {
func NewWriter(writer io.Writer, fileName string, compression string, contentLengthHeader string) (*Writer, error) {
if compression != "" {
if compression == "GZIP" {
var gzipWriter *gzip.Writer
// If the record's Content-Length is bigger than a megabyte, we use the parallel gzip library
if contentLengthHeader != "" {
contentLength, err := strconv.Atoi(contentLengthHeader)
if err != nil {
return nil, err
}

if contentLength > 1000000 {
pgzipWriter := pgzip.NewWriter(writer)
return &Writer{
FileName: fileName,
Compression: compression,
PGZIPWriter: pgzipWriter,
FileWriter: bufio.NewWriter(pgzipWriter),
}, nil
}
}
gzipWriter := gzip.NewWriter(writer)

gzipWriter = gzip.NewWriter(writer)
return &Writer{
FileName: fileName,
Compression: compression,
Expand All @@ -155,7 +136,7 @@ func NewWriter(writer io.Writer, fileName string, compression string, contentLen
FileWriter: bufio.NewWriter(zstdWriter),
}, nil
}
return nil, errors.New("Invalid compression algorithm: " + compression)
return nil, errors.New("invalid compression algorithm: " + compression)
}

return &Writer{
Expand Down Expand Up @@ -238,7 +219,7 @@ func checkRotatorSettings(settings *RotatorSettings) (err error) {

// Check if the specified compression algorithm is valid
if settings.Compression != "" && settings.Compression != "GZIP" && settings.Compression != "ZSTD" {
return errors.New("Invalid compression algorithm: " + settings.Compression)
return errors.New("invalid compression algorithm: " + settings.Compression)
}

// Add few headers to the warcinfo payload, to not have it empty
Expand Down
2 changes: 0 additions & 2 deletions warc.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ func (s *RotatorSettings) NewWARCRotator() (recordWriterChan chan *RecordBatch,
func (w *Writer) CloseCompressedWriter() {
if w.GZIPWriter != nil {
w.GZIPWriter.Close()
} else if w.PGZIPWriter != nil {
w.PGZIPWriter.Close()
} else if w.ZSTDWriter != nil {
w.ZSTDWriter.Close()
}
Expand Down
2 changes: 0 additions & 2 deletions write.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"time"

"github.com/klauspost/compress/gzip"
"github.com/klauspost/pgzip"

"github.com/klauspost/compress/zstd"
uuid "github.com/satori/go.uuid"
Expand All @@ -20,7 +19,6 @@ type Writer struct {
FileName string
Compression string
GZIPWriter *gzip.Writer
PGZIPWriter *pgzip.Writer
ZSTDWriter *zstd.Encoder
FileWriter *bufio.Writer
ParallelGZIP bool
Expand Down

0 comments on commit 0ce89f2

Please sign in to comment.