Skip to content

Commit

Permalink
Improved WARC erroring (#31)
Browse files Browse the repository at this point in the history
* Better error reporting

* atomically sync serial number in WARC

fixes #24

* fix: update GitHub workflow to latest go.mod version.

* Run `go mod tidy` and manually check and update dependencies.
  • Loading branch information
NGTmeaty authored May 17, 2023
1 parent c6e08a4 commit 63b4f96
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 201 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ on:
branches: [ master ]

jobs:

build:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v2

- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.18
go-version: 1.19

- name: Build
run: go build -v ./...
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![GoDoc](https://godoc.org/github.com/CorentinB/warc?status.svg)](https://godoc.org/github.com/CorentinB/warc)
[![Go Report Card](https://goreportcard.com/badge/github.com/CorentinB/warc)](https://goreportcard.com/report/github.com/CorentinB/warc)

**WARNING: This project is still a WIP. It is NOT ready to be used in any project.**
**WARNING: This project is no longer a work-in-progress and generates valid WARCs when used correctly, but it still needs to be carefully implemented and tested.**

## Introduction

Expand Down
24 changes: 14 additions & 10 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ import (
"github.com/paulbellamy/ratecounter"
)

// Error pairs an underlying error with a string identifying a function.
// Pointers to Error are the element type of CustomHTTPClient.ErrChan, the
// channel used for sending WARC errors through.
type Error struct {
// Err is the underlying error value.
Err error
// Func is a function identifier associated with Err.
// NOTE(review): presumably the name of the function that raised Err — confirm against the senders.
Func string
}

type HTTPClientSettings struct {
RotatorSettings *RotatorSettings
DedupeOptions DedupeOptions
Expand All @@ -28,7 +33,7 @@ type CustomHTTPClient struct {
dedupeHashTable *sync.Map
dedupeOptions DedupeOptions
skipHTTPStatusCodes []int
errChan chan error
ErrChan chan *Error
verifyCerts bool
TempDir string
FullOnDisk bool
Expand All @@ -52,12 +57,12 @@ func (c *CustomHTTPClient) Close() error {
}

wg.Wait()
close(c.errChan)
close(c.ErrChan)

return nil
}

func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient *CustomHTTPClient, errChan chan error, err error) {
func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient *CustomHTTPClient, err error) {
httpClient = new(CustomHTTPClient)

// Init data counters
Expand All @@ -76,8 +81,7 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient
httpClient.skipHTTPStatusCodes = HTTPClientSettings.SkipHTTPStatusCodes

// Create an error channel for sending WARC errors through
errChan = make(chan error)
httpClient.errChan = errChan
httpClient.ErrChan = make(chan *Error)

// Toggle verification of certificates
// InsecureSkipVerify expects the opposite of the verifyCerts flag, as such we flip it.
Expand All @@ -88,7 +92,7 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient
httpClient.TempDir = HTTPClientSettings.TempDir
err = os.MkdirAll(httpClient.TempDir, os.ModePerm)
if err != nil {
return nil, errChan, err
return nil, err
}
}

Expand All @@ -108,7 +112,7 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient
// Configure WARC writer
httpClient.WARCWriter, httpClient.WARCWriterDoneChannels, err = HTTPClientSettings.RotatorSettings.NewWARCRotator()
if err != nil {
return nil, errChan, err
return nil, err
}

// Configure HTTP client
Expand All @@ -119,15 +123,15 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient
// Configure custom dialer / transport
customDialer, err := newCustomDialer(httpClient, HTTPClientSettings.Proxy)
if err != nil {
return nil, errChan, err
return nil, err
}

customTransport, err := newCustomTransport(customDialer, HTTPClientSettings.DecompressBody)
if err != nil {
return nil, errChan, err
return nil, err
}

httpClient.Transport = customTransport

return httpClient, errChan, nil
return httpClient, nil
}
Loading

0 comments on commit 63b4f96

Please sign in to comment.