From d06a75f4d8af8e5fc88b7224f3519d67078b13c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rub=C3=A9n=20del=20Campo?= Date: Fri, 11 Oct 2024 11:20:49 +0200 Subject: [PATCH] chore: first release --- .editorconfig | 18 ++ .github/CODEOWNERS | 1 + .github/assets/logo/dark.svg | 18 ++ .github/assets/logo/light.svg | 18 ++ .github/workflows/ci.yml | 67 +++++ .gitignore | 152 ++++++++++ .golangci.yml | 171 +++++++++++ .pre-commit-config.yaml | 10 + CODE_OF_CONDUCT.md | 127 ++++++++ CONTRIBUTING.md | 61 ++++ LICENSE | 21 ++ README.md | 65 +++++ go.mod | 5 + go.sum | 2 + service/api/.golangci.yml | 171 +++++++++++ service/api/Makefile | 55 ++++ service/api/README.md | 353 +++++++++++++++++++++++ service/api/client.go | 122 ++++++++ service/api/cmd/nextversion/main.go | 47 +++ service/api/errors.go | 56 ++++ service/api/examples/concurrency/main.go | 45 +++ service/api/examples/retries/main.go | 52 ++++ service/api/go.mod | 12 + service/api/go.sum | 12 + service/api/interface.go | 15 + service/api/logger.go | 16 + service/api/options.go | 121 ++++++++ service/api/params.go | 353 +++++++++++++++++++++++ service/api/pkg/problem/problem.go | 42 +++ service/api/response.go | 127 ++++++++ service/api/version.go | 11 + service/api/version/version.go | 39 +++ 32 files changed, 2385 insertions(+) create mode 100644 .editorconfig create mode 100755 .github/CODEOWNERS create mode 100755 .github/assets/logo/dark.svg create mode 100755 .github/assets/logo/light.svg create mode 100755 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 .golangci.yml create mode 100644 .pre-commit-config.yaml create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 service/api/.golangci.yml create mode 100644 service/api/Makefile create mode 100644 service/api/README.md create mode 100644 service/api/client.go create mode 100644 
service/api/cmd/nextversion/main.go create mode 100644 service/api/errors.go create mode 100644 service/api/examples/concurrency/main.go create mode 100644 service/api/examples/retries/main.go create mode 100644 service/api/go.mod create mode 100644 service/api/go.sum create mode 100644 service/api/interface.go create mode 100644 service/api/logger.go create mode 100644 service/api/options.go create mode 100644 service/api/params.go create mode 100644 service/api/pkg/problem/problem.go create mode 100644 service/api/response.go create mode 100644 service/api/version.go create mode 100644 service/api/version/version.go diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..c8e61e7 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,18 @@ +; https://editorconfig.org/ +root = true + +[*] +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 + +[{Makefile,.gitmodules}] +indent_style = tab + +[*.md] +trim_trailing_whitespace = false + +[{*.yml,*.yaml}] +indent_size = 2 \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100755 index 0000000..b16ae24 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @ZenRows/engineering diff --git a/.github/assets/logo/dark.svg b/.github/assets/logo/dark.svg new file mode 100755 index 0000000..6926abb --- /dev/null +++ b/.github/assets/logo/dark.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/.github/assets/logo/light.svg b/.github/assets/logo/light.svg new file mode 100755 index 0000000..a4427cb --- /dev/null +++ b/.github/assets/logo/light.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100755 index 0000000..fca706c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,67 @@ +name: Quality control checks + +# This workflow is a collection of "quick checks" that should be reasonable +# to run for 
any new commit to this repository in principle. +# +# The main purpose of this workflow is to represent checks that we want to +# run prior to reviewing and merging a pull request. We should therefore aim +# for these checks to complete in no more than a few minutes in the common +# case. + +on: + pull_request: + types: [ opened, synchronize, reopened ] + push: + branches: + - main + +permissions: + contents: read + +jobs: + consistency-checks: + name: "Code Consistency Checks" + runs-on: ubuntu-latest + strategy: + matrix: + include: + - { name: "api", path: "service/api" } + steps: + - name: "Fetch source code" + uses: actions/checkout@v4 + with: + fetch-depth: 0 # We need to do comparisons against the main branch. + + - name: "Install Go toolchain" + uses: actions/setup-go@v5 + with: + go-version-file: "${{ matrix.path }}/go.mod" + cache-dependency-path: "${{ matrix.path }}/go.sum" + + - name: "Go Modules consistency check" + working-directory: ${{ matrix.path }} + run: | + go mod tidy + if [[ -n "$(git status --porcelain)" ]]; then + echo >&2 "ERROR: go.mod/go.sum are not up-to-date. Run 'go mod tidy' and then commit the updated files." + exit 1 + fi + + - name: "Unit tests" + working-directory: ${{ matrix.path }} + run: | + go test -v -race -count 1 -covermode atomic -coverprofile=coverage.out ./... 
+ + - name: "Upload coverage" + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.name }}-coverage + path: ${{ matrix.path }}/coverage.out + + - name: "Lint" + uses: golangci/golangci-lint-action@v6 + with: + working-directory: ${{ matrix.path }} + version: v1.61.0 + skip-cache: true + args: -v diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..01e2005 --- /dev/null +++ b/.gitignore @@ -0,0 +1,152 @@ +# Created by https://www.toptal.com/developers/gitignore/api/intellij+all,macos,go +# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all,macos,go + +### Go ### +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work + +### Intellij+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and 
may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### Intellij+all Patch ### +# Ignore everything but code style settings and run configurations +# that are supposed to be shared within teams. + +.idea/* + +!.idea/codeStyles +!.idea/runConfigurations + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +# End of https://www.toptal.com/developers/gitignore/api/intellij+all,macos,go + +.env +!.gitkeep +*.db \ No newline at end of file diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..561dc86 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,171 @@ +run: + timeout: 5m + issues-exit-code: 1 + tests: true + build-tags: + - integration + modules-download-mode: readonly + allow-parallel-runners: false + go: '' +linters: + disable-all: true + fast: 
false + enable: + - asasalint + - asciicheck + - bidichk + - bodyclose + - durationcheck + - errcheck + - errname + - errorlint + - exhaustive + - funlen + - gci + - goconst + - gocritic + - gocyclo + - gofmt + - gofumpt + - goimports + - mnd + - gosec + - gosimple + - govet + - ineffassign + - lll + - misspell + - nakedret + - nilerr + - nilnil + - noctx + - nolintlint + - prealloc + - predeclared + - promlinter + - reassign + - revive + - rowserrcheck + - sqlclosecheck + - staticcheck + - stylecheck + - tagliatelle + - tenv + - testableexamples + - thelper + - tparallel + - unconvert + - unparam + - unused + - usestdlibvars + - wastedassign +linters-settings: + gci: + sections: + - standard + - default + goconst: + min-len: 2 + min-occurrences: 3 + gocritic: + enabled-tags: + - diagnostic + - experimental + - opinionated + - performance + - style + disabled-checks: + - dupImport + - octalLiteral + - whyNoLint + - rangeExprCopy + - rangeValCopy + gosec: + excludes: + - G601 + errcheck: + check-type-assertions: true + check-blank: true + exclude-functions: + - io/ioutil.ReadFile + - io.Copy(*bytes.Buffer) + - io.Copy(os.Stdout) + funlen: + lines: -1 + statements: 50 + mnd: + checks: + - argument + - case + - condition + - return + ignored-numbers: + - '0' + - '1' + - '2' + - '3' + - '4' + - '5' + - '24' + - '1024' + ignored-functions: + - strings.SplitN + gocyclo: + min-complexity: 15 + gofmt: + rewrite-rules: + - pattern: 'interface{}' + replacement: any + govet: + disable: + - fieldalignment + lll: + line-length: 140 + misspell: + locale: US + nakedret: + max-func-lines: 1 + nolintlint: + allow-unused: false + require-explanation: false + require-specific: true + revive: + rules: + - name: unexported-return + disabled: true + - name: unused-parameter + stylecheck: + checks: + - all + - '-ST1003' + tagliatelle: + case: + rules: + json: snake + yaml: snake + xml: camel + bson: camel + avro: snake + mapstructure: kebab + exhaustive: + default-signifies-exhaustive: 
true +output: + print-issued-lines: true + print-linter-name: true + uniq-by-line: false + path-prefix: '' + sort-results: true +issues: + max-issues-per-linter: 0 + max-same-issues: 0 + new: false + fix: false + exclude-rules: + - path: _test\.go + linters: + - errcheck + - gocritic + - mnd + - lll + - revive + - unparam diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1a36b43 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: https://github.com/tekwizely/pre-commit-golang + rev: master + hooks: + - id: go-mod-tidy-repo + - id: go-vet-mod + - repo: https://github.com/golangci/golangci-lint + rev: v1.61.0 + hooks: + - id: golangci-lint diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..41dd9eb --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,127 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..1075eff --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,61 @@ +# Contributing to ZenRows Go SDK + +First off, thanks for taking the time to contribute! + +The following is a set of guidelines for contributing to the ZenRows Go SDK. These are mostly guidelines, not rules. +Use your best judgment, and feel free to propose changes to this document in a pull request. + +## How Can I Contribute? + +### Reporting Bugs + +If you find a bug, please open an issue on GitHub. Provide as much detail as possible: + +- A clear and descriptive title. +- A description of the steps to reproduce the issue. +- Any error messages or logs. +- Your environment (Go version, OS, etc.) + +### Suggesting Enhancements + +Feel free to suggest new features or enhancements. Open an issue with the following details: + +- Use a clear and descriptive title. +- Provide a detailed explanation of the feature. +- Explain why this feature would be useful. + +### Pull Requests + +1. Fork the repository. +2. 
Clone your fork. +3. Create a new branch for your feature or bug fix: + ```bash + git checkout -b feature/your-feature-name + ``` +4. Make your changes and test them. +5. Commit your changes with a meaningful commit message. +6. Push your changes to your fork. +7. Open a pull request on the main repository. + +### Code Style + +- Follow Go conventions (e.g., `gofmt`). +- Write tests for new functionality. +- Make sure existing tests pass before submitting a PR. + +### Running Tests + +Run the test suite using: + +```bash +go test ./... +``` + +### Code of Conduct + +This project adheres to the Contributor Covenant [code of conduct](./CODE_OF_CONDUCT.md). By participating, you are +expected to uphold this code. Please report unacceptable behavior to [us](https://www.zenrows.com/contact). + +## Thank You! + +Thank you for considering contributing to the ZenRows Go SDK! Feel free to reach out if you have any questions. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..cec0c09 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 ZenRows + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9d8b0b --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +

+ + + ZenRows Logo + +

+ +

+ Documentation +  ·  + Pricing +

+
+ +# ZenRows Go SDK + +ZenRows® provides a powerful web scraping toolkit to help you collect, process, and manage web data effortlessly. +Whether you need scalable data extraction, a robust browser solution for dynamic websites or residential proxies to +access geo-targeted content, we have the right tools for your specific use cases. + +This repository hosts the official Go SDKs for integrating with different ZenRows services. Each SDK is located in +its respective subdirectory and includes comprehensive documentation, installation instructions, and usage examples. + +## Table of Contents + +- [Overview](#zenrows-go-sdk) +- [SDKs](#sdks) + - [Scraper API Service](#scraper-api-service) +- [Other Languages](#other-languages) +- [Contributing](#contributing) +- [License](#license) + +## SDKs + +### Scraper API Service + +> ZenRows®’ Scraper API enables fast, efficient, and hassle-free data extraction from web pages by providing versatile +scraping modes. Whether you’re new to scraping or already experienced, ZenRows adapts to your needs, making it easy to +collect data from the web while overcoming the common challenges posed by modern websites, including CAPTCHAs +and anti-bot mechanisms. + +**Directory**: [`service/api`](./service/api) + +The `service/api` SDK is a Go client for the ZenRows Scraper API, allowing developers to send HTTP requests to scrape +websites with support for various configurations like JavaScript rendering, custom headers, retries, and more. + +- [Installation and Usage](./service/api/README.md) +- [API Reference](https://docs.zenrows.com/scraper-api/api-reference) + +## Other Languages + +- **Node.js**: + - [Scraper API](https://github.com/ZenRows/zenrows-node-sdk) + - [Scraping Browser](https://github.com/ZenRows/browser-js-sdk) +- **Python**: + - [Scraper API](https://github.com/ZenRows/zenrows-python-sdk) + +## Contributing + +Contributions to the SDKs are welcome! 
See [CONTRIBUTING.md](./CONTRIBUTING.md) for more information on how to +contribute to the repository. + +## License + +This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..cb6e73a --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/zenrows/zenrows-go-sdk + +go 1.23.1 + +require github.com/hashicorp/go-version v1.7.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..985a85d --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= +github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= diff --git a/service/api/.golangci.yml b/service/api/.golangci.yml new file mode 100644 index 0000000..561dc86 --- /dev/null +++ b/service/api/.golangci.yml @@ -0,0 +1,171 @@ +run: + timeout: 5m + issues-exit-code: 1 + tests: true + build-tags: + - integration + modules-download-mode: readonly + allow-parallel-runners: false + go: '' +linters: + disable-all: true + fast: false + enable: + - asasalint + - asciicheck + - bidichk + - bodyclose + - durationcheck + - errcheck + - errname + - errorlint + - exhaustive + - funlen + - gci + - goconst + - gocritic + - gocyclo + - gofmt + - gofumpt + - goimports + - mnd + - gosec + - gosimple + - govet + - ineffassign + - lll + - misspell + - nakedret + - nilerr + - nilnil + - noctx + - nolintlint + - prealloc + - predeclared + - promlinter + - reassign + - revive + - rowserrcheck + - sqlclosecheck + - staticcheck + - stylecheck + - tagliatelle + - tenv + - testableexamples + - thelper + - tparallel + - unconvert + - unparam + - unused + - usestdlibvars + - wastedassign +linters-settings: + gci: + sections: + - standard + - default + goconst: + min-len: 2 + min-occurrences: 3 + gocritic: + enabled-tags: + - diagnostic + - experimental + - opinionated + - performance + - style + disabled-checks: + - 
dupImport + - octalLiteral + - whyNoLint + - rangeExprCopy + - rangeValCopy + gosec: + excludes: + - G601 + errcheck: + check-type-assertions: true + check-blank: true + exclude-functions: + - io/ioutil.ReadFile + - io.Copy(*bytes.Buffer) + - io.Copy(os.Stdout) + funlen: + lines: -1 + statements: 50 + mnd: + checks: + - argument + - case + - condition + - return + ignored-numbers: + - '0' + - '1' + - '2' + - '3' + - '4' + - '5' + - '24' + - '1024' + ignored-functions: + - strings.SplitN + gocyclo: + min-complexity: 15 + gofmt: + rewrite-rules: + - pattern: 'interface{}' + replacement: any + govet: + disable: + - fieldalignment + lll: + line-length: 140 + misspell: + locale: US + nakedret: + max-func-lines: 1 + nolintlint: + allow-unused: false + require-explanation: false + require-specific: true + revive: + rules: + - name: unexported-return + disabled: true + - name: unused-parameter + stylecheck: + checks: + - all + - '-ST1003' + tagliatelle: + case: + rules: + json: snake + yaml: snake + xml: camel + bson: camel + avro: snake + mapstructure: kebab + exhaustive: + default-signifies-exhaustive: true +output: + print-issued-lines: true + print-linter-name: true + uniq-by-line: false + path-prefix: '' + sort-results: true +issues: + max-issues-per-linter: 0 + max-same-issues: 0 + new: false + fix: false + exclude-rules: + - path: _test\.go + linters: + - errcheck + - gocritic + - mnd + - lll + - revive + - unparam diff --git a/service/api/Makefile b/service/api/Makefile new file mode 100644 index 0000000..b3613cb --- /dev/null +++ b/service/api/Makefile @@ -0,0 +1,55 @@ +.NOTPARALLEL: + +.PHONY: next-patch-version next-minor-version next-major-version tag-next-patch tag-next-minor tag-next-major commit-patch-version commit-minor-version commit-major-version lint test + +PACKAGE := "service/api" +NEXT_PATCH_VERSION := $(shell go run cmd/nextversion/main.go -patch) +NEXT_MINOR_VERSION := $(shell go run cmd/nextversion/main.go -minor) +NEXT_MAJOR_VERSION := $(shell go 
run cmd/nextversion/main.go -major) + +.PHONY: +next-patch-version: + sed -i .bak s/"^const Version.*"/"const Version = \"${NEXT_PATCH_VERSION}\""/g version/version.go + +.PHONY: +commit-patch-version: + git add version/version.go + git commit -m "chore: set ${PACKAGE} version to ${NEXT_PATCH_VERSION}" + +.PHONY: +tag-next-patch: next-patch-version commit-patch-version + git tag -a ${PACKAGE}/v${NEXT_PATCH_VERSION} -m "New ${PACKAGE} release v${NEXT_PATCH_VERSION}" + +.PHONY: +next-minor-version: + sed -i .bak s/"^const Version.*"/"const Version = \"${NEXT_MINOR_VERSION}\""/g version/version.go + +.PHONY: +commit-minor-version: + git add version/version.go + git commit -m "chore: set ${PACKAGE} version to ${NEXT_MINOR_VERSION}" + +.PHONY: +tag-next-minor: next-minor-version commit-minor-version + git tag -a ${PACKAGE}/v${NEXT_MINOR_VERSION} -m "New ${PACKAGE} release v${NEXT_MINOR_VERSION}" + +.PHONY: +next-major-version: + sed -i .bak s/"^const Version.*"/"const Version = \"${NEXT_MAJOR_VERSION}\""/g version/version.go + +.PHONY: +commit-major-version: + git add version/version.go + git commit -m "chore: set ${PACKAGE} version to ${NEXT_MAJOR_VERSION}" + +.PHONY: +tag-next-major: next-major-version commit-major-version + git tag -a ${PACKAGE}/v${NEXT_MAJOR_VERSION} -m "New ${PACKAGE} release v${NEXT_MAJOR_VERSION}" + +.PHONY: +lint: + @golangci-lint run --fast + +.PHONY: +test: + @go test -race -count=1 ./... diff --git a/service/api/README.md b/service/api/README.md new file mode 100644 index 0000000..ef4a8e0 --- /dev/null +++ b/service/api/README.md @@ -0,0 +1,353 @@ +# ZenRows Scraper API Go SDK + +This is the Go SDK for interacting with the ZenRows Scraper API, designed to help developers integrate web scraping +capabilities into their Go applications. It simplifies the process of making HTTP requests, handling responses, +and managing configurations for interacting with the ZenRows Scraper API. 
+ +## Introduction + +The ZenRows® Scraper API is a versatile tool designed to simplify and enhance the process of extracting data from +websites. Whether you’re dealing with static or dynamic content, our API provides a range of features to meet your +scraping needs efficiently. + +With Premium Proxies, ZenRows gives you access to over 55 million residential IPs from 190+ countries, +ensuring 99.9% uptime and highly reliable scraping sessions. Our system also handles advanced fingerprinting, header +rotation, and IP management, **enabling you to scrape even the most protected sites without needing to manually +configure these elements**. + +ZenRows makes it easy to bypass complex anti-bot measures, handle JavaScript-heavy sites, and interact with web +elements dynamically — all with the right features enabled. + +## Table of Contents + +- [Installation](#installation) +- [Getting Started](#getting-started) +- [Usage](#usage) + - [Client Initialization](#client-initialization) + - [Sending Requests](#sending-requests) + - [GET Requests](#get-requests) + - [POST/PUT Requests](#post-or-put-requests) + - [Custom Request Parameters](#custom-request-parameters) + - [Handling Responses](#handling-responses) +- [Configuration Options](#configuration-options) +- [Error Handling](#error-handling) +- [Examples](#examples) + - [Concurrency](#concurrency) + - [Retrying](#retrying) +- [Contributing](#contributing) +- [License](#license) + +## Installation + +To install the SDK, run: + +```bash +go get github.com/zenrows/zenrows-go-sdk/service/api +``` + +## Getting Started + +To use the SDK, you need a ZenRows API key. You can find your API key in the +ZenRows [dashboard](https://app.zenrows.com/builder). 
+ +## Usage + +### Client Initialization + +Initialize the ZenRows client with your API key by either using the `WithAPIKey` client option or setting +the `ZENROWS_API_KEY` environment variable: + +```go +import ( + "context" + scraperapi "github.com/zenrows/zenrows-go-sdk/service/api" +) + +client := scraperapi.NewClient( + scraperapi.WithAPIKey("YOUR_API_KEY"), +) +``` + +### Sending Requests + +#### GET Requests + +```go +response, err := client.Get(context.Background(), "https://httpbin.io/anything", nil) +if err != nil { + // handle error +} + +if err = response.Error(); err != nil { + // handle error +} + +fmt.Println("Response Body:", string(response.Body())) +``` + +#### POST or PUT Requests + +```go +body := map[string]string{"key": "value"} +response, err := client.Post(context.Background(), "https://httpbin.io/anything", nil, body) +if err != nil { + // handle error +} + +if err = response.Error(); err != nil { + // handle error +} + +fmt.Println("Response Body:", string(response.Body())) +``` + +### Custom Request Parameters + +You can customize your requests using `RequestParameters` to modify the behavior of the scraping engine: + +```go +params := &scraperapi.RequestParameters{ + JSRender: true, + UsePremiumProxies: true, + ProxyCountry: "US", +} + +response, err := client.Get(context.Background(), "https://httpbin.io/anything", params) +if err != nil { + // handle error +} + +if err = response.Error(); err != nil { + // handle error +} + +fmt.Println("Response Body:", response.String()) +``` + +### Handling Responses + +The `Response` object provides several methods to access details about the HTTP response: + +- `Body() []byte`: Returns the raw response body. +- `String() string`: Returns the response body as a string. +- `Status() string`: Returns the status text (e.g., "200 OK"). +- `StatusCode() int`: Returns the HTTP status code (e.g., 200). +- `Header() http.Header`: Returns the response headers. 
+- `Time() time.Duration`: Returns the duration of the request. +- `ReceivedAt() time.Time`: Returns the time when the response was received. +- `Size() int64`: Returns the size of the response in bytes. +- `IsSuccess() bool`: Returns `true` if the response status is in the 2xx range. +- `IsError() bool`: Returns `true` if the response status is 4xx or higher. +- `Problem() *problem.Problem`: Returns a parsed problem description if the response contains an error. +- `Error() error`: Same as `Problem()`, but returns an error type. + +In order to access additional details about the scraping process, you can use the following methods: + +- `TargetHeaders() http.Header`: Returns headers from the target page. +- `TargetCookies() []*http.Cookie`: Returns cookies set by the target page. + +### Example + +```go +response, err := client.Get(context.Background(), "https://httpbin.io/anything", nil) +if err != nil { + // handle error +} else { + if prob := response.Problem(); prob != nil { + fmt.Println("API Error:", prob.Detail) + return + } + + fmt.Println("Response Body:", response.String()) + fmt.Println("Response Target Headers:", response.TargetHeaders()) + fmt.Println("Response Target Cookies:", response.TargetCookies()) +} +``` + +### Configuration Options + +You can customize the client using different options: + +- `WithAPIKey(apiKey string)`: Sets the API key for authentication. If not provided, the SDK will look for +the `ZENROWS_API_KEY` environment variable. +- `WithMaxRetryCount(maxRetryCount int)`: Sets the maximum number of retries for failed requests. _Default is 0 (no retries)._ +- `WithRetryWaitTime(retryWaitTime time.Duration)`: Sets the time to wait before retrying a request. _Default is 5 seconds._ +- `WithRetryMaxWaitTime(retryMaxWaitTime time.Duration)`: Sets the maximum time to wait for retries. _Default is 30 seconds._ +- `WithMaxConcurrentRequests(maxConcurrentRequests int)`: Limits the number of concurrent requests.
_Default is 5._ +Make sure this value does not exceed your plan's concurrency limit, as exceeding it may result in _429 Too Many Requests_ errors. + +### Error Handling + +The SDK provides custom error types for better error handling: + +- `NotConfiguredError`: Returned when the client is not properly configured (e.g., missing API key). +- `InvalidHTTPMethodError`: Returned when an unsupported HTTP method is used (e.g., when sending PATCH or DELETE requests). +- `InvalidTargetURLError`: Returned when an invalid target URL is provided (e.g., target URL is empty, or malformed). +- `InvalidParameterError`: Returned when invalid parameters are used in the request. See the error message for details. + +### Examples + +#### Concurrency + +Concurrency in web scraping is essential for efficient data extraction, especially when dealing with multiple URLs. +Managing the number of concurrent requests helps prevent overwhelming the target server and ensures you stay within +rate limits. Depending on your subscription plan, you can perform twenty or more concurrent requests. + +To limit the concurrency, the SDK uses a semaphore to control the number of concurrent requests that a single client +can make. This value is set by the `WithMaxConcurrentRequests` option when initializing the client and defaults to 5.
+ +See the [example](examples/concurrency/main.go) below for a demonstration of how to use the SDK with concurrency: + +```go +package main + +import ( + "context" + "fmt" + "sync" + + scraperapi "github.com/zenrows/zenrows-go-sdk/service/api" +) + +const ( + maxConcurrentRequests = 5 // run 5 scraping requests at the same time + totalRequests = 10 // send a total of 10 scraping requests +) + +func main() { + client := scraperapi.NewClient( + scraperapi.WithAPIKey("YOUR_API_KEY"), + scraperapi.WithMaxConcurrentRequests(maxConcurrentRequests), + ) + + var wg sync.WaitGroup + for i := 0; i < totalRequests; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + + res, err := client.Get(context.Background(), "https://httpbin.io/anything", &scraperapi.RequestParameters{}) + if err != nil { + fmt.Println(i, err) + return + } + + if err = res.Error(); err != nil { + fmt.Println(i, err) + return + } + + fmt.Printf("[#%d]: %s\n", i, res.Status()) + }(i) + } + + wg.Wait() + fmt.Println("done") +} +``` + +This program will output the status of each request, running up to five concurrent requests at a time: + +``` +[#1]: 200 OK +[#0]: 200 OK +[#9]: 200 OK +[#5]: 200 OK +[#2]: 200 OK +[#8]: 200 OK +[#7]: 200 OK +[#6]: 200 OK +[#4]: 200 OK +[#3]: 200 OK +done +``` + +#### Retrying + +The SDK supports automatic retries for failed requests. You can configure the maximum number of retries and the +wait time between retries using the `WithMaxRetryCount`, `WithRetryWaitTime`, and `WithRetryMaxWaitTime` options. + +A backoff strategy is used to increase the wait time between retries, starting at the `RetryWaitTime` and doubling +the wait time for each subsequent retry until it reaches the `RetryMaxWaitTime`. 
+ +See the [example](examples/retries/main.go) below for a demonstration of how to use the SDK with retries: + +```go +package main + +import ( + "context" + "fmt" + "sync" + "time" + + scraperapi "github.com/zenrows/zenrows-go-sdk/service/api" +) + +const ( + maxConcurrentRequests = 5 // run 5 scraping requests at the same time + totalRequests = 10 // send a total of 10 scraping requests +) + +func main() { + client := scraperapi.NewClient( + scraperapi.WithAPIKey("YOUR_API_KEY"), + scraperapi.WithMaxConcurrentRequests(maxConcurrentRequests), + scraperapi.WithMaxRetryCount(5), // retry up to five times + scraperapi.WithRetryWaitTime(20*time.Second), // waiting at least 20s between retries (just for demonstration purposes!) + scraperapi.WithRetryMaxWaitTime(25*time.Second), // and waiting a maximum of 25s between retries + ) + + var wg sync.WaitGroup + for i := 0; i < totalRequests; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + now := time.Now() // store the time, to be able to print the elapsed duration + + // target the https://httpbin.io/unstable endpoint, as it fails half of the times, so the retry mechanism takes care of + // making sure we eventually receive a successful request + res, err := client.Get(context.Background(), "https://httpbin.io/unstable", &scraperapi.RequestParameters{}) + if err != nil { + fmt.Println(i, err) + return + } + + if err = res.Error(); err != nil { + fmt.Println(i, err) + return + } + + fmt.Printf("[#%d]: %s (in %s)\n", i, res.Status(), time.Since(now)) + }(i) + } + + wg.Wait() + fmt.Println("done") +} +``` + +This program will output the status of each request, and the elapsed time.
As we've set the retry mechanism to retry +up to five times, with a minimum wait time of 20 seconds and a maximum of 25 seconds, the output will look like this: + +``` +[#6]: 200 OK (in 743.064708ms) +[#2]: 200 OK (in 1.202448208s) +[#1]: 200 OK (in 1.380041292s) +[#5]: 200 OK (in 1.626613583s) +[#8]: 200 OK (in 2.635505541s) +[#4]: 200 OK (in 3.217849791s) +[#9]: 200 OK (in 21.973982334s) <-- this request took longer because it had to retry 1 time +[#3]: 200 OK (in 22.031445708s) <-- this request took longer because it had to retry 1 time +[#7]: 200 OK (in 22.130371583s) <-- this request took longer because it had to retry 1 time +[#0]: 200 OK (in 45.030251042s) <-- this request took longer because it had to retry 2 times +done +``` + +### Contributing + +Contributions are welcome! Please see [CONTRIBUTING.md](../../CONTRIBUTING.md) for details. + +## License + +This project is licensed under the MIT License - see the [LICENSE](../../LICENSE) file for details. diff --git a/service/api/client.go b/service/api/client.go new file mode 100644 index 0000000..c1291ad --- /dev/null +++ b/service/api/client.go @@ -0,0 +1,122 @@ +package scraperapi + +import ( + "context" + "net/http" + "net/url" + "slices" + + "github.com/go-resty/resty/v2" + "github.com/zenrows/zenrows-go-sdk/service/api/version" +) + +const ( + apiKeyParamName = "apikey" + urlParamName = "url" +) + +// Client is the ZenRows Scraper API client +type Client struct { + cfg options + http *resty.Client + concurrencySemaphore chan struct{} +} + +// NewClient creates and returns a new ZenRows Scraper API client +func NewClient(opts ...Option) *Client { + client := &Client{cfg: defaultOptions()} + + for _, opt := range opts { + opt.apply(&client.cfg) + } + + client.http = resty.New(). + SetLogger(noopLogger{}). + SetBaseURL(client.cfg.baseURL). + SetHeader("User-Agent", "zenrows-go/"+version.Version). + SetQueryParam(apiKeyParamName, client.cfg.apiKey). + SetRetryCount(client.cfg.retryOptions.maxRetryCount). 
+ SetRetryWaitTime(client.cfg.retryOptions.retryWaitTime). + SetRetryMaxWaitTime(client.cfg.retryOptions.retryMaxWaitTime). + AddRetryCondition(func(r *resty.Response, err error) bool { + return err != nil || slices.Contains(retryableStatusCodes, r.StatusCode()) + }) + + // if the maxConcurrentRequests is set, create a semaphore to limit the number of concurrent requests + if client.cfg.maxConcurrentRequests > 0 { + client.concurrencySemaphore = make(chan struct{}, client.cfg.maxConcurrentRequests) + } + + return client +} + +// isConfigured returns true if the client is configured with a base url and a secret key +func (c *Client) isConfigured() bool { + return c.cfg.baseURL != "" && c.cfg.apiKey != "" +} + +// Scrape sends a request to the ZenRows Scraper API to scrape the given target URL using the specified method and parameters. +func (c *Client) Scrape(ctx context.Context, method, targetURL string, params *RequestParameters, body any) (*Response, error) { + // make sure the client is configured before sending the request + if !c.isConfigured() { + return nil, NotConfiguredError{} + } + + // make sure the method is valid + if !slices.Contains(validHTTPMethods, method) { + return nil, InvalidHTTPMethodError{} + } + + // make sure a target url is provided + if targetURL == "" { + return nil, InvalidTargetURLError{Msg: "target url cannot be empty"} + } + + // make sure the target url is a valid url + parsedURL, parseErr := url.Parse(targetURL) + if parseErr != nil { + return nil, InvalidTargetURLError{URL: targetURL, Err: parseErr} + } + + // create the request + req := c.http.R().SetContext(ctx).SetQueryParam(urlParamName, parsedURL.String()).SetBody(body) + + // if parameters are provided, validate them and set them on the request + if params != nil { + if err := params.Validate(); err != nil { + return nil, err + } + + req.SetHeaderMultiValues(params.CustomHeaders) + req.SetQueryParamsFromValues(params.ToURLValues()) + } + + // if the concurrency semaphore is 
initialized, acquire a token before sending the request + // and release it after the request is done + if c.concurrencySemaphore != nil { + c.concurrencySemaphore <- struct{}{} + defer func() { <-c.concurrencySemaphore }() + } + + // execute the request, and return the response or an error if one occurred + res, err := req.Execute(method, "/") + if err != nil { + return nil, err + } + return &Response{res: res}, nil +} + +// Get sends an HTTP GET request to the ZenRows Scraper API to scrape the given target URL using the specified parameters. +func (c *Client) Get(ctx context.Context, targetURL string, params *RequestParameters) (*Response, error) { + return c.Scrape(ctx, http.MethodGet, targetURL, params, nil) +} + +// Post sends an HTTP POST request to the ZenRows Scraper API to scrape the given target URL using the specified parameters. +func (c *Client) Post(ctx context.Context, targetURL string, params *RequestParameters, body any) (*Response, error) { + return c.Scrape(ctx, http.MethodPost, targetURL, params, body) +} + +// Put sends an HTTP PUT request to the ZenRows Scraper API to scrape the given target URL using the specified parameters. 
+func (c *Client) Put(ctx context.Context, targetURL string, params *RequestParameters, body any) (*Response, error) { + return c.Scrape(ctx, http.MethodPut, targetURL, params, body) +} diff --git a/service/api/cmd/nextversion/main.go b/service/api/cmd/nextversion/main.go new file mode 100644 index 0000000..d972ed5 --- /dev/null +++ b/service/api/cmd/nextversion/main.go @@ -0,0 +1,47 @@ +package main + +import ( + "flag" + "fmt" + "os" + "strings" + + "github.com/zenrows/zenrows-go-sdk/service/api/version" +) + +func main() { + major := flag.Bool("major", false, "Increment the major version") + minor := flag.Bool("minor", false, "Increment the minor version") + patch := flag.Bool("patch", false, "Increment the patch version") + flag.Parse() + + parts := strings.Split(version.Version, ".") + if len(parts) != 3 { + fmt.Println("Invalid version format. Must be in the form 'MAJOR.MINOR.PATCH'") + os.Exit(1) + } + + var majorVer, minorVer, patchVer int + _, err := fmt.Sscanf(version.Version, "%d.%d.%d", &majorVer, &minorVer, &patchVer) + if err != nil { + fmt.Println("Error parsing version:", err) + os.Exit(1) + } + + switch { + case *major: + majorVer++ + minorVer = 0 + patchVer = 0 + case *minor: + minorVer++ + patchVer = 0 + case *patch: + patchVer++ + default: + fmt.Println("Please provide a flag: -major, -minor, or -patch") + os.Exit(1) + } + + fmt.Printf("%d.%d.%d\n", majorVer, minorVer, patchVer) +} diff --git a/service/api/errors.go b/service/api/errors.go new file mode 100644 index 0000000..b09c635 --- /dev/null +++ b/service/api/errors.go @@ -0,0 +1,56 @@ +package scraperapi + +import ( + "fmt" + "strings" +) + +// NotConfiguredError results when the ZenRows Scraper API client is used without a valid API Key. +type NotConfiguredError struct{} + +func (NotConfiguredError) Error() string { + return "zenrows scraper api client is not configured" +} + +// InvalidHTTPMethodError results when the ZenRows Scraper API client is used with an invalid HTTP method. 
+type InvalidHTTPMethodError struct{} + +func (InvalidHTTPMethodError) Error() string { + return fmt.Sprintf("invalid http method. supported methods are: %s", strings.Join(validHTTPMethods, ", ")) +} + +// InvalidTargetURLError results when the ZenRows Scraper API client is used with an invalid target URL. +type InvalidTargetURLError struct { + URL string + Msg string + Err error +} + +func (e InvalidTargetURLError) Unwrap() error { + return e.Err +} + +func (e InvalidTargetURLError) Error() string { + if e.Msg == "" { + e.Msg = "invalid target url" + } + + if e.Err != nil { + return e.Msg + ": " + e.Err.Error() + } + + return e.Msg +} + +// InvalidParameterError results when the ZenRows Scraper API client is used with an invalid parameter. +type InvalidParameterError struct { + Msg string +} + +func (e InvalidParameterError) Error() string { + if e.Msg == "" { + e.Msg = "invalid parameter" + } + + return e.Msg +} diff --git a/service/api/examples/concurrency/main.go b/service/api/examples/concurrency/main.go new file mode 100644 index 0000000..d7f7e13 --- /dev/null +++ b/service/api/examples/concurrency/main.go @@ -0,0 +1,45 @@ +package main + +import ( + "context" + "fmt" + "sync" + + scraperapi "github.com/zenrows/zenrows-go-sdk/service/api" +) + +const ( + maxConcurrentRequests = 5 // run 5 scraping requests at the same time + totalRequests = 15 // send a total of 15 scraping requests +) + +func main() { + client := scraperapi.NewClient( + scraperapi.WithAPIKey("YOUR_API_KEY"), + scraperapi.WithMaxConcurrentRequests(maxConcurrentRequests), + ) + + var wg sync.WaitGroup + for i := 0; i < totalRequests; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + + res, err := client.Get(context.Background(), "https://httpbin.io/anything", &scraperapi.RequestParameters{}) + if err != nil { + fmt.Println(i, err) + return + } + + if err = res.Error(); err != nil { + fmt.Println(i, err) + return + } + + fmt.Printf("[#%d]: %s\n", i, res.Status()) + }(i) + } + + wg.Wait() 
+ fmt.Println("done") +} diff --git a/service/api/examples/retries/main.go b/service/api/examples/retries/main.go new file mode 100644 index 0000000..7295d72 --- /dev/null +++ b/service/api/examples/retries/main.go @@ -0,0 +1,52 @@ +package main + +import ( + "context" + "fmt" + "sync" + "time" + + scraperapi "github.com/zenrows/zenrows-go-sdk/service/api" +) + +const ( + maxConcurrentRequests = 5 // run 5 scraping requests at the same time + totalRequests = 10 // send a total of 10 scraping requests +) + +func main() { + client := scraperapi.NewClient( + scraperapi.WithAPIKey("YOUR_API_KEY"), + scraperapi.WithMaxConcurrentRequests(maxConcurrentRequests), + scraperapi.WithMaxRetryCount(5), // retry up to five times + scraperapi.WithRetryWaitTime(20*time.Second), // waiting at least 20s between retries (just for demonstration purposes!) + scraperapi.WithRetryMaxWaitTime(25*time.Second), // and waiting a maximum of 20s between retries + ) + + var wg sync.WaitGroup + for i := 0; i < totalRequests; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + now := time.Now() // store the time, to be able to print the elapsed duration + + // target the https://httpbin.io/unstable endpoint, as it fails half of the times, so the retry mechanism takes care of + // making sure we eventually receive a successful request + res, err := client.Get(context.Background(), "https://httpbin.io/unstable", &scraperapi.RequestParameters{}) + if err != nil { + fmt.Println(i, err) + return + } + + if err = res.Error(); err != nil { + fmt.Println(i, err) + return + } + + fmt.Printf("[#%d]: %s (in %s)\n", i, res.Status(), time.Since(now)) + }(i) + } + + wg.Wait() + fmt.Println("done") +} diff --git a/service/api/go.mod b/service/api/go.mod new file mode 100644 index 0000000..b1c4428 --- /dev/null +++ b/service/api/go.mod @@ -0,0 +1,12 @@ +module github.com/zenrows/zenrows-go-sdk/service/api + +go 1.23.1 + +require ( + github.com/fatih/structs v1.1.0 + github.com/go-resty/resty/v2 v2.15.3 + 
github.com/gorilla/schema v1.4.1 + github.com/hashicorp/go-version v1.7.0 +) + +require golang.org/x/net v0.30.0 // indirect diff --git a/service/api/go.sum b/service/api/go.sum new file mode 100644 index 0000000..238c32c --- /dev/null +++ b/service/api/go.sum @@ -0,0 +1,12 @@ +github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo= +github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/go-resty/resty/v2 v2.15.3 h1:bqff+hcqAflpiF591hhJzNdkRsFhlB96CYfBwSFvql8= +github.com/go-resty/resty/v2 v2.15.3/go.mod h1:0fHAoK7JoBy/Ch36N8VFeMsK7xQOHhvWaC3iOktwmIU= +github.com/gorilla/schema v1.4.1 h1:jUg5hUjCSDZpNGLuXQOgIWGdlgrIdYvgQ0wZtdK1M3E= +github.com/gorilla/schema v1.4.1/go.mod h1:Dg5SSm5PV60mhF2NFaTV1xuYYj8tV8NOPRo4FggUMnM= +github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= +github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= +golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= diff --git a/service/api/interface.go b/service/api/interface.go new file mode 100644 index 0000000..5678e8a --- /dev/null +++ b/service/api/interface.go @@ -0,0 +1,15 @@ +package scraperapi + +import "context" + +//go:generate mockery +type IClient interface { + // Scrape sends a request to the ZenRows Scraper API to scrape the given target URL using the specified method and parameters. + Scrape(ctx context.Context, targetURL, method string, params RequestParameters) (*Response, error) + // Get sends a GET request to the ZenRows Scraper API to scrape the given target URL using the specified parameters. 
+ Get(ctx context.Context, targetURL string, params RequestParameters) (*Response, error) + // Post sends a POST request to the ZenRows Scraper API to scrape the given target URL using the specified parameters. + Post(ctx context.Context, targetURL string, params RequestParameters) (*Response, error) + // Put sends a PUT request to the ZenRows Scraper API to scrape the given target URL using the specified parameters. + Put(ctx context.Context, targetURL string, params RequestParameters) (*Response, error) +} diff --git a/service/api/logger.go b/service/api/logger.go new file mode 100644 index 0000000..19c4be4 --- /dev/null +++ b/service/api/logger.go @@ -0,0 +1,16 @@ +package scraperapi + +// noopLogger is a logger that does nothing +type noopLogger struct{} + +func (l noopLogger) Errorf(_ string, _ ...any) { + // no-op +} + +func (l noopLogger) Warnf(_ string, _ ...any) { + // no-op +} + +func (l noopLogger) Debugf(_ string, _ ...any) { + // no-op +} diff --git a/service/api/options.go b/service/api/options.go new file mode 100644 index 0000000..5c67fe8 --- /dev/null +++ b/service/api/options.go @@ -0,0 +1,121 @@ +package scraperapi + +import ( + "net/http" + "os" + "time" +) + +const ( + defaultBaseURL = "https://api.zenrows.com/v1" + defaultMaxRetryCount = 0 + defaultRetryWaitTime = 5 * time.Second + defaultRetryMaxWaitTime = 30 * time.Second +) + +var retryableStatusCodes = []int{http.StatusUnprocessableEntity, http.StatusTooManyRequests, http.StatusInternalServerError} + +// Option configures the ZenRows Scraper API client. +type Option interface { + apply(*options) +} + +// options holds the configuration for the ZenRows Scraper API service +type options struct { + // baseURL is the base url of the ZenRows Scraper API service. 
Defaults to: "https://api.zenrows.com/v1" + baseURL string + // apiKey is the secret token to use to authenticate with the ZenRows Scraper API client + apiKey string + // retryOptions holds the configuration for the retry mechanism of the ZenRows Scraper API client + retryOptions retryOptions + // maxConcurrentRequests is the maximum number of concurrent requests that can be handled by the ZenRows Scraper API client at a time + maxConcurrentRequests int +} + +// retryOptions holds the configuration for the retry mechanism of the ZenRows Scraper API client. Only response status codes in +// the retryableStatusCodes list will be retried. +type retryOptions struct { + // maxRetryCount is the maximum number of retries to perform. If set to a non-zero value, the client will retry the request up to + // this number of times using a backoff strategy. Defaults to 0. + maxRetryCount int + + // retryWaitTime is the time to wait before retrying the request. Defaults to 5 seconds. + retryWaitTime time.Duration + + // retryMaxWaitTime is the maximum time to wait before retrying the request. Defaults to 30 seconds. + retryMaxWaitTime time.Duration +} + +// defaultOptions returns the default options for the ZenRows Scraper API client. +func defaultOptions() options { + return options{ + baseURL: defaultBaseURL, + apiKey: os.Getenv("ZENROWS_API_KEY"), + retryOptions: retryOptions{ + maxRetryCount: defaultMaxRetryCount, + retryWaitTime: defaultRetryWaitTime, + retryMaxWaitTime: defaultRetryMaxWaitTime, + }, + } +} + +// funcOption wraps a function that modifies options into an implementation of the Option interface. +type funcOption struct { + f func(*options) +} + +func (fdo *funcOption) apply(do *options) { + fdo.f(do) +} + +func newFuncDialOption(f func(*options)) *funcOption { + return &funcOption{ + f: f, + } +} + +// WithBaseURL returns an Option which configures the base URL of the ZenRows Scraper API client. 
+func WithBaseURL(baseURL string) Option { + return newFuncDialOption(func(o *options) { + o.baseURL = baseURL + }) +} + +// WithAPIKey returns an Option which configures the API key of the ZenRows Scraper API client. +func WithAPIKey(apiKey string) Option { + return newFuncDialOption(func(o *options) { + o.apiKey = apiKey + }) +} + +// WithMaxRetryCount returns an Option which configures the maximum number of retries to perform. +func WithMaxRetryCount(maxRetryCount int) Option { + return newFuncDialOption(func(o *options) { + o.retryOptions.maxRetryCount = maxRetryCount + }) +} + +// WithRetryWaitTime returns an Option which configures the time to wait before retrying the request. +func WithRetryWaitTime(retryWaitTime time.Duration) Option { + return newFuncDialOption(func(o *options) { + o.retryOptions.retryWaitTime = retryWaitTime + }) +} + +// WithRetryMaxWaitTime returns an Option which configures the maximum time to wait before retrying the request. +func WithRetryMaxWaitTime(retryMaxWaitTime time.Duration) Option { + return newFuncDialOption(func(o *options) { + o.retryOptions.retryMaxWaitTime = retryMaxWaitTime + }) +} + +// WithMaxConcurrentRequests returns an Option which configures the maximum number of concurrent requests to the ZenRows Scraper API. +// See https://docs.zenrows.com/scraper-api/features/concurrency for more information. +// +// IMPORTANT: Breaking the concurrency limit will result in a 429 Too Many Requests error. If you exceed the limit repeatedly, your +// account may be temporarily suspended, so make sure to set this value to a reasonable number according to your subscription plan. 
+func WithMaxConcurrentRequests(maxConcurrentRequests int) Option { + return newFuncDialOption(func(o *options) { + o.maxConcurrentRequests = maxConcurrentRequests + }) +} diff --git a/service/api/params.go b/service/api/params.go new file mode 100644 index 0000000..754d55e --- /dev/null +++ b/service/api/params.go @@ -0,0 +1,353 @@ +package scraperapi + +import ( + "fmt" + "net/http" + "net/url" + "reflect" + "strings" + + "github.com/fatih/structs" + "github.com/gorilla/schema" +) + +// decoder is a schema decoder that will be used to decode the query parameters into a RequestParameters object. +var decoder = schema.NewDecoder() + +// validHTTPMethods is a list of valid HTTP methods that can be used in a request. +var validHTTPMethods = []string{http.MethodGet, http.MethodPost, http.MethodPut} + +// ResponseType represents the type of response that the ZenRows Scraper API should return. +type ResponseType string + +const ( + ResponseTypeMarkdown ResponseType = "markdown" + ResponseTypePlainText ResponseType = "plaintext" + ResponseTypePDF ResponseType = "pdf" +) + +var AllResponseTypes = map[ResponseType]struct{}{ + ResponseTypeMarkdown: {}, + ResponseTypePlainText: {}, + ResponseTypePDF: {}, +} + +type OutputType string + +const ( + OutputTypeEmails OutputType = "emails" + OutputTypePhoneNumbers OutputType = "phone_numbers" + OutputTypeHeadings OutputType = "headings" + OutputTypeImages OutputType = "images" + OutputTypeAudios OutputType = "audios" + OutputTypeVideos OutputType = "videos" + OutputTypeLinks OutputType = "links" + OutputTypeTables OutputType = "tables" + OutputTypeMenus OutputType = "menus" + OutputTypeHashtags OutputType = "hashtags" + OutputTypeMetadata OutputType = "metadata" + OutputTypeFavicon OutputType = "favicon" + OutputTypeAll OutputType = "*" +) + +var AllOutputTypes = map[OutputType]struct{}{ + OutputTypeEmails: {}, + OutputTypePhoneNumbers: {}, + OutputTypeHeadings: {}, + OutputTypeImages: {}, + OutputTypeAudios: {}, + 
OutputTypeVideos: {}, + OutputTypeLinks: {}, + OutputTypeTables: {}, + OutputTypeMenus: {}, + OutputTypeHashtags: {}, + OutputTypeMetadata: {}, + OutputTypeFavicon: {}, + OutputTypeAll: {}, +} + +type ScreenshotFormat string + +const ( + ScreenshotFormatPNG ScreenshotFormat = "png" + ScreenshotFormatJPEG ScreenshotFormat = "jpeg" +) + +var AllScreenshotFormats = map[ScreenshotFormat]struct{}{ + ScreenshotFormatPNG: {}, + ScreenshotFormatJPEG: {}, +} + +type ResourceType string + +const ( + ResourceTypeEventSource ResourceType = "eventsource" + ResourceTypeFetch ResourceType = "fetch" + ResourceTypeFont ResourceType = "font" + ResourceTypeImage ResourceType = "image" + ResourceTypeManifest ResourceType = "manifest" + ResourceTypeMedia ResourceType = "media" + ResourceTypeOther ResourceType = "other" + ResourceTypeScript ResourceType = "script" + ResourceTypeStylesheet ResourceType = "stylesheet" + ResourceTypeTextTrack ResourceType = "texttrack" + ResourceTypeWebSocket ResourceType = "websocket" + ResourceTypeXHR ResourceType = "xhr" +) + +var AllResourceTypes = map[ResourceType]struct{}{ + ResourceTypeEventSource: {}, + ResourceTypeFetch: {}, + ResourceTypeFont: {}, + ResourceTypeImage: {}, + ResourceTypeManifest: {}, + ResourceTypeMedia: {}, + ResourceTypeOther: {}, + ResourceTypeScript: {}, + ResourceTypeStylesheet: {}, + ResourceTypeTextTrack: {}, + ResourceTypeWebSocket: {}, + ResourceTypeXHR: {}, +} + +// RequestParameters represents the parameters that can be passed to the ZenRows Scraper API when making a request to modify the behavior +// of the scraping engine. +// +// See https://docs.zenrows.com/scraper-api/api-reference for more information. 
+type RequestParameters struct {
+	// Proxy settings (Validate rejects ProxyCountry unless UsePremiumProxies is set)
+	UsePremiumProxies bool   `json:"premium_proxy,omitempty" structs:"premium_proxy,omitempty" schema:"premium_proxy"`
+	ProxyCountry      string `json:"proxy_country,omitempty" structs:"proxy_country,omitempty" schema:"proxy_country"`
+
+	// Output modifiers
+	AutoParse    bool         `json:"autoparse,omitempty" structs:"autoparse,omitempty" schema:"autoparse"`
+	CSSExtractor string       `json:"css_extractor,omitempty" structs:"css_extractor,omitempty" schema:"css_extractor"`
+	JSONResponse bool         `json:"json_response,omitempty" structs:"json_response,omitempty" schema:"json_response"`
+	ResponseType ResponseType `json:"response_type,omitempty" structs:"response_type,omitempty" schema:"response_type"`
+	Outputs      []OutputType `json:"outputs,omitempty" structs:"outputs,omitempty" schema:"outputs"`
+
+	////////////////////////////////////////////////
+	// Headless settings
+	////////////////////////////////////////////////
+
+	// JSRender enables JavaScript rendering for the request. If not enabled, the request will be processed by the standard scraping engine,
+	// which is faster but does not execute JavaScript and may not bypass some anti-bot systems.
+	//
+	// See https://docs.zenrows.com/scraper-api/features/js-rendering for more information.
+	JSRender bool `json:"js_render,omitempty" structs:"js_render,omitempty" schema:"js_render"`
+
+	// JSInstructions is a serialized JSON object that contains custom JavaScript instructions that will be executed in the page before
+	// returning the response (only available when using JSRender).
+	//
+	// See https://docs.zenrows.com/scraper-api/features/js-rendering#using-the-javascript-instructions for more information.
+	JSInstructions string `json:"js_instructions,omitempty" structs:"js_instructions,omitempty" schema:"js_instructions"`
+
+	// WaitMilliseconds will wait for the specified number of milliseconds before returning the response (only available when
+	// using JSRender). The maximum wait time is 30 seconds (30000 ms).
+	WaitMilliseconds int `json:"wait,omitempty" structs:"wait,omitempty" schema:"wait"`
+
+	// WaitForSelector will wait for the specified element to appear in the page before returning the response (only available when
+	// using JSRender).
+	//
+	// See https://docs.zenrows.com/scraper-api/features/js-rendering#wait-for-selector for more information.
+	//
+	// IMPORTANT: Make sure that the element you are waiting for is present in the page. If the element does not appear, the request will
+	// fail with a timeout error after a few seconds.
+	WaitForSelector string `json:"wait_for,omitempty" structs:"wait_for,omitempty" schema:"wait_for"`
+
+	// Screenshot will return a screenshot of the page (only available when using JSRender)
+	Screenshot bool `json:"screenshot,omitempty" structs:"screenshot,omitempty" schema:"screenshot"`
+
+	// ScreenshotFullPage will take a screenshot of the full page (only available when using JSRender and Screenshot is set to true)
+	ScreenshotFullPage bool `json:"screenshot_fullpage,omitempty" structs:"screenshot_fullpage,omitempty" schema:"screenshot_fullpage"`
+
+	// ScreenshotSelector will take a screenshot of the specified element (only available when using JSRender and Screenshot is set to true)
+	ScreenshotSelector string `json:"screenshot_selector,omitempty" structs:"screenshot_selector,omitempty" schema:"screenshot_selector"`
+
+	// ScreenshotFormat will set the format of the screenshot (only available when using JSRender and Screenshot is set to true).
+	// The available formats are ScreenshotFormatPNG and ScreenshotFormatJPEG. The default format is ScreenshotFormatPNG.
+	ScreenshotFormat ScreenshotFormat `json:"screenshot_format,omitempty" structs:"screenshot_format,omitempty" schema:"screenshot_format"`
+
+	// ScreenshotQuality will set the quality of the screenshot (only available when using JSRender and Screenshot is set to true, and
+	// the format is ScreenshotFormatJPEG). The quality must be between 1 and 100. The default quality is 100.
+	ScreenshotQuality int `json:"screenshot_quality,omitempty" structs:"screenshot_quality,omitempty" schema:"screenshot_quality"`
+
+	////////////////////////////////////////////////
+	// Advanced settings - USE WITH CAUTION
+	////////////////////////////////////////////////
+
+	// ReturnOriginalStatus will return the original status code of the response when the request is not successful. When a request is not
+	// successful, the ZenRows Scraper API will always return a 422 status code. If you enable this feature, the original status code will
+	// be returned instead.
+	ReturnOriginalStatus bool `json:"original_status,omitempty" structs:"original_status,omitempty" schema:"original_status"`
+
+	// SessionID is an integer between 0 and 99999 that can be used to group requests together. If you provide a SessionID, all requests
+	// with the same SessionID will use the same IP address for up to 10 minutes. This feature is useful for web scraping sites that track
+	// sessions or limit IP rotation. It helps simulate a persistent session and avoids triggering anti-bot systems that flag
+	// frequent IP changes.
+	//
+	// See https://docs.zenrows.com/scraper-api/features/other#session-id for more information.
+	//
+	// IMPORTANT: Use this feature only if you know what you are doing. If you provide a SessionID, the IP rotation feature will be disabled
+	// for all requests with the same SessionID. This may affect the scraping quality and increase the chances of being blocked.
+	SessionID int `json:"session_id,omitempty" structs:"session_id,omitempty" schema:"session_id"`
+
+	// AllowedStatusCodes will return the response body of a request even if the status code is not a successful one (2xx), but
+	// is one of the specified status codes in this list.
+	//
+	// See https://docs.zenrows.com/scraper-api/features/other#return-content-on-error for more information.
+	//
+	// IMPORTANT: ZenRows Scraper API only charges for successful requests. If you use this feature, you will also be charged for
+	// unsuccessful requests matching the specified status codes.
+	AllowedStatusCodes []int `json:"allowed_status_codes,omitempty" structs:"allowed_status_codes,omitempty" schema:"allowed_status_codes"`
+
+	// BlockResources will block the specified resources from loading (only available when using JSRender)
+	//
+	// See https://docs.zenrows.com/scraper-api/features/js-rendering#block-resources for more information.
+	//
+	// IMPORTANT: ZenRows Scraper API already blocks some resources by default to improve the scraping quality. Use this feature only if you
+	// know what you are doing.
+	BlockResources []ResourceType `json:"block_resources,omitempty" structs:"block_resources,omitempty" schema:"block_resources"`
+
+	// CustomHeaders is an http.Header object that will be used to set custom headers in the request.
+	//
+	// See https://docs.zenrows.com/scraper-api/features/headers for more information.
+	//
+	// IMPORTANT: ZenRows Scraper API already rotates and selects the best combination of headers (like User-Agent, Accept-Language, etc.)
+	// automatically for each request. If you provide custom headers, the scraping quality may be affected. Use this feature only if you
+	// know what you are doing.
+	CustomHeaders http.Header `json:"custom_headers,omitempty" structs:"-" schema:"-"`
+}
+
+func (p *RequestParameters) Validate() error { //nolint:gocyclo
+	if p.ScreenshotQuality < 0 || p.ScreenshotQuality > 100 {
+		return InvalidParameterError{Msg: "screenshot quality must be between 1 and 100"}
+	}
+
+	if p.SessionID < 0 || p.SessionID > 99_999 {
+		return InvalidParameterError{Msg: "session id must be between 0 and 99999"}
+	}
+
+	if p.WaitMilliseconds < 0 || p.WaitMilliseconds > 30_000 {
+		return InvalidParameterError{Msg: "wait must be between 0 and 30000 (ms)"}
+	}
+
+	if p.ResponseType != "" {
+		if _, ok := AllResponseTypes[p.ResponseType]; !ok {
+			return InvalidParameterError{Msg: "invalid response type"}
+		}
+	}
+
+	if p.ScreenshotFormat != "" {
+		if _, ok := AllScreenshotFormats[p.ScreenshotFormat]; !ok {
+			return InvalidParameterError{Msg: "invalid screenshot format"}
+		}
+	}
+
+	for _, output := range p.Outputs {
+		if _, ok := AllOutputTypes[output]; !ok {
+			return InvalidParameterError{Msg: "invalid output type"}
+		}
+	}
+
+	for _, resource := range p.BlockResources {
+		if _, ok := AllResourceTypes[resource]; !ok {
+			return InvalidParameterError{Msg: "invalid resource type"}
+		}
+	}
+
+	if !p.JSRender {
+		if p.Screenshot {
+			return InvalidParameterError{Msg: "screenshot is only available when using javascript rendering"}
+		}
+		if p.JSInstructions != "" {
+			return InvalidParameterError{Msg: "js_instructions is only available when using javascript rendering"}
+		}
+		if p.WaitMilliseconds > 0 {
+			return InvalidParameterError{Msg: "wait is only available when using javascript rendering"}
+		}
+		if p.WaitForSelector != "" {
+			return InvalidParameterError{Msg: "wait_for is only available when using javascript rendering"}
+		}
+		if len(p.BlockResources) > 0 {
+			return InvalidParameterError{Msg: "block_resources is only available when using javascript rendering"}
+		}
+	}
+
+	if !p.Screenshot {
+		if p.ScreenshotFullPage {
+			return InvalidParameterError{Msg: "screenshot_fullpage is only available when screenshot parameter is set to true"}
+		}
+		if p.ScreenshotSelector != "" {
+			return InvalidParameterError{Msg: "screenshot_selector is only available when screenshot parameter is set to true"}
+		}
+		if p.ScreenshotFormat != "" {
+			return InvalidParameterError{Msg: "screenshot_format is only available when screenshot parameter is set to true"}
+		}
+		if p.ScreenshotQuality > 0 {
+			return InvalidParameterError{Msg: "screenshot_quality is only available when screenshot parameter is set to true"}
+		}
+	}
+
+	if p.ScreenshotQuality > 0 && p.ScreenshotFormat != ScreenshotFormatJPEG {
+		return InvalidParameterError{Msg: "screenshot_quality is only available when screenshot_format is set to jpeg"}
+	}
+
+	if p.ProxyCountry != "" && !p.UsePremiumProxies {
+		return InvalidParameterError{Msg: "proxy country is only available when using premium proxies"}
+	}
+
+	return nil
+}
+
+// ToURLValues converts the RequestParameters to a url.Values object; slice fields are serialized as comma-separated lists.
+func (p *RequestParameters) ToURLValues() url.Values {
+	values := make(url.Values)
+	for k, v := range structs.Map(p) {
+		rv := reflect.ValueOf(v)
+		if rv.Kind() == reflect.Slice {
+			var strValues []string
+			for i := 0; i < rv.Len(); i++ {
+				strValues = append(strValues, fmt.Sprintf("%v", rv.Index(i)))
+			}
+			values.Set(k, strings.Join(strValues, ","))
+		} else {
+			values.Set(k, fmt.Sprintf("%v", v))
+		}
+	}
+
+	// if custom headers are set, we need to set the custom_headers flag to true
+	if len(p.CustomHeaders) > 0 {
+		values.Set("custom_headers", "true")
+	}
+
+	return values
+}
+
+// ParseQueryRequestParameters parses the provided url.Values object and returns a RequestParameters object, or an error if the parsing
+// fails.
+func ParseQueryRequestParameters(query url.Values) (*RequestParameters, error) {
+	var requestParameters RequestParameters
+	if err := decoder.Decode(&requestParameters, query); err != nil {
+		return nil, err
+	}
+
+	return &requestParameters, nil
+}
+
+func init() {
+	decoder.RegisterConverter([]ResourceType{}, func(input string) reflect.Value {
+		var resourceTypes []ResourceType
+		for _, resourceType := range strings.Split(input, ",") {
+			resourceTypes = append(resourceTypes, ResourceType(resourceType))
+		}
+		return reflect.ValueOf(resourceTypes)
+	})
+	decoder.RegisterConverter([]OutputType{}, func(input string) reflect.Value {
+		var outputTypes []OutputType
+		for _, outputType := range strings.Split(input, ",") {
+			outputTypes = append(outputTypes, OutputType(outputType))
+		}
+		return reflect.ValueOf(outputTypes)
+	})
+}
diff --git a/service/api/pkg/problem/problem.go b/service/api/pkg/problem/problem.go
new file mode 100644
index 0000000..89e6d94
--- /dev/null
+++ b/service/api/pkg/problem/problem.go
@@ -0,0 +1,42 @@
+package problem
+
+import "fmt"
+
+const (
+	// ContentTypeJSON is the RFC 7807 JSON media type, see https://tools.ietf.org/html/rfc7807#section-6.1
+	ContentTypeJSON = "application/problem+json"
+)
+
+// Problem represents an RFC7807 error response.
+type Problem struct { //nolint:errname
+	// Code is an application-specific error code, expressed as a string value.
+	Code string `json:"code"`
+
+	// Detail is the human-readable explanation specific to this occurrence of the problem.
+	Detail string `json:"detail"`
+
+	// Instance is an absolute URI that identifies the specific occurrence of the problem.
+	Instance string `json:"instance"`
+
+	// Status is the HTTP status code generated by the origin server for this
+	// occurrence of the problem.
+	Status int `json:"status"`
+
+	// Title is the title that appropriately describes it (think short)
+	// Written in English and readable for engineers (usually not suited for
+	// non-technical stakeholders and not localized); example: "Service Unavailable"
+	Title string `json:"title"`
+
+	// Type is the type URI (typically, with the "http" or "https" scheme) that identifies the problem type.
+	// When dereferenced, it SHOULD provide human-readable documentation for the problem type
+	Type string `json:"type"`
+}
+
+func (p *Problem) Error() string {
+	msg := fmt.Sprintf("%s [HTTP %d]", p.Title, p.Status)
+	if p.Detail != "" {
+		msg += fmt.Sprintf(": %s", p.Detail)
+	}
+
+	return msg
+}
diff --git a/service/api/response.go b/service/api/response.go
new file mode 100644
index 0000000..99c86a0
--- /dev/null
+++ b/service/api/response.go
@@ -0,0 +1,127 @@
+package scraperapi
+
+import (
+	"encoding/json"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/go-resty/resty/v2"
+	"github.com/zenrows/zenrows-go-sdk/service/api/pkg/problem"
+)
+
+// Response struct holds response values of executed requests.
+type Response struct {
+	// RawResponse is the original `*http.Response` object.
+	RawResponse *http.Response
+
+	res *resty.Response
+}
+
+// Body method returns the HTTP response as `[]byte` slice for the executed request.
+func (r *Response) Body() []byte {
+	return r.res.Body()
+}
+
+// Status method returns the HTTP status string for the executed request.
+//
+// Example: 200 OK
+func (r *Response) Status() string {
+	return r.res.Status()
+}
+
+// StatusCode method returns the HTTP status code for the executed request.
+//
+// Example: 200
+func (r *Response) StatusCode() int {
+	return r.res.StatusCode()
+}
+
+// Header method returns the response headers
+func (r *Response) Header() http.Header {
+	return r.res.Header()
+}
+
+// String method returns the body of the HTTP response as a `string`.
+// It returns an empty string if it is nil or the body is zero length.
+func (r *Response) String() string {
+	return r.res.String()
+}
+
+// Problem method returns the problem description of an error response, if any; Content-Type parameters and letter case are ignored.
+func (r *Response) Problem() *problem.Problem {
+	if r.IsError() && strings.HasPrefix(strings.ToLower(r.Header().Get("Content-Type")), problem.ContentTypeJSON) {
+		var prob *problem.Problem
+		if err := json.Unmarshal(r.Body(), &prob); err == nil {
+			return prob
+		}
+	}
+
+	return nil
+}
+
+// Error method returns the problem description of the HTTP response as an error, or nil when there is none.
+func (r *Response) Error() error {
+	if prob := r.Problem(); prob != nil {
+		return prob
+	}
+
+	return nil
+}
+
+// Time method returns the duration of HTTP response time from the request we sent
+// and received a response.
+//
+// See [Response.ReceivedAt] to know when the client received a response.
+func (r *Response) Time() time.Duration {
+	return r.res.Time()
+}
+
+// ReceivedAt method returns the time we received a response from the server for the request.
+func (r *Response) ReceivedAt() time.Time {
+	return r.res.ReceivedAt()
+}
+
+// Size method returns the HTTP response size in bytes.
+func (r *Response) Size() int64 {
+	return r.res.Size()
+}
+
+// IsSuccess method returns true if HTTP status `code >= 200 and <= 299` otherwise false.
+func (r *Response) IsSuccess() bool {
+	return r.res.IsSuccess()
+}
+
+// IsError method returns true if HTTP status `code >= 400` otherwise false.
+func (r *Response) IsError() bool {
+	return r.res.IsError()
+}
+
+// TargetHeaders method returns all the response headers that the target page has set, if any. ZenRows Scraper API encodes these headers
+// with a "Z-" prefix, so this method filters out all headers that do not have this prefix.
+//
+// To get all the headers, see the [Response.Header] method.
+func (r *Response) TargetHeaders() http.Header {
+	targetPageHeaders := make(http.Header)
+	for k, v := range r.Header() {
+		if strings.HasPrefix(k, "Z-") {
+			targetPageHeaders[k] = v
+		}
+	}
+	return targetPageHeaders
+}
+
+// TargetCookies method returns the cookies set by the target page, parsed from the "Z-Set-Cookie" headers; unparsable entries are skipped.
+func (r *Response) TargetCookies() []*http.Cookie {
+	cookieCount := len(r.Header()["Z-Set-Cookie"])
+	if cookieCount == 0 {
+		return []*http.Cookie{}
+	}
+	cookies := make([]*http.Cookie, 0, cookieCount)
+	for _, line := range r.Header()["Z-Set-Cookie"] {
+		if cookie, err := http.ParseSetCookie(line); err == nil {
+			cookies = append(cookies, cookie)
+		}
+	}
+	return cookies
+}
diff --git a/service/api/version.go b/service/api/version.go
new file mode 100644
index 0000000..cde4365
--- /dev/null
+++ b/service/api/version.go
@@ -0,0 +1,11 @@
+package scraperapi
+
+import (
+	"github.com/zenrows/zenrows-go-sdk/service/api/version"
+)
+
+//goland:noinspection GoUnusedGlobalVariable
+var Version = version.Version
+
+//goland:noinspection GoUnusedGlobalVariable
+var VersionPrerelease = version.Prerelease
diff --git a/service/api/version/version.go b/service/api/version/version.go
new file mode 100644
index 0000000..76bf235
--- /dev/null
+++ b/service/api/version/version.go
@@ -0,0 +1,39 @@
+// Package version provides a location to set the release versions for all
+// packages to consume, without creating import cycles.
+//
+// This package should not import any other packages.
+package version
+
+import (
+	"fmt"
+
+	"github.com/hashicorp/go-version"
+)
+
+// Version is the main version number that is being run at the moment.
+const Version = "1.0.0"
+
+// Prerelease is a prerelease marker for the version. If this is "" (empty string)
+// then it means that it is a final release. Otherwise, this is a prerelease
+// such as "dev" (in development), "beta", "rc1", etc.
+const Prerelease = ""
+
+// SemVer is an instance of version.Version. This has the secondary
+// benefit of verifying during tests and init time that our version is a
+// proper semantic version, which should always be the case.
+//
+//goland:noinspection GoUnusedGlobalVariable
+var SemVer *version.Version
+
+func init() {
+	SemVer = version.Must(version.NewVersion(Version))
+}
+
+// String returns the complete version string, including prerelease
+func String() string {
+	//goland:noinspection GoBoolExpressions
+	if Prerelease != "" {
+		return fmt.Sprintf("%s-%s", Version, Prerelease)
+	}
+	return Version
+}