Skip to content

Commit

Permalink
Integrated the 'retryable' gem to support retrying failed connection,…
Browse files Browse the repository at this point in the history
… including a new '--tries' option accepting a number of times to retry if a connection fails for a fatal error (not just an HTTP 4XX/5XX error; the default is 20 retries). Issue cocoflan#1
  • Loading branch information
morgant committed Jun 3, 2024
1 parent be2d407 commit 6be4b5e
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 4 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
source "https://rubygems.org"

gemspec

gem "retryable", "~> 3.0"
4 changes: 4 additions & 0 deletions bin/wayback_machine_downloader
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ option_parser = OptionParser.new do |opts|
options[:wait_randomize] = true
end

opts.on("--tries NUMBER", Integer, "Number of times to retry for non-fatal connection errors (Default is 20)") do |t|
options[:tries] = t
end

opts.on("-x", "--exclude EXCLUDE_FILTER", String, "Skip downloading of urls that match this filter", "(use // notation for the filter to be treated as a regex)") do |t|
options[:exclude_filter] = t
end
Expand Down
10 changes: 7 additions & 3 deletions lib/wayback_machine_downloader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
require 'fileutils'
require 'cgi'
require 'json'
require 'retryable'
require_relative 'wayback_machine_downloader/tidy_bytes'
require_relative 'wayback_machine_downloader/to_regex'
require_relative 'wayback_machine_downloader/archive_api'
Expand Down Expand Up @@ -34,6 +35,7 @@ def initialize params
@threads_count = params[:threads_count].to_i
@wait_seconds = params[:wait_seconds].to_i
@wait_randomized = params[:wait_randomized]
@tries = params[:tries] ? params[:tries].to_i : 20
end

def backup_name
Expand Down Expand Up @@ -271,10 +273,12 @@ def download_file file_remote_info
begin
structure_dir_path dir_path
open(file_path, "wb") do |file|
file_url_escaped = CGI.escape file_url
begin
file_url_escaped = CGI.escape file_url
URI.open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url_escaped}", "Accept-Encoding" => "plain") do |uri|
file.write(uri.read)
Retryable.retryable(tries: @tries, on: Net::ReadTimeout, sleep_method: self.method(:wait)) do
URI.open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url_escaped}", "Accept-Encoding" => "plain") do |uri|
file.write(uri.read)
end
end
rescue OpenURI::HTTPError => e
puts "#{file_url} # #{e}"
Expand Down
4 changes: 3 additions & 1 deletion lib/wayback_machine_downloader/archive_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ def get_raw_list_from_api url, page_index
request_url += CGI.escape url
request_url += parameters_for_api page_index

URI.open(request_url).read
Retryable.retryable(tries: @tries, on: Net::ReadTimeout, sleep_method: self.method(:wait)) do
URI.open(request_url).read
end
end

def parameters_for_api page_index
Expand Down

0 comments on commit 6be4b5e

Please sign in to comment.