From 5c99be987224872a79c57ae0114fe66fbd962711 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 18:21:57 -0400 Subject: [PATCH 01/12] add some specificity, refining ChatGPT has identified download_and_analyze_links as a likely culprit We'll now delve into the files that make it up and see what we can fix Signed-off-by: Kingdon Barrett --- lib/link_checker.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/link_checker.rb b/lib/link_checker.rb index f515e42..961f615 100644 --- a/lib/link_checker.rb +++ b/lib/link_checker.rb @@ -27,17 +27,19 @@ def fetch_sitemap fetcher = SitemapFetcher.new(@domain, @masquerade_domain) @sitemap_urls = fetcher.fetch_sitemap_urls puts "Fetched sitemap with #{@sitemap_urls.size} URLs." - rescue => e + rescue StandardError => e puts "Error fetching sitemap: #{e.message}" exit end def download_and_analyze_links if File.exist?(LINKS_DATA_FILE) + # Loading from cache: Parse JSON data into Link objects links_data_hashes = JSON.parse(File.read(LINKS_DATA_FILE), symbolize_names: true) @links_data = links_data_hashes.map { |hash| Link.from_h(hash) } puts "Loaded links data from cache." else + # Fetching fresh data: Use LinkAnalyzer to get Link objects and cache for future use analyzer = LinkAnalyzer.new(@domain, @masquerade_domain) @links_data = analyzer.analyze_links(@sitemap_urls) @@ -46,7 +48,7 @@ def download_and_analyze_links puts "Links data saved to cache." 
end - rescue => e + rescue StandardError => e puts "Error downloading and analyzing links: #{e.message}" exit end @@ -54,7 +56,7 @@ def download_and_analyze_links def validate_links validator = LinkValidator.new(@links_data, @domain, @masquerade_domain) @links_data = validator.validate_links - rescue => e + rescue StandardError => e # PRY_MUTEX.synchronize{binding.pry} puts "Error validating links: #{e.message}" exit @@ -64,7 +66,7 @@ def generate_report generator = ReportGenerator.new(@links_data, @report_file) generator.generate puts "Report generated at #{@report_file}." - rescue => e + rescue StandardError => e puts "Error generating report: #{e.message}" end end From b5c42abc133c1c24f2e4c1337af02c7514738c27 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 19:56:50 -0400 Subject: [PATCH 02/12] make use of helpers Signed-off-by: Kingdon Barrett --- lib/link.rb | 32 +++++++++++--------------- lib/link/link_analyzer.rb | 27 +++++++++++++--------- lib/url_helper.rb | 21 +++++++++++++++++ lib/validator/remote_link_validator.rb | 6 ++--- 4 files changed, 54 insertions(+), 32 deletions(-) create mode 100644 lib/url_helper.rb diff --git a/lib/link.rb b/lib/link.rb index 4b3cb34..eb18584 100644 --- a/lib/link.rb +++ b/lib/link.rb @@ -1,3 +1,6 @@ +require './lib/cache_helper' +require './lib/url_helper' + class Link attr_accessor :source_file, :target, :type, :anchor, :response_status, :link_string, :link_text, :line_no, :reference_intact @@ -46,7 +49,7 @@ def self.from_h(hash) end def download_and_store - cache_path = get_cache_path + cache_path = CacheHelper.get_cache_path(@source_file) unless File.exist?(cache_path) html_content = Net::HTTP.get(URI(@source_file)) FileUtils.mkdir_p(File.dirname(cache_path)) @@ -66,6 +69,14 @@ def reference_intact? @reference_intact end + def set_error(error_message) + @error = error_message + end + + def has_error? + !@error.nil? 
+ end + private def determine_type @@ -77,27 +88,12 @@ def determine_type end def extract_anchor - @anchor = URI(@link_string).fragment - rescue URI::InvalidURIError - @anchor = URI(URI::Parser.new.escape(@link_string)).fragment + @anchor = URLHelper.extract_fragment(@link_string) end def make_absolute return unless @link_string - @target = URI.join(@source_file, @link_string).to_s - rescue URI::InvalidURIError - @target = URI.join(@source_file, URI::Parser.new.escape(@link_string)).to_s - nil - end - - def get_cache_path - uri = URI(@source_file) - cache_path = "cache" + uri.path - # If the path doesn't have a common file extension, treat it as a directory. - unless cache_path.match(/\.(html|xml|json|txt|js|css|jpg|jpeg|png|gif)$/i) - cache_path += "/index.html" - end - cache_path + @target = URLHelper.make_absolute(@source_file, @link_string) end end diff --git a/lib/link/link_analyzer.rb b/lib/link/link_analyzer.rb index 30074e3..9d0d0ff 100644 --- a/lib/link/link_analyzer.rb +++ b/lib/link/link_analyzer.rb @@ -9,16 +9,17 @@ def initialize(domain, masquerade_domain) end def analyze_links(sitemap_urls) - links_data = [] + links_data = {} threads = [] sitemap_urls.each_slice(SLICE_SIZE) do |slice| threads << Thread.new do slice.each do |url| + link = ensure_link(links_data, url, nil) begin url = masquerade_url(url) if @masquerade_domain puts "Visiting: #{url}" - doc = Link.new(url, nil, @domain).download_and_store + doc = link.download_and_store # Extracting all the links from the page doc.css('a').each do |link_element| @@ -26,16 +27,11 @@ def analyze_links(sitemap_urls) # Skip links without href or with href set to '#' next if link_href.nil? 
|| link_href.strip == '#' - begin - link = Link.new(url, link_element, @domain) - rescue URI::InvalidURIError => e - PRY_MUTEX.synchronize{binding.pry} - end - LINKS_MUTEX.synchronize do - links_data << link - end + target_url = URI.join(url, link_href).to_s + link = ensure_link(links_data, target_url, link_element) end rescue StandardError => e + link.response_status = "Error: #{e.message}" puts "Error downloading or analyzing URL #{url}: #{e.message}" end end @@ -43,7 +39,7 @@ def analyze_links(sitemap_urls) end threads.each(&:join) - links_data + links_data.values end private @@ -57,4 +53,13 @@ def masquerade_url(url) url end end + + def ensure_link(links_data, url, link_element) + LINKS_MUTEX.synchronize do + unless links_data[url] + links_data[url] = Link.new(url, link_element, @domain) + end + links_data[url] + end + end end diff --git a/lib/url_helper.rb b/lib/url_helper.rb new file mode 100644 index 0000000..b0fd03e --- /dev/null +++ b/lib/url_helper.rb @@ -0,0 +1,21 @@ +module URLHelper + def self.make_absolute(base_url, relative_url) + return relative_url if relative_url.nil? 
+ + begin + URI.join(base_url, relative_url).to_s + rescue URI::InvalidURIError + URI.join(base_url, URI::Parser.new.escape(relative_url)).to_s + end + end + + def self.extract_fragment(url) + return nil unless url + + begin + URI(url).fragment + rescue URI::InvalidURIError + URI(URI::Parser.new.escape(url)).fragment + end + end +end diff --git a/lib/validator/remote_link_validator.rb b/lib/validator/remote_link_validator.rb index 4daa25c..4a77ac3 100644 --- a/lib/validator/remote_link_validator.rb +++ b/lib/validator/remote_link_validator.rb @@ -15,13 +15,13 @@ def validate retries += 1 retry if retries < MAX_RETRIES puts "Error after #{MAX_RETRIES} retries for link #{link.target}: #{e.message}" - link.response_status = "Timeout" + link.set_error "Timeout" rescue SocketError => e puts "Network error for link #{link.target}: #{e.message}" - link.response_status = "Network Error" + link.set_error "Network Error" rescue StandardError => e puts "Unexpected error for link #{link.target}: #{e.message}" - link.response_status = "Error" + link.set_error "Error (#{e.message})" end end end From 94ec62996d119feb50327e040e1c6e1eab0585d1 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 20:08:59 -0400 Subject: [PATCH 03/12] try to implement chatgpt's refinements Signed-off-by: Kingdon Barrett --- lib/validator/link_validator.rb | 54 +++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/lib/validator/link_validator.rb b/lib/validator/link_validator.rb index 3fec91e..f57c40c 100644 --- a/lib/validator/link_validator.rb +++ b/lib/validator/link_validator.rb @@ -5,41 +5,49 @@ class LinkValidator MAX_THREADS = 4 def initialize(links_data, domain, masquerade_domain, process_remote_links = false) - @links_data = links_data.map do |link| - link.dup.tap do |ld| - ld.target = ld.target.gsub(domain, masquerade_domain) if ld.type != 'remote' - end - end + @links_data = links_data + adjust_links_target(domain, 
masquerade_domain) + @parsed_docs_cache = {} + @parsed_docs_cache_mutex = Mutex.new @domain = domain @masquerade_domain = masquerade_domain @process_remote_links = process_remote_links end def validate_links - # Separate remote links for parallel processing - remote_links = @links_data.select { |link| link.type == 'remote' } - local_links = @links_data.reject { |link| link.type == 'remote' } + handle_local_links + handle_remote_links if @process_remote_links + @links_data + end - # Handle local links - local_links.each do |link| - next if link.target =~ /^mailto:/ - LocalLinkValidator.new(link, @parsed_docs_cache).validate + private + + def adjust_links_target(domain, masquerade_domain) + @links_data.each do |link| + link.target.gsub!(domain, masquerade_domain) if link.type != 'remote' end + end - if @process_remote_links - # Parallel processing for remote links - thread_pool = [] - remote_links.each_slice(remote_links.size / MAX_THREADS + 1) do |link_slice| - thread_pool << Thread.new do - link_slice.each do |link| - RemoteLinkValidator.new(link).validate - end + def handle_local_links + @links_data.each do |link| + next if link.type == 'remote' || link.target =~ /^mailto:/ + + validator = LocalLinkValidator.new(link, @parsed_docs_cache, @parsed_docs_cache_mutex) + validator.validate + end + end + + def handle_remote_links + thread_pool = [] + remote_links = @links_data.select { |link| link.type == 'remote' } + remote_links.each_slice(remote_links.size / MAX_THREADS + 1) do |link_slice| + thread_pool << Thread.new do + link_slice.each do |link| + RemoteLinkValidator.new(link).validate end end - thread_pool.each(&:join) end - - @links_data + thread_pool.each(&:join) end end From b3aec77187d2e91a8c20a560fa6421391b17493b Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 20:43:25 -0400 Subject: [PATCH 04/12] fix some bugs according to gpt Signed-off-by: Kingdon Barrett --- lib/cache_helper.rb | 18 ++++++++++++++---- lib/link.rb | 19 
++++++++++++++----- lib/validator/base_link_validator.rb | 5 +++-- lib/validator/local_link_validator.rb | 14 +++++++++----- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/lib/cache_helper.rb b/lib/cache_helper.rb index 442bb57..7c8e20c 100644 --- a/lib/cache_helper.rb +++ b/lib/cache_helper.rb @@ -1,13 +1,23 @@ module CacheHelper def self.get_cache_path(url) uri = URI(url) - cache_path = "cache" + uri.path - + cache_path = File.join("cache", uri.path) # If the path doesn't have a common file extension, treat it as a directory. unless cache_path.match(/\.(html|xml|json|txt|js|css|jpg|jpeg|png|gif)$/i) - cache_path += "/index.html" + cache_path = File.join(cache_path, "index.html") end - cache_path end + + def self.write_to_cache(url, content, status) + cache_path = get_cache_path(url) + data = { content: content, status: status } + File.write(cache_path, JSON.dump(data)) + end + + def self.read_from_cache(url) + cache_path = get_cache_path(url) + data = JSON.parse(File.read(cache_path)) + [data["content"], data["status"]] + end end diff --git a/lib/link.rb b/lib/link.rb index eb18584..3ee8bf5 100644 --- a/lib/link.rb +++ b/lib/link.rb @@ -15,6 +15,11 @@ def initialize(source_url, link_element, domain) @line_no = link_element.line determine_type extract_anchor + else + # If no link_element is provided, assume the source is the target and type is local. 
+ @link_string = source_url + @target = source_url + @type = 'local' end make_absolute @@ -50,12 +55,16 @@ def self.from_h(hash) def download_and_store cache_path = CacheHelper.get_cache_path(@source_file) - unless File.exist?(cache_path) - html_content = Net::HTTP.get(URI(@source_file)) - FileUtils.mkdir_p(File.dirname(cache_path)) - File.write(cache_path, html_content) + if File.exist?(cache_path) + html_content, status = CacheHelper.read_from_cache(@source_file) + @response_status = status if status && status.to_i >= 400 else - html_content = File.read(cache_path) + response = Net::HTTP.get_response(URI(@source_file)) + html_content = response.body + # Ensure the directory exists before writing the cache + FileUtils.mkdir_p(File.dirname(cache_path)) + CacheHelper.write_to_cache(@source_file, html_content, response.code) + @response_status = response.code if response.code.to_i >= 400 end Nokogiri::HTML(html_content) diff --git a/lib/validator/base_link_validator.rb b/lib/validator/base_link_validator.rb index 7043d26..a8caf39 100644 --- a/lib/validator/base_link_validator.rb +++ b/lib/validator/base_link_validator.rb @@ -1,9 +1,10 @@ class BaseLinkValidator - attr_reader :link, :parsed_docs_cache + attr_reader :link, :parsed_docs_cache, :links_mutex - def initialize(link, parsed_docs_cache = {}) + def initialize(link, parsed_docs_cache = {}, links_mutex = Mutex.new) @link = link @parsed_docs_cache = parsed_docs_cache + @links_mutex = links_mutex end def valid_anchor? 
diff --git a/lib/validator/local_link_validator.rb b/lib/validator/local_link_validator.rb index 1ad17fa..804887d 100644 --- a/lib/validator/local_link_validator.rb +++ b/lib/validator/local_link_validator.rb @@ -3,18 +3,22 @@ class LocalLinkValidator < BaseLinkValidator def validate + return if @link.response_status && @link.response_status.to_i >= 400 + normalized_url = URI(@link.target).normalize.to_s cache_path = CacheHelper.get_cache_path(normalized_url) return @link.response_status = "Not Cached" unless File.exist?(cache_path) - unless @parsed_docs_cache[normalized_url] - html_content = File.read(cache_path) - @parsed_docs_cache[normalized_url] = Nokogiri::HTML(html_content) + doc = nil + @links_mutex.synchronize do + unless @parsed_docs_cache[normalized_url] + html_content = File.read(cache_path) + @parsed_docs_cache[normalized_url] = Nokogiri::HTML(html_content) + end + doc = @parsed_docs_cache[normalized_url] end - doc = @parsed_docs_cache[normalized_url] - if valid_anchor? escaped = escaped_anchor @link.check_reference_intact!(escaped_anchor, doc) From 0718c52df2186d61574643994dd9ca2b8af8345b Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 21:00:26 -0400 Subject: [PATCH 05/12] fix encoding issues to parse all the links This change resolves all outstanding crashes and reduces the final output to something less than we were emitting before. (Did the links that dropped out of the report incorrectly represent an error?) 
Signed-off-by: Kingdon Barrett --- lib/link.rb | 1 + lib/link/link_analyzer.rb | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/link.rb b/lib/link.rb index 3ee8bf5..9e51ade 100644 --- a/lib/link.rb +++ b/lib/link.rb @@ -61,6 +61,7 @@ def download_and_store else response = Net::HTTP.get_response(URI(@source_file)) html_content = response.body + html_content.force_encoding('UTF-8') # Ensure the directory exists before writing the cache FileUtils.mkdir_p(File.dirname(cache_path)) CacheHelper.write_to_cache(@source_file, html_content, response.code) diff --git a/lib/link/link_analyzer.rb b/lib/link/link_analyzer.rb index 9d0d0ff..2712426 100644 --- a/lib/link/link_analyzer.rb +++ b/lib/link/link_analyzer.rb @@ -18,7 +18,11 @@ def analyze_links(sitemap_urls) link = ensure_link(links_data, url, nil) begin url = masquerade_url(url) if @masquerade_domain - puts "Visiting: #{url}" + base_url, fragment = url.split('#', 2) + fragment = URI::Parser.new.escape(fragment) if fragment + full_url = fragment ? "#{base_url}##{fragment}" : base_url + + puts "Visiting: #{full_url}" doc = link.download_and_store # Extracting all the links from the page @@ -27,7 +31,14 @@ def analyze_links(sitemap_urls) # Skip links without href or with href set to '#' next if link_href.nil? || link_href.strip == '#' - target_url = URI.join(url, link_href).to_s + # Splitting the base URL and fragment for proper handling + base_url, fragment = link_href.split('#', 2) + fragment = URI::Parser.new.escape(fragment) if fragment + + # Combine the base URL with the original URL and append the fragment if present + joined_url = URI.join(url, base_url).to_s + target_url = fragment ? 
"#{joined_url}##{fragment}" : joined_url + link = ensure_link(links_data, target_url, link_element) end rescue StandardError => e From 3fe4b4741238db02f572fa0256c36386b5b3443f Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 22:13:06 -0400 Subject: [PATCH 06/12] use link_text setter Signed-off-by: Kingdon Barrett --- lib/link.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/link.rb b/lib/link.rb index 9e51ade..9008c4e 100644 --- a/lib/link.rb +++ b/lib/link.rb @@ -11,7 +11,7 @@ def initialize(source_url, link_element, domain) if link_element @link_string = link_element['href'] - @link_text = link_element.text.strip + link_text = link_element.text @line_no = link_element.line determine_type extract_anchor @@ -25,6 +25,10 @@ def initialize(source_url, link_element, domain) make_absolute end + def link_text=(value) + @link_text = value.strip.gsub(/\s+/, ' ') + end + def to_h { source_file: @source_file, From 3205ff9f5e24d8b606bdaec9a99a2fdf6b8899ab Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 22:28:08 -0400 Subject: [PATCH 07/12] add a normalize target in the makefile Signed-off-by: Kingdon Barrett --- Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 760158d..b531c1a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: main clean-cache preview all clean +.PHONY: main clean-cache preview all clean normalize all: main clean-cache preview main: @@ -15,3 +15,13 @@ preview: clean: clean-cache @rm -f report.csv preview-report.csv @echo "Clean complete!" 
+ +normalize: + @# Normalize the main report.csv + @gsed -i '1d' report.csv + @awk 'NR==1{print $0; next} {print $0 | "sort"}' report.csv > tmp.csv && mv tmp.csv report.csv + @gsed -i 's/fluxcd.io/deploy-preview-1573--fluxcd.netlify.app/1; s/fluxcd.io/deploy-preview-1573--fluxcd.netlify.app/1' report.csv + + @# Normalize the preview-report.csv + @gsed -i '1d' preview-report.csv + @awk 'NR==1{print $0; next} {print $0 | "sort"}' preview-report.csv > tmp.csv && mv tmp.csv preview-report.csv From 5fb238b3d52c0e8b0372fbbd50328d5269b10ad3 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 22:31:47 -0400 Subject: [PATCH 08/12] introducing summary.rb Signed-off-by: Kingdon Barrett --- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b531c1a..2d8e474 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -.PHONY: main clean-cache preview all clean normalize -all: main clean-cache preview +.PHONY: main clean-cache preview all clean normalize summary +all: main clean-cache preview normalize summary main: ruby ./main.rb @@ -25,3 +25,6 @@ normalize: @# Normalize the preview-report.csv @gsed -i '1d' preview-report.csv @awk 'NR==1{print $0; next} {print $0 | "sort"}' preview-report.csv > tmp.csv && mv tmp.csv preview-report.csv + +summary: + ruby ./lib/summary.rb From c108da71a441466582f966aa75e8f10f53a31649 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 22:31:54 -0400 Subject: [PATCH 09/12] summary reporting for CI Signed-off-by: Kingdon Barrett --- lib/summary.rb | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 lib/summary.rb diff --git a/lib/summary.rb b/lib/summary.rb new file mode 100644 index 0000000..e5dd4a2 --- /dev/null +++ b/lib/summary.rb @@ -0,0 +1,24 @@ +main_report = File.readlines('report.csv').map(&:strip) +preview_report = File.readlines('preview-report.csv').map(&:strip) + +# Find the differences between the two reports 
+resolved_issues = main_report - preview_report +new_issues = preview_report - main_report + +puts "Summary:" +puts "--------" + +puts "Total issues in main site: #{main_report.count}" +puts "Total issues in preview site: #{preview_report.count}" + +puts "\nResolved issues: #{resolved_issues.count}" +puts "New issues: #{new_issues.count}" + +# Check if there are any new issues +if new_issues.count > 0 + puts "\nFail: The preview site has introduced new issues!" + exit(1) +else + puts "\nPass: No new issues introduced in the preview site." + exit(0) +end From 1d264ead6d2f24a03a526691a76a0868785d9a62 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 23:06:20 -0400 Subject: [PATCH 10/12] Emit summary reports when summary.rb is called The interesting report is pr-summary.csv because it ostensibly tells what wrong things were caused by this PR, such that we cannot merge it without fixing them. May also be interested in baseline-unresolved.csv so we can start to tackle some of the issues that are in the deployed website. Signed-off-by: Kingdon Barrett --- Makefile | 2 +- lib/summary.rb | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2d8e474..f9610e6 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ preview: ruby ./main.rb fluxcd.io deploy-preview-1573--fluxcd.netlify.app preview-report.csv false clean: clean-cache - @rm -f report.csv preview-report.csv + @rm -f report.csv preview-report.csv pr-summary.csv baseline-unresolved.csv @echo "Clean complete!" 
normalize: diff --git a/lib/summary.rb b/lib/summary.rb index e5dd4a2..384b11a 100644 --- a/lib/summary.rb +++ b/lib/summary.rb @@ -1,3 +1,7 @@ +require 'csv' + +HEADER = ["Link Source", "Link Target", "Type", "Anchor?", "Reference Intact?", "Response Status", "Link String", "Link Text", "Line No."] + main_report = File.readlines('report.csv').map(&:strip) preview_report = File.readlines('preview-report.csv').map(&:strip) @@ -5,6 +9,24 @@ resolved_issues = main_report - preview_report new_issues = preview_report - main_report +unresolved_issues = main_report & preview_report + +# Write to the pr-summary.csv +CSV.open('pr-summary.csv', 'wb') do |csv| + csv << HEADER + new_issues.each do |issue| + csv << issue.split(',') + end +end + +# Write to the baseline-unresolved.csv +CSV.open('baseline-unresolved.csv', 'wb') do |csv| + csv << HEADER + unresolved_issues.each do |issue| + csv << issue.split(',') + end +end + puts "Summary:" puts "--------" @@ -14,9 +36,19 @@ puts "\nResolved issues: #{resolved_issues.count}" puts "New issues: #{new_issues.count}" -# Check if there are any new issues +# Check if there are any new issues and show top 3 problematic links if new_issues.count > 0 puts "\nFail: The preview site has introduced new issues!" + puts "\nTop 3 problematic links introduced in the PR:" + + new_issues.first(3).each do |issue| + data = issue.split(',') + puts "Link: #{data[1]}" + puts "Found on: #{data[0]}" + puts "---------" + end + + puts "Please check pr-summary.csv for the full list of new issues." exit(1) else puts "\nPass: No new issues introduced in the preview site." 
From 4bfa1c899b4f8a8b41930213e06cfb1fed72aaa3 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 23:50:57 -0400 Subject: [PATCH 11/12] the action according to gpt Signed-off-by: Kingdon Barrett --- Makefile | 11 ++++++--- action/action.yml | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 action/action.yml diff --git a/Makefile b/Makefile index f9610e6..8c081e7 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,10 @@ clean-cache: preview: ruby ./main.rb fluxcd.io deploy-preview-1573--fluxcd.netlify.app preview-report.csv false +run_with_preview: + @echo "Running with preview URL: $(PREVIEW_URL)" + ruby ./main.rb fluxcd.io $(PREVIEW_URL) preview-report.csv false + clean: clean-cache @rm -f report.csv preview-report.csv pr-summary.csv baseline-unresolved.csv @echo "Clean complete!" @@ -19,12 +23,13 @@ clean: clean-cache normalize: @# Normalize the main report.csv @gsed -i '1d' report.csv - @awk 'NR==1{print $0; next} {print $0 | "sort"}' report.csv > tmp.csv && mv tmp.csv report.csv - @gsed -i 's/fluxcd.io/deploy-preview-1573--fluxcd.netlify.app/1; s/fluxcd.io/deploy-preview-1573--fluxcd.netlify.app/1' report.csv + @PREVIEW_DOMAIN=$(if [ -z "$(PREVIEW_URL)" ]; then echo "deploy-preview-1573--fluxcd.netlify.app"; else echo "$(PREVIEW_URL)"; fi) + @gsed -i "s/fluxcd.io/$$PREVIEW_DOMAIN/1; s/fluxcd.io/$$PREVIEW_DOMAIN/1" report.csv + @sort -o report.csv report.csv @# Normalize the preview-report.csv @gsed -i '1d' preview-report.csv - @awk 'NR==1{print $0; next} {print $0 | "sort"}' preview-report.csv > tmp.csv && mv tmp.csv preview-report.csv + @sort -o preview-report.csv preview-report.csv summary: ruby ./lib/summary.rb diff --git a/action/action.yml b/action/action.yml new file mode 100644 index 0000000..4f98dff --- /dev/null +++ b/action/action.yml @@ -0,0 +1,57 @@ +name: Link Checker + +on: [pull_request] + +jobs: + check-links: + runs-on: ubuntu-latest + + steps: + - name: 
Checkout code + uses: actions/checkout@v2 + + - name: Set up Ruby 3.0 + uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.0 + bundler-cache: true + + - name: Run main target + run: make main + + - name: Clean cache + run: make clean-cache + + - name: Run with preview + run: make run_with_preview + + - name: Normalize reports + run: make normalize + + - name: Run summary + id: run-summary + run: make summary + continue-on-error: true + + - name: Check summary results + run: ./.github/scripts/check_summary.sh + if: steps.run-summary.outcome == 'failure' + + - name: Comment on PR if necessary + run: ./.github/scripts/comment_on_pr.sh + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload pr-summary.csv + uses: actions/upload-artifact@v3 + if: always() && steps.run-summary.outcome == 'failure' + with: + name: pr-summary + path: pr-summary.csv + + - name: Upload baseline-unresolved.csv + uses: actions/upload-artifact@v3 + if: always() && steps.run-summary.outcome == 'success' + with: + name: baseline-unresolved + path: baseline-unresolved.csv From 469b7701ae15ada6e75cb66a4e5c2c0a744622b2 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Mon, 14 Aug 2023 23:51:14 -0400 Subject: [PATCH 12/12] github scripts and test workflow Signed-off-by: Kingdon Barrett --- .github/scripts/check_summary.sh | 10 ++++++++++ .github/scripts/comment_on_pr.sh | 9 +++++++++ .github/workflows/test.yml | 29 +++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100755 .github/scripts/check_summary.sh create mode 100755 .github/scripts/comment_on_pr.sh create mode 100644 .github/workflows/test.yml diff --git a/.github/scripts/check_summary.sh b/.github/scripts/check_summary.sh new file mode 100755 index 0000000..f5ee5a3 --- /dev/null +++ b/.github/scripts/check_summary.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e + +LINE_COUNT=$(wc -l < pr-summary.csv) + +if [ "$LINE_COUNT" -gt 1 ]; then + echo "Issues found in PR. Attaching pr-summary.csv for review..." 
+else
+  echo "No direct issues found in PR. Attaching baseline-unresolved.csv for reference..."
+fi
diff --git a/.github/scripts/comment_on_pr.sh b/.github/scripts/comment_on_pr.sh
new file mode 100755
index 0000000..8336e70
--- /dev/null
+++ b/.github/scripts/comment_on_pr.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -e
+
+LINE_COUNT=$(wc -l < pr-summary.csv)
+
+if [ "$LINE_COUNT" -le 1 ]; then
+  # Using GitHub CLI to comment on the PR. PR_NUMBER must be exported by the calling workflow step.
+  gh pr comment "${PR_NUMBER:?PR number must be set in the environment}" --body "Warning: Some unresolved baseline issues are present. Please check the attached baseline-unresolved.csv."
+fi
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..c64a26c
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,29 @@
+name: Test Action Workflow
+
+on:
+  workflow_dispatch:
+    inputs:
+      prNumber:
+        description: 'PR number to test against'
+        required: true
+        default: '1573'
+
+jobs:
+  test-action:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Ruby 3.0
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 3.0
+          bundler-cache: true
+
+      - name: Run the action
+        uses: ./action/
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          prNumber: ${{ github.event.inputs.prNumber }}