Merge pull request #7 from kingdonb/action
Add GitHub Action workflows
Kingdon Barrett authored Aug 15, 2023
2 parents 2801670 + 469b770 commit e45e3ea
Showing 15 changed files with 331 additions and 80 deletions.
10 changes: 10 additions & 0 deletions .github/scripts/check_summary.sh
@@ -0,0 +1,10 @@
#!/bin/bash
set -e

LINE_COUNT=$(wc -l < pr-summary.csv)

if [ "$LINE_COUNT" -gt 1 ]; then
echo "Issues found in PR. Attaching pr-summary.csv for review..."
else
echo "No direct issues found in PR. Attaching baseline-unresolved.csv for reference..."
fi
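
Note: the line-count check treats a header-only CSV as "no issues found". A hypothetical pr-summary.csv with one finding (the column names here are illustrative, not taken from this commit) would have two lines and take the first branch:

  source_file,target,response_status
  https://fluxcd.io/docs/,https://fluxcd.io/docs/missing-page/,404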
9 changes: 9 additions & 0 deletions .github/scripts/comment_on_pr.sh
@@ -0,0 +1,9 @@
#!/bin/bash
set -e

LINE_COUNT=$(wc -l < pr-summary.csv)

if [ "$LINE_COUNT" -le 1 ]; then
# Using GitHub CLI to comment on the PR.
gh pr comment ${{ github.event.pull_request.number }} --body "Warning: Some unresolved baseline issues are present. Please check the attached baseline-unresolved.csv."
fi
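
A quick local dry run of the script might look like this (assuming an authenticated gh CLI and a pr-summary.csv in the working directory; the PR number is just an example):

  PR_NUMBER=1573 ./.github/scripts/comment_on_pr.sh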
29 changes: 29 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,29 @@
name: Test Action Workflow

on:
  workflow_dispatch:
    inputs:
      prNumber:
        description: 'PR number to test against'
        required: true
        default: '1573'

jobs:
  test-action:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Ruby 3.0
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: 3.0
          bundler-cache: true

      - name: Run the action
        uses: ./action/
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          prNumber: ${{ github.event.inputs.prNumber }}
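
Since the workflow only triggers on workflow_dispatch, it can be kicked off from the CLI as well as the Actions UI, for example:

  gh workflow run "Test Action Workflow" -f prNumber=1573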
24 changes: 21 additions & 3 deletions Makefile
@@ -1,5 +1,5 @@
-.PHONY: main clean-cache preview all clean
-all: main clean-cache preview
+.PHONY: main clean-cache preview all clean normalize summary
+all: main clean-cache preview normalize summary
 
 main:
 	ruby ./main.rb
@@ -12,6 +12,24 @@ clean-cache:
 preview:
 	ruby ./main.rb fluxcd.io deploy-preview-1573--fluxcd.netlify.app preview-report.csv false
 
+run_with_preview:
+	@echo "Running with preview URL: $(PREVIEW_URL)"
+	ruby ./main.rb fluxcd.io $(PREVIEW_URL) preview-report.csv false
+
 clean: clean-cache
-	@rm -f report.csv preview-report.csv
+	@rm -f report.csv preview-report.csv pr-summary.csv baseline-unresolved.csv
 	@echo "Clean complete!"
+
+normalize:
+	@# Normalize the main report.csv: drop the header row, rewrite the first
+	@# two fluxcd.io occurrences per line to the preview domain, then sort.
+	@gsed -i '1d' report.csv
+	@PREVIEW_DOMAIN="$(if $(PREVIEW_URL),$(PREVIEW_URL),deploy-preview-1573--fluxcd.netlify.app)"; \
+	gsed -i "s/fluxcd.io/$$PREVIEW_DOMAIN/1; s/fluxcd.io/$$PREVIEW_DOMAIN/1" report.csv
+	@sort -o report.csv report.csv
+
+	@# Normalize the preview-report.csv: drop the header row and sort.
+	@gsed -i '1d' preview-report.csv
+	@sort -o preview-report.csv preview-report.csv
+
+summary:
+	ruby ./lib/summary.rb
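
A typical local run mirrors the CI steps (the preview domain shown is the Makefile's own default, passed explicitly for illustration):

  make main clean-cache
  make run_with_preview normalize summary PREVIEW_URL=deploy-preview-1573--fluxcd.netlify.app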
57 changes: 57 additions & 0 deletions action/action.yml
@@ -0,0 +1,57 @@
name: Link Checker

on: [pull_request]

jobs:
  check-links:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Ruby 3.0
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: 3.0
          bundler-cache: true

      - name: Run main target
        run: make main

      - name: Clean cache
        run: make clean-cache

      - name: Run with preview
        run: make run_with_preview

      - name: Normalize reports
        run: make normalize

      - name: Run summary
        id: run-summary
        run: make summary
        continue-on-error: true

      - name: Check summary results
        run: ./.github/scripts/check_summary.sh
        if: steps.run-summary.outcome == 'failure'

      - name: Comment on PR if necessary
        run: ./.github/scripts/comment_on_pr.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}

      - name: Upload pr-summary.csv
        uses: actions/upload-artifact@v3
        if: always() && steps.run-summary.outcome == 'failure'
        with:
          name: pr-summary
          path: pr-summary.csv

      - name: Upload baseline-unresolved.csv
        uses: actions/upload-artifact@v3
        if: always() && steps.run-summary.outcome == 'success'
        with:
          name: baseline-unresolved
          path: baseline-unresolved.csv
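
After a run, either artifact can be fetched for inspection with the GitHub CLI (the run ID below is a placeholder):

  gh run download 1234567890 -n pr-summary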
18 changes: 14 additions & 4 deletions lib/cache_helper.rb
@@ -1,13 +1,23 @@
 module CacheHelper
   def self.get_cache_path(url)
     uri = URI(url)
-    cache_path = "cache" + uri.path
 
+    cache_path = File.join("cache", uri.path)
     # If the path doesn't have a common file extension, treat it as a directory.
     unless cache_path.match(/\.(html|xml|json|txt|js|css|jpg|jpeg|png|gif)$/i)
-      cache_path += "/index.html"
+      cache_path = File.join(cache_path, "index.html")
     end
 
     cache_path
   end
 
+  def self.write_to_cache(url, content, status)
+    cache_path = get_cache_path(url)
+    data = { content: content, status: status }
+    File.write(cache_path, JSON.dump(data))
+  end
+
+  def self.read_from_cache(url)
+    cache_path = get_cache_path(url)
+    data = JSON.parse(File.read(cache_path))
+    [data["content"], data["status"]]
+  end
 end
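
A round-trip through the cache then looks roughly like this (a sketch only: it assumes json and uri are already required by the entrypoint, and that the directory for the cache path exists — link.rb creates it with FileUtils.mkdir_p before writing):

  CacheHelper.write_to_cache("https://fluxcd.io/docs/", "<html>...</html>", "200")
  content, status = CacheHelper.read_from_cache("https://fluxcd.io/docs/")
  status  # => "200"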
58 changes: 34 additions & 24 deletions lib/link.rb
@@ -1,3 +1,6 @@
+require './lib/cache_helper'
+require './lib/url_helper'
+
 class Link
   attr_accessor :source_file, :target, :type, :anchor,
                 :response_status, :link_string, :link_text, :line_no, :reference_intact
@@ -8,15 +11,24 @@ def initialize(source_url, link_element, domain)

if link_element
@link_string = link_element['href']
@link_text = link_element.text.strip
link_text = link_element.text
@line_no = link_element.line
determine_type
extract_anchor
else
# If no link_element is provided, assume the source is the target and type is local.
@link_string = source_url
@target = source_url
@type = 'local'
end

make_absolute
end

def link_text=(value)
@link_text = value.strip.gsub(/\s+/, ' ')
end

   def to_h
     {
       source_file: @source_file,
@@ -46,13 +58,18 @@ def self.from_h(hash)
   end
 
   def download_and_store
-    cache_path = get_cache_path
-    unless File.exist?(cache_path)
-      html_content = Net::HTTP.get(URI(@source_file))
-      FileUtils.mkdir_p(File.dirname(cache_path))
-      File.write(cache_path, html_content)
+    cache_path = CacheHelper.get_cache_path(@source_file)
+    if File.exist?(cache_path)
+      html_content, status = CacheHelper.read_from_cache(@source_file)
+      @response_status = status if status && status.to_i >= 400
     else
-      html_content = File.read(cache_path)
+      response = Net::HTTP.get_response(URI(@source_file))
+      html_content = response.body
+      html_content.force_encoding('UTF-8')
+      # Ensure the directory exists before writing the cache
+      FileUtils.mkdir_p(File.dirname(cache_path))
+      CacheHelper.write_to_cache(@source_file, html_content, response.code)
+      @response_status = response.code if response.code.to_i >= 400
     end
 
     Nokogiri::HTML(html_content)
@@ -66,6 +83,14 @@ def reference_intact?
     @reference_intact
   end
 
+  def set_error(error_message)
+    @error = error_message
+  end
+
+  def has_error?
+    !@error.nil?
+  end
+
   private
 
   def determine_type
@@ -77,27 +102,12 @@ def determine_type
   end
 
   def extract_anchor
-    @anchor = URI(@link_string).fragment
-  rescue URI::InvalidURIError
-    @anchor = URI(URI::Parser.new.escape(@link_string)).fragment
+    @anchor = URLHelper.extract_fragment(@link_string)
   end
 
   def make_absolute
     return unless @link_string
-    @target = URI.join(@source_file, @link_string).to_s
-  rescue URI::InvalidURIError
-    @target = URI.join(@source_file, URI::Parser.new.escape(@link_string)).to_s
-    nil
-  end
-
-  def get_cache_path
-    uri = URI(@source_file)
-    cache_path = "cache" + uri.path
-    # If the path doesn't have a common file extension, treat it as a directory.
-    unless cache_path.match(/\.(html|xml|json|txt|js|css|jpg|jpeg|png|gif)$/i)
-      cache_path += "/index.html"
-    end
-    cache_path
+    @target = URLHelper.make_absolute(@source_file, @link_string)
   end
 end
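
As a usage sketch (Nokogiri assumed loaded, and URLHelper provided by the new lib/url_helper required above — assumed here to behave like URI#fragment and URI.join, which this refactor implies):

  doc = Nokogiri::HTML('<a href="/docs/#install">Install  Guide</a>')
  link = Link.new("https://fluxcd.io/", doc.at_css('a'), "fluxcd.io")
  link.link_text  # => "Install Guide" (whitespace collapsed by the custom writer)
  link.anchor     # => "install"
  link.target     # => "https://fluxcd.io/docs/"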

40 changes: 28 additions & 12 deletions lib/link/link_analyzer.rb
@@ -9,41 +9,48 @@ def initialize(domain, masquerade_domain)
   end
 
   def analyze_links(sitemap_urls)
-    links_data = []
+    links_data = {}
     threads = []
 
     sitemap_urls.each_slice(SLICE_SIZE) do |slice|
       threads << Thread.new do
         slice.each do |url|
+          link = ensure_link(links_data, url, nil)
           begin
             url = masquerade_url(url) if @masquerade_domain
-            puts "Visiting: #{url}"
-            doc = Link.new(url, nil, @domain).download_and_store
+            base_url, fragment = url.split('#', 2)
+            fragment = URI::Parser.new.escape(fragment) if fragment
+            full_url = fragment ? "#{base_url}##{fragment}" : base_url
+
+            puts "Visiting: #{full_url}"
+            doc = link.download_and_store
 
             # Extracting all the links from the page
             doc.css('a').each do |link_element|
               link_href = link_element['href']
               # Skip links without href or with href set to '#'
               next if link_href.nil? || link_href.strip == '#'
 
-              begin
-                link = Link.new(url, link_element, @domain)
-              rescue URI::InvalidURIError => e
-                PRY_MUTEX.synchronize{binding.pry}
-              end
-              LINKS_MUTEX.synchronize do
-                links_data << link
-              end
+              # Splitting the base URL and fragment for proper handling
+              base_url, fragment = link_href.split('#', 2)
+              fragment = URI::Parser.new.escape(fragment) if fragment
+
+              # Combine the base URL with the original URL and append the fragment if present
+              joined_url = URI.join(url, base_url).to_s
+              target_url = fragment ? "#{joined_url}##{fragment}" : joined_url
+
+              link = ensure_link(links_data, target_url, link_element)
             end
           rescue StandardError => e
+            link.response_status = "Error: #{e.message}"
             puts "Error downloading or analyzing URL #{url}: #{e.message}"
           end
         end
       end
     end
 
     threads.each(&:join)
-    links_data
+    links_data.values
   end
 
   private
@@ -57,4 +64,13 @@ def masquerade_url(url)
       url
     end
   end
 
+  def ensure_link(links_data, url, link_element)
+    LINKS_MUTEX.synchronize do
+      unless links_data[url]
+        links_data[url] = Link.new(url, link_element, @domain)
+      end
+      links_data[url]
+    end
+  end
 end
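
The SLICE_SIZE, LINKS_MUTEX, and PRY_MUTEX constants referenced above sit outside this diff's hunks; something along these lines is presumably defined near the top of the file (the values here are illustrative, not from the commit):

  SLICE_SIZE = 25          # sitemap URLs handled per worker thread
  LINKS_MUTEX = Mutex.new  # serializes access to the shared links_data hash
  PRY_MUTEX = Mutex.new    # keeps interactive debugging sessions from interleaving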
10 changes: 6 additions & 4 deletions lib/link_checker.rb
@@ -27,17 +27,19 @@ def fetch_sitemap
     fetcher = SitemapFetcher.new(@domain, @masquerade_domain)
     @sitemap_urls = fetcher.fetch_sitemap_urls
     puts "Fetched sitemap with #{@sitemap_urls.size} URLs."
-  rescue => e
+  rescue StandardError => e
     puts "Error fetching sitemap: #{e.message}"
     exit
   end
 
   def download_and_analyze_links
     if File.exist?(LINKS_DATA_FILE)
+      # Loading from cache: Parse JSON data into Link objects
       links_data_hashes = JSON.parse(File.read(LINKS_DATA_FILE), symbolize_names: true)
       @links_data = links_data_hashes.map { |hash| Link.from_h(hash) }
       puts "Loaded links data from cache."
     else
+      # Fetching fresh data: Use LinkAnalyzer to get Link objects and cache for future use
      analyzer = LinkAnalyzer.new(@domain, @masquerade_domain)
      @links_data = analyzer.analyze_links(@sitemap_urls)

@@ -46,15 +48,15 @@ def download_and_analyze_links

puts "Links data saved to cache."
end
rescue => e
rescue StandardError => e
puts "Error downloading and analyzing links: #{e.message}"
exit
end

def validate_links
validator = LinkValidator.new(@links_data, @domain, @masquerade_domain)
@links_data = validator.validate_links
rescue => e
rescue StandardError => e
# PRY_MUTEX.synchronize{binding.pry}
puts "Error validating links: #{e.message}"
exit
@@ -64,7 +66,7 @@ def generate_report
     generator = ReportGenerator.new(@links_data, @report_file)
     generator.generate
     puts "Report generated at #{@report_file}."
-  rescue => e
+  rescue StandardError => e
     puts "Error generating report: #{e.message}"
   end
 end