From 3dec4f905365fb54e26c2421b6fafe5c8ca2ae44 Mon Sep 17 00:00:00 2001 From: Dave Molk Date: Thu, 15 Sep 2022 05:19:13 -0600 Subject: [PATCH] update readme for installation --- README.md | 18 +++++++++++++----- cmd/goGetJS/helpers.go | 4 ++-- cmd/goGetJS/main.go | 18 +++++++++--------- cmd/goGetJS/requests.go | 1 - 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 17dabef..b31a39c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Go Report Card](https://goreportcard.com/badge/github.com/davemolk/goGetJS)](https://goreportcard.com/report/github.com/davemolk/goGetJS) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/davemolk/goGetJS/issues) -goGetJS extracts, searches, and saves JavaScript files. Includes an optional chromium browser (via playwright) for dealing with JavaScript-heavy sites. +goGetJS extracts, searches, and saves JavaScript files. Includes an optional chromium headless browser (playwright) for dealing with JavaScript-heavy sites. ![demo](demo.gif) @@ -16,12 +16,12 @@ goGetJS extracts, searches, and saves JavaScript files. Includes an optional chr * Use -term, -regex, and -terms, respectively, to scan each script for a specific word, with a regular expression, or with a list of words (input as a file). * goGetJS does not follow redirects by default, but this can be toggled with -redirect=true. -## Example Usages +## Example Usages (use browser and search each script for a list of terms in search.txt) ``` -go run ./cmd/goGetJS -u=https://go.dev -b -terms=search.txt +go run ./cmd/goGetJS -u https://go.dev -b -terms search.txt ``` ``` -echo https://go.dev | goGetJS -b -terms=search.txt +echo https://go.dev | goGetJS -b -terms search.txt ``` ## Command-line Options @@ -51,9 +51,17 @@ Usage of goGetJS: URL to extract JS files from. ``` +## Installation +First, you'll need to [install go](https://golang.org/doc/install). 
+ +Then run this command to download and compile goGetJS: +``` +go install github.com/davemolk/goGetJS/cmd/goGetJS@latest +``` + ## Additional Notes * goGetJS names JavaScript files with ```fName := regexp.MustCompile(`[\w-&]+(\.js)?$`)```. Most scripts play nice, but those that don't are still saved. Each saved script has the full URL prepended to the file. -* Occasionally, an src will link to an empty page. These are automatically retried and will sometimes get a script on that second attempt (which is searched and saved). Set a timeout for these retries with -rt. More often, these pages are legitimately blank, causing the number of saved files printed to the terminal to be fewer than the number of processed files. +* Occasionally, an src will link to an empty page. These are automatically retried (set a timeout for these retries with -rt). Typically, these pages are legitimately blank, causing the number of saved files printed to the terminal to be fewer than the number of processed files. Sometimes we're lucky though, and the successful retry will be searched and saved. ## Changelog * **2022-08-26** : Add proxy, redirect, and rt flags. Refactor client creation. Improve error handling throughout. diff --git a/cmd/goGetJS/helpers.go b/cmd/goGetJS/helpers.go index b428b5e..c87a9e1 100644 --- a/cmd/goGetJS/helpers.go +++ b/cmd/goGetJS/helpers.go @@ -9,8 +9,8 @@ import ( "regexp" ) -// assertErrorToNilf is a simple helper function for error handling. 
+func (app *application) assertErrorToNil(err error) { if err != nil { app.errorLog.Fatal(err) } diff --git a/cmd/goGetJS/main.go b/cmd/goGetJS/main.go index 4f8eb9a..90833e0 100644 --- a/cmd/goGetJS/main.go +++ b/cmd/goGetJS/main.go @@ -72,19 +72,19 @@ func main() { if app.config.url == "" { err := app.getInput() - app.assertErrorToNilf(err) + app.assertErrorToNil(err) } baseURL, err := app.getBaseURL(cfg.url) - app.assertErrorToNilf(err) + app.assertErrorToNil(err) app.baseURL = baseURL err = os.Mkdir("data", 0755) - app.assertErrorToNilf(err) + app.assertErrorToNil(err) if cfg.term != "" || cfg.terms != "" || cfg.regex != "" { err := os.Mkdir("searchResults", 0755) - app.assertErrorToNilf(err) + app.assertErrorToNil(err) } app.client = app.makeClient(cfg.timeout, cfg.proxy, cfg.redirect) @@ -96,21 +96,21 @@ func main() { switch { case cfg.useBrowser: reader, err = app.browser(cfg.url, &cfg.browserTimeout, cfg.extraWait, app.client) - app.assertErrorToNilf(err) + app.assertErrorToNil(err) default: resp, err := app.makeRequest(cfg.url, app.client) - app.assertErrorToNilf(err) + app.assertErrorToNil(err) defer resp.Body.Close() reader = resp.Body } // parse for src, writing javascript files without src srcs, anonCount, err := app.parseDoc(reader, cfg.url, app.query) - app.assertErrorToNilf(err) + app.assertErrorToNil(err) // write src text file err = app.writeFile(srcs, "scriptSRC.txt") - app.assertErrorToNilf(err) + app.assertErrorToNil(err) // handling situations when src doesn't end with .js fName := regexp.MustCompile(`[\w-&]+(\.js)?$`) @@ -138,7 +138,7 @@ func main() { // save search results (if applicable) if cfg.term != "" || cfg.terms != "" || cfg.regex != "" { err = app.writeSearchResults(app.searches.Searches) - app.assertErrorToNilf(err) + app.assertErrorToNil(err) } fmt.Println() diff --git a/cmd/goGetJS/requests.go b/cmd/goGetJS/requests.go index c75aaed..e44e701 100644 --- a/cmd/goGetJS/requests.go +++ b/cmd/goGetJS/requests.go @@ -80,7 +80,6 @@ func 
(app *application) quickRetry(url string, query interface{}, r *regexp.Rege resp, err := app.makeRequest(url, app.retryClient) if err != nil { app.errorLog.Printf("retry request error for %v: %v\n", url, err) - resp.Body.Close() return } defer resp.Body.Close()