Skip to content

Commit

Permalink
Merge pull request #123 from garlic0x1/master
Browse files Browse the repository at this point in the history
Added `-json` option
  • Loading branch information
hakluke authored Apr 27, 2022
2 parents 3d73808 + 62f6a67 commit e8ed1d3
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,11 @@ Usage of hakrawler:
-d int
Depth to crawl. (default 2)
-h string
Custom headers separated by two semi-colons. E.g. -h "Cookie: foo=bar;;Referer: http://example.com/"
Custom headers separated by two semi-colons. E.g. -h "Cookie: foo=bar;;Referer: http://example.com/"
-insecure
Disable TLS verification.
-json
Output as JSON.
-proxy string
Proxy URL. E.g. -proxy http://127.0.0.1:8080
-s Show the source of URL based on where it was found. E.g. href, form, script, etc.
Expand Down
24 changes: 18 additions & 6 deletions hakrawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"bufio"
"crypto/tls"
"encoding/json"
"errors"
"flag"
"fmt"
Expand All @@ -18,6 +19,11 @@ import (
"github.com/gocolly/colly/v2"
)

// Result is the record emitted for each discovered link when JSON output
// is requested. Source holds the label of the place the link was found
// (the callers visible here pass "href", "script" or "form"), and URL
// holds the link after it has been resolved to an absolute URL.
// The fields carry no struct tags, so encoding/json uses the Go field
// names "Source" and "URL" as the object keys.
type Result struct {
	Source, URL string
}

var headers map[string]string

// Thread safe map
Expand All @@ -29,6 +35,7 @@ func main() {
maxSize := flag.Int("size", -1, "Page size limit, in KB.")
insecure := flag.Bool("insecure", false, "Disable TLS verification.")
subsInScope := flag.Bool("subs", false, "Include subdomains for crawling.")
showJson := flag.Bool("json", false, "Output as JSON.")
showSource := flag.Bool("s", false, "Show the source of URL based on where it was found. E.g. href, form, script, etc.")
rawHeaders := flag.String(("h"), "", "Custom headers separated by two semi-colons. E.g. -h \"Cookie: foo=bar;;Referer: http://example.com/\" ")
unique := flag.Bool(("u"), false, "Show only unique urls.")
Expand Down Expand Up @@ -107,18 +114,18 @@ func main() {
// Print every href found, and visit it
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
link := e.Attr("href")
printResult(link, "href", *showSource, results, e)
printResult(link, "href", *showSource, *showJson, results, e)
e.Request.Visit(link)
})

// find and print all the JavaScript files
c.OnHTML("script[src]", func(e *colly.HTMLElement) {
printResult(e.Attr("src"), "script", *showSource, results, e)
printResult(e.Attr("src"), "script", *showSource, *showJson, results, e)
})

// find and print all the form action URLs
c.OnHTML("form[action]", func(e *colly.HTMLElement) {
printResult(e.Attr("action"), "form", *showSource, results, e)
printResult(e.Attr("action"), "form", *showSource, *showJson, results, e)
})

// add the custom headers
Expand Down Expand Up @@ -226,11 +233,16 @@ func extractHostname(urlString string) (string, error) {
}

// printResult constructs output lines and sends them to the results chan
func printResult(link string, sourceName string, showSource bool, results chan string, e *colly.HTMLElement) {

func printResult(link string, sourceName string, showSource bool, showJson bool, results chan string, e *colly.HTMLElement) {
result := e.Request.AbsoluteURL(link)
if result != "" {
if showSource {
if showJson {
bytes, _ := json.Marshal(Result{
Source: sourceName,
URL: result,
})
result = string(bytes)
} else if showSource {
result = "[" + sourceName + "] " + result
}
// If a timeout occurs before the goroutines are finished, recover from the panic that may occur when attempting to write results to the closed results channel
Expand Down

0 comments on commit e8ed1d3

Please sign in to comment.