Skip to content

Commit

Permalink
Import from hacks repo
Browse files Browse the repository at this point in the history
  • Loading branch information
tomnomnom committed Feb 26, 2018
0 parents commit 1f3ce85
Show file tree
Hide file tree
Showing 4 changed files with 267 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
unfurl
37 changes: 37 additions & 0 deletions README.mkd
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# unfurl

Pull out bits of URLs provided on `stdin`

## Help

```
▶ unfurl -h
Format URLs provided on stdin
Usage:
unfurl [OPTIONS] [MODE] [FORMATSTRING]
Options:
-u, --unique Only output unique values
-v, --verbose Verbose mode (output URL parse errors)
Modes:
keys Keys from the query string (one per line)
values Values from the query string (one per line)
domains The hostname (e.g. sub.example.com)
paths The request path (e.g. /users)
format Specify a custom format (see below)
Format Directives:
%% A literal percent character
%s The request scheme (e.g. https)
%d The domain (e.g. sub.example.com)
%P The port (e.g. 8080)
%p The path (e.g. /users)
%q The raw query string (e.g. a=1&b=2)
%f The page fragment (e.g. page-section)
Examples:
cat urls.txt | unfurl keys
cat urls.txt | unfurl format %s://%d%p?%q
```
43 changes: 43 additions & 0 deletions format_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package main

import (
"net/url"
"testing"
)

// TestFormat checks that format expands each supported directive, and the
// literal %% escape, against a parsed URL.
func TestFormat(t *testing.T) {
	cases := []struct {
		url      string
		format   string
		expected string
	}{
		{"https://example.com/foo", "%d", "example.com"},
		{"https://example.com/foo", "%d%p", "example.com/foo"},
		{"https://example.com/foo", "%s://%d%p", "https://example.com/foo"},

		{"https://example.com:8080/foo", "%d", "example.com"},
		{"https://example.com:8080/foo", "%P", "8080"},

		{"https://example.com/foo?a=b&c=d", "%p", "/foo"},
		{"https://example.com/foo?a=b&c=d", "%q", "a=b&c=d"},

		{"https://example.com/foo#bar", "%f", "bar"},
		{"https://example.com#bar", "%f", "bar"},

		{"https://example.com#bar", "foo%%bar", "foo%bar"},
		{"https://example.com#bar", "%s://%%", "https://%"},
	}

	for _, c := range cases {
		u, err := url.Parse(c.url)
		if err != nil {
			t.Fatalf("parsing %s: %s", c.url, err)
		}

		actual := format(u, c.format)

		// Check the length before indexing so that an unexpected return
		// fails this case instead of panicking and hiding the others.
		if len(actual) != 1 {
			t.Errorf("want 1 result for format(%s, %s); have %d", c.url, c.format, len(actual))
			continue
		}

		// Report the compared element, not the enclosing slice, so the
		// "have" value reads the same way as the "want" value.
		if actual[0] != c.expected {
			t.Errorf("want %s for format(%s, %s); have %s", c.expected, c.url, c.format, actual[0])
		}
	}
}
186 changes: 186 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
package main

import (
	"bufio"
	"bytes"
	"flag"
	"fmt"
	"net/url"
	"os"
	"sort"
)

// main reads URLs from stdin, one per line, runs the handler selected by the
// MODE argument over each of them, and prints every non-empty result on its
// own line.
//
// Lines that fail to parse are skipped (and reported on stderr with -v). With
// -u a value is printed only the first time it is produced. The process exits
// with status 1 when the mode is missing or unknown, or when stdin cannot be
// read to the end.
func main() {
	var unique bool
	flag.BoolVar(&unique, "u", false, "")
	flag.BoolVar(&unique, "unique", false, "")

	var verbose bool
	flag.BoolVar(&verbose, "v", false, "")
	flag.BoolVar(&verbose, "verbose", false, "")

	flag.Parse()

	mode := flag.Arg(0)
	fmtStr := flag.Arg(1)

	procFn, ok := map[string]urlProc{
		"keys":    keys,
		"values":  values,
		"domains": domains,
		"paths":   paths,
		"format":  format,
	}[mode]

	if !ok {
		// With no mode at all the usage text is more helpful than an
		// "unknown mode" message with nothing after the colon.
		if mode == "" {
			flag.Usage()
		} else {
			fmt.Fprintf(os.Stderr, "unknown mode: %s\n", mode)
		}
		// Signal the failure to the calling shell or pipeline.
		os.Exit(1)
	}

	sc := bufio.NewScanner(os.Stdin)

	// The scanner's default token limit is 64 KiB; a single longer line would
	// end the scan and drop the rest of the input. Allow lines up to 1 MiB.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	seen := make(map[string]bool)

	for sc.Scan() {
		u, err := url.Parse(sc.Text())
		if err != nil {
			if verbose {
				fmt.Fprintf(os.Stderr, "parse failure: %s\n", err)
			}
			continue
		}

		// some urlProc functions return multiple things,
		// so it's just easier to always get a slice and
		// loop over it instead of having two kinds of
		// urlProc functions.
		for _, val := range procFn(u, fmtStr) {

			// you do see empty values sometimes
			if val == "" {
				continue
			}

			if seen[val] && unique {
				continue
			}

			fmt.Println(val)

			// no point using up memory if we're outputting dupes
			if unique {
				seen[val] = true
			}
		}
	}

	if err := sc.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "failed to read input: %s\n", err)
		os.Exit(1)
	}
}

// urlProc is the signature shared by every mode handler: it receives a parsed
// URL together with the format-string argument from the command line and
// returns the values to print for that URL.
type urlProc func(*url.URL, string) []string

// keys returns the distinct parameter names found in the query string of u,
// in ascending order. The second argument exists only to satisfy urlProc and
// is ignored. A URL without a query string yields an empty, non-nil slice.
func keys(u *url.URL, _ string) []string {
	query := u.Query()

	out := make([]string, 0, len(query))
	for key := range query {
		out = append(out, key)
	}

	// Map iteration order is randomised, so sort to give the same output
	// for the same input on every run.
	sort.Strings(out)

	return out
}

// values returns every value found in the query string of u. Values are
// grouped by parameter name, with the names visited in ascending order and
// the values of one name kept in the order they appear in the URL. The second
// argument exists only to satisfy urlProc and is ignored.
func values(u *url.URL, _ string) []string {
	query := u.Query()

	// Map iteration order is randomised, so walk the names in sorted order
	// to give the same output for the same input on every run.
	names := make([]string, 0, len(query))
	for name := range query {
		names = append(names, name)
	}
	sort.Strings(names)

	out := make([]string, 0, len(names))
	for _, name := range names {
		out = append(out, query[name]...)
	}
	return out
}

// domains implements the "domains" mode. It ignores the format string it is
// given and instead asks format to expand the fixed %d directive for u.
func domains(u *url.URL, f string) []string {
	const directive = "%d"
	return format(u, directive)
}

// paths implements the "paths" mode. It ignores the format string it is
// given and instead asks format to expand the fixed %p directive for u.
func paths(u *url.URL, f string) []string {
	const directive = "%p"
	return format(u, directive)
}

// format expands the directives in f using the components of u and returns
// the result as a single-element slice, the shape required by urlProc.
//
// Supported directives:
//
//	%%  a literal percent character
//	%s  the scheme
//	%d  the hostname (without port)
//	%P  the port
//	%p  the escaped path
//	%q  the raw query string
//	%f  the fragment
//
// Anything that is not a recognised directive is copied through unchanged;
// that includes an unknown "%x" pair and a lone '%' at the very end of f.
func format(u *url.URL, f string) []string {
	var out bytes.Buffer

	// inFormat is true while the previous rune was an unconsumed '%'.
	inFormat := false
	for _, r := range f {

		if r == '%' && !inFormat {
			inFormat = true
			continue
		}

		if !inFormat {
			out.WriteRune(r)
			continue
		}

		switch r {
		case '%':
			out.WriteRune('%')
		case 's':
			out.WriteString(u.Scheme)
		case 'd':
			out.WriteString(u.Hostname())
		case 'P':
			out.WriteString(u.Port())
		case 'p':
			out.WriteString(u.EscapedPath())
		case 'q':
			out.WriteString(u.RawQuery)
		case 'f':
			out.WriteString(u.Fragment)
		default:
			// output untouched
			out.WriteRune('%')
			out.WriteRune(r)
		}

		inFormat = false
	}

	// A '%' as the final rune has no directive letter to consume. Emit it
	// rather than dropping it, matching how unknown directives are handled.
	if inFormat {
		out.WriteRune('%')
	}

	return []string{out.String()}
}

// init replaces the flag package's default usage output with the tool's own
// help text, written to stderr in a single call.
func init() {
	flag.Usage = func() {
		// One entry per help section, in display order. The text is kept in
		// a variable (not a constant) and printed with Fprint because it
		// contains literal '%' sequences that must not be interpreted.
		sections := []string{
			"Format URLs provided on stdin\n\n",

			"Usage:\n" +
				" unfurl [OPTIONS] [MODE] [FORMATSTRING]\n\n",

			"Options:\n" +
				" -u, --unique Only output unique values\n" +
				" -v, --verbose Verbose mode (output URL parse errors)\n\n",

			"Modes:\n" +
				" keys Keys from the query string (one per line)\n" +
				" values Values from the query string (one per line)\n" +
				" domains The hostname (e.g. sub.example.com)\n" +
				" paths The request path (e.g. /users)\n" +
				" format Specify a custom format (see below)\n\n",

			"Format Directives:\n" +
				" %% A literal percent character\n" +
				" %s The request scheme (e.g. https)\n" +
				" %d The domain (e.g. sub.example.com)\n" +
				" %P The port (e.g. 8080)\n" +
				" %p The path (e.g. /users)\n" +
				" %q The raw query string (e.g. a=1&b=2)\n" +
				" %f The page fragment (e.g. page-section)\n\n",

			"Examples:\n" +
				" cat urls.txt | unfurl keys\n" +
				" cat urls.txt | unfurl format %s://%d%p?%q\n",
		}

		help := ""
		for _, section := range sections {
			help += section
		}

		fmt.Fprint(os.Stderr, help)
	}
}

0 comments on commit 1f3ce85

Please sign in to comment.