Skip to content

Commit

Permalink
Do not lowercase the href attribute of <a> tag links
Browse files: browse the repository at this point in the history
  • Loading branch information
Mario Hros committed Oct 3, 2019
1 parent 89ca77f commit 62431c4
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
25 changes: 13 additions & 12 deletions html2text.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package html2text
import (
"bytes"
"regexp"
"strings"
"strconv"
"strings"
)

const (
Expand Down Expand Up @@ -123,7 +123,7 @@ func HTML2Text(html string) string {
switch {
// skip new lines and spaces adding a single space if not there yet
case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029, // new lines
r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
writeSpace(outBuf)
continue

Expand Down Expand Up @@ -168,31 +168,32 @@ func HTML2Text(html string) string {

case r == '>': // end of a tag
shouldOutput = true
tagName := strings.ToLower(html[tagStart:i])
tag := html[tagStart:i]
tagNameLowercase := strings.ToLower(tag)

if tagName == "/ul" {
if tagNameLowercase == "/ul" {
outBuf.WriteString(lbr)
} else if tagName == "li" || tagName == "li/" {
} else if tagNameLowercase == "li" || tagNameLowercase == "li/" {
outBuf.WriteString(lbr)
} else if headersRE.MatchString(tagName) {
} else if headersRE.MatchString(tagNameLowercase) {
if canPrintNewline {
outBuf.WriteString(lbr + lbr)
}
canPrintNewline = false
} else if tagName == "br" || tagName == "br/" {
} else if tagNameLowercase == "br" || tagNameLowercase == "br/" {
// new line
outBuf.WriteString(lbr)
} else if tagName == "p" || tagName == "/p" {
} else if tagNameLowercase == "p" || tagNameLowercase == "/p" {
if canPrintNewline {
outBuf.WriteString(lbr + lbr)
}
canPrintNewline = false
} else if badTagnamesRE.MatchString(tagName) {
} else if badTagnamesRE.MatchString(tagNameLowercase) {
// unwanted block
badTagStackDepth++

// parse link href
m := linkTagRE.FindStringSubmatch(tagName)
m := linkTagRE.FindStringSubmatch(tag)
if len(m) == 4 {
link := m[2]
if len(link) == 0 {
Expand All @@ -203,8 +204,8 @@ func HTML2Text(html string) string {
outBuf.WriteString(HTMLEntitiesToText(link))
}
}
} else if len(tagName) > 0 && tagName[0] == '/' &&
badTagnamesRE.MatchString(tagName[1:]) {
} else if len(tagNameLowercase) > 0 && tagNameLowercase[0] == '/' &&
badTagnamesRE.MatchString(tagNameLowercase[1:]) {
// end of unwanted block
badTagStackDepth--
}
Expand Down
1 change: 1 addition & 0 deletions html2text_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ func TestHTML2Text(t *testing.T) {
So(HTML2Text(`click <a class="x" href="test">here</a>`), ShouldEqual, "click test")
So(HTML2Text(`click <a href="ents/&apos;x&apos;">here</a>`), ShouldEqual, "click ents/'x'")
So(HTML2Text(`click <a href="javascript:void(0)">here</a>`), ShouldEqual, "click ")
So(HTML2Text(`click <a href="http://bit.ly/2n4wXRs">news</a>`), ShouldEqual, "click http://bit.ly/2n4wXRs")
})

Convey("Inlines", func() {
Expand Down

0 comments on commit 62431c4

Please sign in to comment.