diff --git a/html2text.go b/html2text.go
index a9842f4..88bceeb 100644
--- a/html2text.go
+++ b/html2text.go
@@ -15,7 +15,7 @@ const (
var lbr = WIN_LBR
var badTagnamesRE = regexp.MustCompile(`^(head|script|style|a)($|\s*)`)
var linkTagRE = regexp.MustCompile(`a.*href=('([^']*?)'|"([^"]*?)")`)
-var badLinkHrefRE = regexp.MustCompile(`#|javascript:`)
+var badLinkHrefRE = regexp.MustCompile(`javascript:`)
var headersRE = regexp.MustCompile(`^(\/)?h[1-6]`)
var numericEntityRE = regexp.MustCompile(`^#([0-9]+)$`)
@@ -124,12 +124,14 @@ func HTML2Text(html string) string {
// skip new lines and spaces adding a single space if not there yet
case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029, // new lines
r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
- writeSpace(outBuf)
+ if shouldOutput && badTagStackDepth == 0 && !inEnt {
+ // write the collapsing space only while output is enabled, no bad tag is open, and we are not inside an entity
+ writeSpace(outBuf)
+ }
continue
case r == ';' && inEnt: // end of html entity
inEnt = false
- shouldOutput = true
continue
case r == '&' && shouldOutput: // possible html entity
@@ -156,7 +158,6 @@ func HTML2Text(html string) string {
if ent, isEnt := parseHTMLEntity(entName); isEnt {
outBuf.WriteString(ent)
inEnt = true
- shouldOutput = false
continue
}
}
diff --git a/html2text_test.go b/html2text_test.go
index 07491e4..b84d149 100644
--- a/html2text_test.go
+++ b/html2text_test.go
@@ -16,7 +16,9 @@ func TestHTML2Text(t *testing.T) {
So(HTML2Text(`click here`), ShouldEqual, "click test")
So(HTML2Text(`click here`), ShouldEqual, "click ents/'x'")
So(HTML2Text(`click here`), ShouldEqual, "click ")
+ So(HTML2Text(`click here or here`), ShouldEqual, "click test")
So(HTML2Text(`click news`), ShouldEqual, "click http://bit.ly/2n4wXRs")
+ So(HTML2Text(`yet, not yet`), ShouldEqual, "/wiki/yet#English, /wiki/not_yet#English")
})
Convey("Inlines", func() {