diff --git a/.gitignore b/.gitignore
index b8ae847..e2dc727 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,6 @@ cabal-dev
.stack-work/
cabal.sandbox.config
cabal.config
+*.swp
+*.swo
+*.DS_Store
diff --git a/.travis/stack-lts-2.yaml b/.travis/stack-lts-2.yaml
index f031ed0..1bea951 100644
--- a/.travis/stack-lts-2.yaml
+++ b/.travis/stack-lts-2.yaml
@@ -8,4 +8,5 @@ extra-deps:
- http-client-0.4.30
- http-client-tls-0.2.4
- pointedlist-0.6.1
+- tagsoup-0.14.8@sha256:56b2023d2e9fdbff093719ce9af5285d2436b234c6c684d6a69f14595a8348ae
resolver: lts-2.0
diff --git a/.travis/stack-lts-6.yaml b/.travis/stack-lts-6.yaml
index c6e581b..fd55c4f 100644
--- a/.travis/stack-lts-6.yaml
+++ b/.travis/stack-lts-6.yaml
@@ -6,4 +6,5 @@ packages:
extra-deps:
- fail-4.9.0.0
- http-client-0.4.30
+- tagsoup-0.14.8@sha256:56b2023d2e9fdbff093719ce9af5285d2436b234c6c684d6a69f14595a8348ae
resolver: lts-6.0
diff --git a/.travis/stack-lts-7.yaml b/.travis/stack-lts-7.yaml
index cbba31d..6738aa0 100644
--- a/.travis/stack-lts-7.yaml
+++ b/.travis/stack-lts-7.yaml
@@ -5,4 +5,5 @@ packages:
- ../examples/
extra-deps:
- fail-4.9.0.0
+- tagsoup-0.14.8@sha256:56b2023d2e9fdbff093719ce9af5285d2436b234c6c684d6a69f14595a8348ae
resolver: lts-7.4
diff --git a/examples/complex-predicates/Main.hs b/examples/complex-predicates/Main.hs
index 081b0c7..fd1a330 100644
--- a/examples/complex-predicates/Main.hs
+++ b/examples/complex-predicates/Main.hs
@@ -1,10 +1,13 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ExtendedDefaultRules #-}
import Text.HTML.Scalpel
import Control.Applicative
import Control.Monad
import Data.List (isInfixOf)
+default (String)
+
exampleHtml :: String
exampleHtml = "\
@@ -37,7 +40,7 @@ catComment :: Scraper String String
catComment =
-- 1. First narrow the current context to the div containing the comment's
-- textual content.
- chroot ("div" @: [hasClass "comment", hasClass "text"]) $ do
+ chroot (("div" :: TagName String) @: [hasClass "comment", hasClass "text"]) $ do
-- 2. Any can be used to access the root tag of the current context.
contents <- text anySelector
-- 3. Skip comment divs that do not contain "cat".
diff --git a/examples/example-from-documentation/Main.hs b/examples/example-from-documentation/Main.hs
index 57067d6..9589bda 100644
--- a/examples/example-from-documentation/Main.hs
+++ b/examples/example-from-documentation/Main.hs
@@ -1,8 +1,11 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ExtendedDefaultRules #-}
import Text.HTML.Scalpel
import Control.Applicative
+default (String)
+
exampleHtml :: String
exampleHtml = "\
@@ -35,19 +38,19 @@ main :: IO ()
main = print $ scrapeStringLike exampleHtml comments
where
comments :: Scraper String [Comment]
- comments = chroots ("div" @: [hasClass "container"]) comment
+ comments = chroots (("div" :: TagName String) @: [hasClass "container"]) comment
comment :: Scraper String Comment
comment = textComment <|> imageComment
textComment :: Scraper String Comment
textComment = do
- author <- text $ "span" @: [hasClass "author"]
- commentText <- text $ "div" @: [hasClass "text"]
+ author <- text $ ("span" :: TagName String) @: [hasClass "author"]
+ commentText <- text $ ("div" :: TagName String) @: [hasClass "text"]
return $ TextComment author commentText
imageComment :: Scraper String Comment
imageComment = do
- author <- text $ "span" @: [hasClass "author"]
- imageURL <- attr "src" $ "img" @: [hasClass "image"]
+ author <- text $ ("span" :: TagName String) @: [hasClass "author"]
+ imageURL <- attr "src" $ ("img" :: TagName String) @: [hasClass "image"]
return $ ImageComment author imageURL
diff --git a/examples/html-to-markdown/Main.hs b/examples/html-to-markdown/Main.hs
index 8d9145c..5aeace0 100644
--- a/examples/html-to-markdown/Main.hs
+++ b/examples/html-to-markdown/Main.hs
@@ -2,6 +2,7 @@
-- the rough markdown equivalent using serial scrapers.
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RankNTypes #-}
import Control.Applicative
import System.Environment
import Text.HTML.Scalpel
diff --git a/scalpel-core/benchmarks/Main.hs b/scalpel-core/benchmarks/Main.hs
index 4c4faa0..b4fe01b 100644
--- a/scalpel-core/benchmarks/Main.hs
+++ b/scalpel-core/benchmarks/Main.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ExtendedDefaultRules #-}
import Text.HTML.Scalpel.Core
@@ -9,6 +10,8 @@ import Data.Foldable (foldr')
import qualified Data.Text as T
import qualified Text.HTML.TagSoup as TagSoup
+default (String, Int)
+
main :: IO ()
main = do
diff --git a/scalpel-core/scalpel-core.cabal b/scalpel-core/scalpel-core.cabal
index d1670c5..d369b6f 100644
--- a/scalpel-core/scalpel-core.cabal
+++ b/scalpel-core/scalpel-core.cabal
@@ -50,7 +50,7 @@ library
, pointedlist
, regex-base
, regex-tdfa
- , tagsoup >= 0.12.2
+ , tagsoup >= 0.14.8
, text
, vector
default-extensions:
diff --git a/scalpel-core/src/Text/HTML/Scalpel/Core.hs b/scalpel-core/src/Text/HTML/Scalpel/Core.hs
index eb7f2cd..8cbaef3 100644
--- a/scalpel-core/src/Text/HTML/Scalpel/Core.hs
+++ b/scalpel-core/src/Text/HTML/Scalpel/Core.hs
@@ -61,10 +61,14 @@ module Text.HTML.Scalpel.Core (
, seekBack
, untilNext
, untilBack
+
+-- * Select
+, select
) where
import Text.HTML.Scalpel.Internal.Scrape
import Text.HTML.Scalpel.Internal.Scrape.StringLike
import Text.HTML.Scalpel.Internal.Select.Combinators
+import Text.HTML.Scalpel.Internal.Select
import Text.HTML.Scalpel.Internal.Select.Types
import Text.HTML.Scalpel.Internal.Serial
diff --git a/scalpel-core/src/Text/HTML/Scalpel/Internal/Scrape.hs b/scalpel-core/src/Text/HTML/Scalpel/Internal/Scrape.hs
index c70297a..5a55e46 100644
--- a/scalpel-core/src/Text/HTML/Scalpel/Internal/Scrape.hs
+++ b/scalpel-core/src/Text/HTML/Scalpel/Internal/Scrape.hs
@@ -22,6 +22,7 @@ import Text.HTML.Scalpel.Internal.Select.Types
import Control.Applicative
import Control.Monad
import Data.Maybe
+import Text.StringLike (StringLike, castString)
import qualified Control.Monad.Fail as Fail
import qualified Data.Vector as Vector
@@ -67,7 +68,7 @@ instance Fail.MonadFail (Scraper str) where
-- | The 'scrape' function executes a 'Scraper' on a list of
-- 'TagSoup.Tag's and produces an optional value.
-scrape :: (TagSoup.StringLike str)
+scrape :: (StringLike str)
=> Scraper str a -> [TagSoup.Tag str] -> Maybe a
scrape s = scrapeTagSpec s . tagsToSpec . TagSoup.canonicalizeTags
@@ -77,7 +78,7 @@ scrape s = scrapeTagSpec s . tagsToSpec . TagSoup.canonicalizeTags
--
-- This function will match only the first set of tags matching the selector, to
-- match every set of tags, use 'chroots'.
-chroot :: (TagSoup.StringLike str)
+chroot :: (StringLike str)
=> Selector -> Scraper str a -> Scraper str a
chroot selector inner = do
maybeResult <- listToMaybe <$> chroots selector inner
@@ -91,14 +92,14 @@ chroot selector inner = do
--
-- > s = "
"
-- > scrapeStringLike s (chroots "div" (pure 0)) == Just [0, 0]
-chroots :: (TagSoup.StringLike str)
+chroots :: (StringLike str)
=> Selector -> Scraper str a -> Scraper str [a]
chroots selector (MkScraper inner) = MkScraper
$ return . mapMaybe inner . select selector
-- | The 'matches' function takes a selector and returns `()` if the selector
-- matches any node in the DOM.
-matches :: (TagSoup.StringLike str) => Selector -> Scraper str ()
+matches :: (StringLike str) => Selector -> Scraper str ()
matches s = MkScraper $ withHead (pure ()) . select s
-- | The 'text' function takes a selector and returns the inner text from the
@@ -106,7 +107,7 @@ matches s = MkScraper $ withHead (pure ()) . select s
--
-- This function will match only the first set of tags matching the selector, to
-- match every set of tags, use 'texts'.
-text :: (TagSoup.StringLike str) => Selector -> Scraper str str
+text :: (StringLike str) => Selector -> Scraper str str
text s = MkScraper $ withHead tagsToText . select s
-- | The 'texts' function takes a selector and returns the inner text from every
@@ -114,7 +115,7 @@ text s = MkScraper $ withHead tagsToText . select s
--
-- > s = ""
-- > scrapeStringLike s (texts "div") == Just ["Hello World", "World"]
-texts :: (TagSoup.StringLike str)
+texts :: (StringLike str)
=> Selector -> Scraper str [str]
texts s = MkScraper $ withAll tagsToText . select s
@@ -123,7 +124,7 @@ texts s = MkScraper $ withAll tagsToText . select s
--
-- This function will match only the first set of tags matching the selector, to
-- match every set of tags, use 'htmls'.
-html :: (TagSoup.StringLike str) => Selector -> Scraper str str
+html :: (StringLike str) => Selector -> Scraper str str
html s = MkScraper $ withHead tagsToHTML . select s
-- | The 'htmls' function takes a selector and returns the html string from
@@ -131,7 +132,7 @@ html s = MkScraper $ withHead tagsToHTML . select s
--
-- > s = ""
-- > scrapeStringLike s (htmls "div") == Just ["", "A
"]
-htmls :: (TagSoup.StringLike str)
+htmls :: (StringLike str)
=> Selector -> Scraper str [str]
htmls s = MkScraper $ withAll tagsToHTML . select s
@@ -141,7 +142,7 @@ htmls s = MkScraper $ withAll tagsToHTML . select s
--
-- This function will match only the first set of tags matching the selector, to
-- match every set of tags, use 'innerHTMLs'.
-innerHTML :: (TagSoup.StringLike str)
+innerHTML :: (StringLike str)
=> Selector -> Scraper str str
innerHTML s = MkScraper $ withHead tagsToInnerHTML . select s
@@ -150,7 +151,7 @@ innerHTML s = MkScraper $ withHead tagsToInnerHTML . select s
--
-- > s = ""
-- > scrapeStringLike s (innerHTMLs "div") == Just ["A
", "A"]
-innerHTMLs :: (TagSoup.StringLike str)
+innerHTMLs :: (StringLike str)
=> Selector -> Scraper str [str]
innerHTMLs s = MkScraper $ withAll tagsToInnerHTML . select s
@@ -160,10 +161,10 @@ innerHTMLs s = MkScraper $ withAll tagsToInnerHTML . select s
--
-- This function will match only the opening tag matching the selector, to match
-- every tag, use 'attrs'.
-attr :: (Show str, TagSoup.StringLike str)
- => String -> Selector -> Scraper str str
+attr :: (Show str, StringLike str)
+ => str -> Selector -> Scraper str str
attr name s = MkScraper
- $ join . withHead (tagsToAttr $ TagSoup.castString name) . select s
+ $ join . withHead (tagsToAttr $ castString name) . select s
-- | The 'attrs' function takes an attribute name and a selector and returns the
-- value of the attribute of the given name for every opening tag
@@ -171,11 +172,11 @@ attr name s = MkScraper
--
-- > s = ""
-- > scrapeStringLike s (attrs "id" "div") == Just ["out", "in"]
-attrs :: (Show str, TagSoup.StringLike str)
- => String -> Selector -> Scraper str [str]
+attrs :: (Show str, StringLike str)
+ => str -> Selector -> Scraper str [str]
attrs name s = MkScraper
$ fmap catMaybes . withAll (tagsToAttr nameStr) . select s
- where nameStr = TagSoup.castString name
+ where nameStr = castString name
-- | The 'position' function is intended to be used within the do-block of a
-- `chroots` call. Within the do-block position will return the index of the
@@ -211,7 +212,7 @@ attrs name s = MkScraper
-- , (2, "Third paragraph.")
-- ]
-- @
-position :: (TagSoup.StringLike str) => Scraper str Int
+position :: (StringLike str) => Scraper str Int
position = MkScraper $ Just . tagsToPosition
withHead :: (a -> b) -> [a] -> Maybe b
@@ -221,27 +222,27 @@ withHead f (x:_) = Just $ f x
withAll :: (a -> b) -> [a] -> Maybe [b]
withAll f xs = Just $ map f xs
-foldSpec :: TagSoup.StringLike str
+foldSpec :: StringLike str
=> (TagSoup.Tag str -> str -> str) -> TagSpec str -> str
foldSpec f = Vector.foldr' (f . infoTag) TagSoup.empty . (\(a, _, _) -> a)
-tagsToText :: TagSoup.StringLike str => TagSpec str -> str
+tagsToText :: StringLike str => TagSpec str -> str
tagsToText = foldSpec f
where
f (TagSoup.TagText str) s = str `TagSoup.append` s
f _ s = s
-tagsToHTML :: TagSoup.StringLike str => TagSpec str -> str
+tagsToHTML :: StringLike str => TagSpec str -> str
tagsToHTML = foldSpec (\tag s -> TagSoup.renderTags [tag] `TagSoup.append` s)
-tagsToInnerHTML :: TagSoup.StringLike str => TagSpec str -> str
+tagsToInnerHTML :: StringLike str => TagSpec str -> str
tagsToInnerHTML (tags, tree, ctx)
| len < 2 = TagSoup.empty
| otherwise = tagsToHTML (Vector.slice 1 (len - 2) tags, tree, ctx)
where len = Vector.length tags
-tagsToAttr :: (Show str, TagSoup.StringLike str)
+tagsToAttr :: (Show str, StringLike str)
=> str -> TagSpec str -> Maybe str
tagsToAttr attr (tags, _, _) = do
guard $ 0 < Vector.length tags
diff --git a/scalpel-core/src/Text/HTML/Scalpel/Internal/Select.hs b/scalpel-core/src/Text/HTML/Scalpel/Internal/Select.hs
index 665a62d..442fbe7 100644
--- a/scalpel-core/src/Text/HTML/Scalpel/Internal/Select.hs
+++ b/scalpel-core/src/Text/HTML/Scalpel/Internal/Select.hs
@@ -15,13 +15,13 @@ import Text.HTML.Scalpel.Internal.Select.Types
import Control.Applicative ((<$>), (<|>))
import Data.Maybe (catMaybes, isJust, fromJust, fromMaybe)
+import Text.StringLike (StringLike, castString)
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import qualified Data.Tree as Tree
import qualified Data.Vector as Vector
import qualified Text.HTML.TagSoup as TagSoup
-import qualified Text.StringLike as TagSoup
type Index = Int
@@ -73,7 +73,7 @@ type TagSpec str = (TagVector str, TagForest, SelectContext)
-- | The 'select' function takes a 'Selectable' value and a list of
-- 'TagSoup.Tag's and returns a list of every subsequence of the given list of
-- Tags that matches the given selector.
-select :: (TagSoup.StringLike str)
+select :: (StringLike str)
=> Selector -> TagSpec str -> [TagSpec str]
select s tagSpec = newSpecs
where
@@ -83,7 +83,7 @@ select s tagSpec = newSpecs
applyPosition p (tags, f, _) = (tags, f, SelectContext p True)
-- | Creates a TagSpec from a list of tags parsed by TagSoup.
-tagsToSpec :: forall str. (TagSoup.StringLike str)
+tagsToSpec :: forall str. (StringLike str)
=> [TagSoup.Tag str] -> TagSpec str
tagsToSpec tags = (vector, tree, ctx)
where
@@ -113,7 +113,7 @@ tagsToSpec tags = (vector, tree, ctx)
-- closing offset.
--
-- (5) The result set is then sorted by their indices.
-tagsToVector :: forall str. (TagSoup.StringLike str)
+tagsToVector :: forall str. (StringLike str)
=> [TagSoup.Tag str] -> TagVector str
tagsToVector tags = let indexed = zip tags [0..]
total = length indexed
@@ -164,9 +164,9 @@ tagsToVector tags = let indexed = zip tags [0..]
popTag (Just (_ : y : xs)) = let s = y : xs in s `seq` Just s
popTag _ = Nothing
-getTagName :: TagSoup.StringLike str => TagSoup.Tag str -> T.Text
-getTagName (TagSoup.TagOpen name _) = TagSoup.castString name
-getTagName (TagSoup.TagClose name) = TagSoup.castString name
+getTagName :: StringLike str => TagSoup.Tag str -> T.Text
+getTagName (TagSoup.TagOpen name _) = castString name
+getTagName (TagSoup.TagClose name) = castString name
getTagName _ = undefined
-- | Builds a forest describing the structure of the tags within a given vector.
@@ -174,7 +174,7 @@ getTagName _ = undefined
-- vector of an open and close pair. The tree is organized such for any node n
-- the parent of node n is the smallest span that completely encapsulates the
-- span of node n.
-vectorToTree :: TagSoup.StringLike str => TagVector str -> TagForest
+vectorToTree :: StringLike str => TagVector str -> TagForest
vectorToTree tags = fixup $ forestWithin 0 (Vector.length tags)
where
forestWithin :: Int -> Int -> TagForest
@@ -217,7 +217,7 @@ vectorToTree tags = fixup $ forestWithin 0 (Vector.length tags)
-- tree the SelectNode is popped and the current node's sub-forest is traversed
-- with the remaining SelectNodes. If there is only a single SelectNode then any
-- node encountered that satisfies the SelectNode is returned as an answer.
-selectNodes :: TagSoup.StringLike str
+selectNodes :: StringLike str
=> [(SelectNode, SelectSettings)]
-> TagSpec str
-> TagSpec str
@@ -305,7 +305,7 @@ boolMatch True = MatchOk
boolMatch False = MatchFail
-- | Returns True if a tag satisfies a given SelectNode's condition.
-nodeMatches :: TagSoup.StringLike str
+nodeMatches :: StringLike str
=> (SelectNode, SelectSettings)
-> TagInfo str
-> TagSpec str
@@ -322,7 +322,7 @@ nodeMatches (SelectText , settings) info cur root =
-- | Given a SelectSettings, the current node under consideration, and the last
-- matched node, returns true IFF the current node satisfies all of the
-- selection settings.
-checkSettings :: TagSoup.StringLike str
+checkSettings :: StringLike str
=> SelectSettings -> TagSpec str -> TagSpec str -> MatchResult
checkSettings (SelectSettings (Just depth))
(_, curRoot : _, _)
@@ -341,7 +341,7 @@ checkSettings (SelectSettings _) _ _ = MatchOk
-- | Given a tag name and a list of attribute predicates return a function that
-- returns true if a given tag matches the supplied name and predicates.
-checkTag :: TagSoup.StringLike str
+checkTag :: StringLike str
=> T.Text -> [AttributePredicate] -> TagInfo str -> MatchResult
checkTag name preds (TagInfo tag tagName _)
= boolMatch (
@@ -351,7 +351,7 @@ checkTag name preds (TagInfo tag tagName _)
) `andMatch` checkPreds preds tag
-- | Returns True if a tag satisfies a list of attribute predicates.
-checkPreds :: TagSoup.StringLike str
+checkPreds :: StringLike str
=> [AttributePredicate] -> TagSoup.Tag str -> MatchResult
checkPreds [] tag = boolMatch
$ TagSoup.isTagOpen tag || TagSoup.isTagText tag
diff --git a/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs b/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs
index 67fa014..697ee75 100644
--- a/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs
+++ b/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Combinators.hs
@@ -14,15 +14,15 @@ module Text.HTML.Scalpel.Internal.Select.Combinators (
) where
import Text.HTML.Scalpel.Internal.Select.Types
+import Text.StringLike (StringLike, castString, toString)
import qualified Data.Text as T
import qualified Text.Regex.Base.RegexLike as RE
-import qualified Text.StringLike as TagSoup
-- | The '@:' operator creates a 'Selector' by combining a 'TagName' with a list
-- of 'AttributePredicate's.
-(@:) :: TagName -> [AttributePredicate] -> Selector
+(@:) :: StringLike str => TagName str -> [AttributePredicate] -> Selector
(@:) tag attrs = MkSelector [(toSelectNode tag attrs, defaultSelectSettings)]
infixl 9 @:
@@ -31,20 +31,20 @@ infixl 9 @:
--
-- If you are attempting to match a specific class of a tag with potentially
-- multiple classes, you should use the 'hasClass' utility function.
-(@=) :: AttributeName -> String -> AttributePredicate
+(@=) :: StringLike str => AttributeName str -> str -> AttributePredicate
(@=) key value = anyAttrPredicate $ \(attrKey, attrValue) ->
- matchKey key attrKey
- && TagSoup.fromString value == attrValue
+ matchKey key (castString attrKey)
+ && value == (castString attrValue)
infixl 6 @=
-- | The '@=~' operator creates an 'AttributePredicate' that will match
-- attributes with the given name and whose value matches the given regular
-- expression.
-(@=~) :: RE.RegexLike re String
- => AttributeName -> re -> AttributePredicate
+(@=~) :: (RE.RegexLike re String, StringLike str)
+ => AttributeName str -> re -> AttributePredicate
(@=~) key re = anyAttrPredicate $ \(attrKey, attrValue) ->
- matchKey key attrKey
- && RE.matchTest re (TagSoup.toString attrValue)
+ matchKey key (castString attrKey)
+ && RE.matchTest re (toString attrValue)
infixl 6 @=~
-- | The 'atDepth' operator constrains a 'Selector' to only match when it is at
@@ -73,14 +73,14 @@ infixl 5 //
-- | The classes of a tag are defined in HTML as a space separated list given by
-- the @class@ attribute. The 'hasClass' function will match a @class@ attribute
-- if the given class appears anywhere in the space separated list of classes.
-hasClass :: String -> AttributePredicate
+hasClass :: StringLike str => str -> AttributePredicate
hasClass clazz = anyAttrPredicate hasClass'
where
hasClass' (attrName, classes)
- | "class" == TagSoup.toString attrName = textClass `elem` classList
+ | "class" == toString attrName = textClass `elem` classList
| otherwise = False
- where textClass = TagSoup.castString clazz
- textClasses = TagSoup.castString classes
+ where textClass = castString clazz
+ textClasses = castString classes
classList = T.split (== ' ') textClasses
-- | Negates an 'AttributePredicate'.
@@ -91,6 +91,6 @@ notP (MkAttributePredicate p) = MkAttributePredicate $ not . p
-- 'AttributePredicate's. The argument is a function that takes the attribute
-- key followed by the attribute value and returns a boolean indicating if the
-- attribute satisfies the predicate.
-match :: (String -> String -> Bool) -> AttributePredicate
+match :: StringLike str => (str -> str -> Bool) -> AttributePredicate
match f = anyAttrPredicate $ \(attrKey, attrValue) ->
- f (TagSoup.toString attrKey) (TagSoup.toString attrValue)
+ f (castString attrKey) (castString attrValue)
diff --git a/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Types.hs b/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Types.hs
index f1211ac..716a270 100644
--- a/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Types.hs
+++ b/scalpel-core/src/Text/HTML/Scalpel/Internal/Select/Types.hs
@@ -22,37 +22,37 @@ module Text.HTML.Scalpel.Internal.Select.Types (
import Data.Char (toLower)
import Data.String (IsString, fromString)
+import Text.StringLike (StringLike, strMap, castString)
import qualified Text.HTML.TagSoup as TagSoup
-import qualified Text.StringLike as TagSoup
import qualified Data.Text as T
-- | The 'AttributeName' type can be used when creating 'Selector's to specify
-- the name of an attribute of a tag.
-data AttributeName = AnyAttribute | AttributeString String
+data StringLike str => AttributeName str = AnyAttribute | AttributeString str
-matchKey :: TagSoup.StringLike str => AttributeName -> str -> Bool
-matchKey (AttributeString s) = ((TagSoup.fromString $ map toLower s) ==)
+matchKey :: StringLike str => AttributeName str -> str -> Bool
+matchKey (AttributeString s) = ((strMap toLower s) ==)
matchKey AnyAttribute = const True
-instance IsString AttributeName where
- fromString = AttributeString
+instance StringLike str => IsString (AttributeName str) where
+ fromString = AttributeString . castString
-- | An 'AttributePredicate' is a method that takes a 'TagSoup.Attribute' and
-- returns a 'Bool' indicating if the given attribute matches a predicate.
data AttributePredicate
= MkAttributePredicate
- (forall str. TagSoup.StringLike str => [TagSoup.Attribute str]
+ (forall str. StringLike str => [TagSoup.Attribute str]
-> Bool)
-checkPred :: TagSoup.StringLike str
+checkPred :: StringLike str
=> AttributePredicate -> [TagSoup.Attribute str] -> Bool
checkPred (MkAttributePredicate p) = p
-- | Creates an 'AttributePredicate' from a predicate function of a single
-- attribute that matches if any one of the attributes matches the predicate.
-anyAttrPredicate :: (forall str. TagSoup.StringLike str => (str, str) -> Bool)
+anyAttrPredicate :: (forall str. StringLike str => (str, str) -> Bool)
-> AttributePredicate
anyAttrPredicate p = MkAttributePredicate $ any p
@@ -76,7 +76,7 @@ defaultSelectSettings = SelectSettings {
selectSettingsDepth = Nothing
}
-tagSelector :: String -> Selector
+tagSelector :: StringLike str => str -> Selector
tagSelector tag = MkSelector [
(toSelectNode (TagString tag) [], defaultSelectSettings)
]
@@ -98,11 +98,11 @@ data SelectNode = SelectNode !T.Text [AttributePredicate]
-- | The 'TagName' type is used when creating a 'Selector' to specify the name
-- of a tag.
-data TagName = AnyTag | TagString String
+data StringLike str => TagName str = AnyTag | TagString str
-instance IsString TagName where
- fromString = TagString
+instance StringLike str => IsString (TagName str) where
+ fromString = TagString . castString
-toSelectNode :: TagName -> [AttributePredicate] -> SelectNode
+toSelectNode :: StringLike str => TagName str -> [AttributePredicate] -> SelectNode
toSelectNode AnyTag = SelectAny
-toSelectNode (TagString str) = SelectNode . TagSoup.fromString $ map toLower str
+toSelectNode (TagString str) = SelectNode $ T.map toLower $ castString str
diff --git a/scalpel-core/tests/TestMain.hs b/scalpel-core/tests/TestMain.hs
index 539e7eb..914c2bc 100644
--- a/scalpel-core/tests/TestMain.hs
+++ b/scalpel-core/tests/TestMain.hs
@@ -1,9 +1,11 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ExtendedDefaultRules #-}
module Main (main) where
-import Text.HTML.Scalpel.Core
+import Text.HTML.Scalpel.Core hiding ((@:), (@=~))
+import qualified Text.HTML.Scalpel.Core
import Control.Applicative
import Control.Monad (guard)
@@ -14,6 +16,8 @@ import Test.HUnit (Test(..), (@=?), (~:), runTestTT, failures)
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.Regex.TDFA
+default (String, Int)
+
main :: IO ()
main = do
n <- runTestTT (TestList [scrapeTests])
@@ -26,6 +30,12 @@ exit n = exitWith $ ExitFailure n
re :: String -> Text.Regex.TDFA.Regex
re = Text.Regex.TDFA.makeRegex
+(@:) :: TagName String -> [AttributePredicate] -> Selector
+(@:) = (Text.HTML.Scalpel.Core.@:)
+
+(@=~) :: AttributeName String -> Text.Regex.TDFA.Regex -> AttributePredicate
+(@=~) = (Text.HTML.Scalpel.Core.@=~)
+
scrapeTests = "scrapeTests" ~: TestList [
scrapeTest
"htmls should extract matching tag"
diff --git a/scalpel/scalpel.cabal b/scalpel/scalpel.cabal
index 6e8779d..e57e53e 100644
--- a/scalpel/scalpel.cabal
+++ b/scalpel/scalpel.cabal
@@ -44,7 +44,7 @@ library
, data-default
, http-client >= 0.4.30
, http-client-tls >= 0.2.4
- , tagsoup >= 0.12.2
+ , tagsoup >= 0.14.8
, text
default-extensions:
ParallelListComp
diff --git a/scalpel/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs b/scalpel/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs
index 16f2723..73a3418 100644
--- a/scalpel/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs
+++ b/scalpel/src/Text/HTML/Scalpel/Internal/Scrape/URL.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RankNTypes #-}
{-# OPTIONS_HADDOCK hide #-}
module Text.HTML.Scalpel.Internal.Scrape.URL (
URL
@@ -19,6 +20,7 @@ import Control.Applicative ((<$>))
import Data.CaseInsensitive ()
import Data.Default (def)
import Data.Maybe (fromMaybe, listToMaybe)
+import Text.StringLike (StringLike, castString)
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Default as Default
@@ -43,7 +45,7 @@ data Config str = Config {
, manager :: Maybe HTTP.Manager
}
-instance TagSoup.StringLike str => Default.Default (Config str) where
+instance StringLike str => Default.Default (Config str) where
def = Config {
decoder = defaultDecoder
, manager = Nothing
@@ -55,28 +57,28 @@ instance TagSoup.StringLike str => Default.Default (Config str) where
-- The default behavior is to use the global manager provided by
-- http-client-tls (via 'HTTP.getGlobalManager'). Any exceptions thrown by
-- http-client are not caught and are bubbled up to the caller.
-scrapeURL :: (TagSoup.StringLike str)
- => URL -> Scraper str a -> IO (Maybe a)
+scrapeURL :: (StringLike str, StringLike url)
+ => url -> Scraper str a -> IO (Maybe a)
scrapeURL = scrapeURLWithConfig def
-- | The 'scrapeURLWithConfig' function takes a 'Config' record type and
-- downloads the contents of the given URL and executes a 'Scraper' on it.
-scrapeURLWithConfig :: (TagSoup.StringLike str)
- => Config str -> URL -> Scraper str a -> IO (Maybe a)
+scrapeURLWithConfig :: (StringLike str, StringLike url)
+ => Config str -> url -> Scraper str a -> IO (Maybe a)
scrapeURLWithConfig config url scraper = do
manager <- fromMaybe HTTP.getGlobalManager (return <$> manager config)
tags <- downloadAsTags (decoder config) manager url
return (scrape scraper tags)
where
downloadAsTags decoder manager url = do
- request <- HTTP.parseRequest url
+ request <- HTTP.parseRequest $ castString url
response <- HTTP.httpLbs request manager
return $ TagSoup.parseTags $ decoder response
-- | The default response decoder. This decoder attempts to infer the character
-- set of the HTTP response body from the `Content-Type` header. If this header
-- is not present, then the character set is assumed to be `ISO-8859-1`.
-defaultDecoder :: TagSoup.StringLike str => Decoder str
+defaultDecoder :: StringLike str => Decoder str
defaultDecoder response = TagSoup.castString
$ choosenDecoder body
where
@@ -95,9 +97,9 @@ defaultDecoder response = TagSoup.castString
| otherwise = Text.decodeLatin1 . LBS.toStrict
-- | A decoder that will always decode using `UTF-8`.
-utf8Decoder :: TagSoup.StringLike str => Decoder str
-utf8Decoder = TagSoup.castString . Text.decodeUtf8 . LBS.toStrict . HTTP.responseBody
+utf8Decoder :: StringLike str => Decoder str
+utf8Decoder = castString . Text.decodeUtf8 . LBS.toStrict . HTTP.responseBody
-- | A decoder that will always decode using `ISO-8859-1`.
-iso88591Decoder :: TagSoup.StringLike str => Decoder str
-iso88591Decoder = TagSoup.castString . Text.decodeLatin1 . LBS.toStrict . HTTP.responseBody
+iso88591Decoder :: StringLike str => Decoder str
+iso88591Decoder = castString . Text.decodeLatin1 . LBS.toStrict . HTTP.responseBody
diff --git a/stack.yaml b/stack.yaml
index 4415597..a098f9c 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -3,4 +3,5 @@ packages:
- scalpel/
- scalpel-core/
- examples/
-resolver: lts-13.7
+resolver: lts-13.21
+extra-deps: []