From 4f5cc9bb417a22ca8eddaf21c98b23ab6b454cc0 Mon Sep 17 00:00:00 2001 From: Will Coster Date: Fri, 8 Dec 2023 01:10:48 +0000 Subject: [PATCH 1/2] Update stack resolver to something recent --- stack.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stack.yaml b/stack.yaml index 4415597..d2fde3d 100644 --- a/stack.yaml +++ b/stack.yaml @@ -3,4 +3,4 @@ packages: - scalpel/ - scalpel-core/ - examples/ -resolver: lts-13.7 +resolver: lts-18.28 From 72987322a95a0d691c4c56b9ea2bce3dddb06cbb Mon Sep 17 00:00:00 2001 From: Will Coster Date: Fri, 8 Dec 2023 01:45:40 +0000 Subject: [PATCH 2/2] Add error handling examples --- README.md | 78 +++++++++++++++++++++ examples/error-handling-with-writer/Main.hs | 72 +++++++++++++++++++ examples/error-handling/Main.hs | 71 +++++++++++++++++++ examples/scalpel-examples.cabal | 18 +++++ 4 files changed, 239 insertions(+) create mode 100644 examples/error-handling-with-writer/Main.hs create mode 100644 examples/error-handling/Main.hs diff --git a/README.md b/README.md index 776fbf0..67bb9dd 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,84 @@ For the full source of this example, see For more documentation on monad transformers, see the [hackage page](https://hackage.haskell.org/package/transformers) +### Explicit error handling + +`ScraperT` is an instance of `MonadError` which allows you to throw errors from +within parsing code to stop parsing and return an error. + +When doing error handling in this way, there are 3 cases to consider: + 1. An explicitly thrown error + 2. A failed scraping without a thrown error + 3. 
A valid result + +This can be implemented for `String`-valued errors as follows: + +``` +type Error = String +type ScraperWithError a = ScraperT String (Either Error) a + +scrapeStringOrError :: String -> ScraperWithError a -> Either Error a +scrapeStringOrError html scraper + | Left error <- result = Left error + | Right Nothing <- result = Left "Unknown error" + | Right (Just a) <- result = Right a + where + result = scrapeStringLikeT html scraper +``` + +To signal an error explicitly, use the `<|>` operator from `Alternative` to +throw an error when every preceding alternative fails: + +``` +comment :: ScraperWithError Comment +comment = textComment <|> imageComment <|> throwError "Unknown comment type" +``` + +With this approach, when you throw an error it will stop all parsing. So if you +have an expression `a <|> b` and there is a nested `throwError` in `a`, then +the parsing will fail, even if `b` would be successful. + +For the full source for this approach, see +[error-handling](https://github.com/fimad/scalpel/tree/master/examples/error-handling/) +in the examples directory. + +Another approach that would let you accumulate errors without stopping parsing +would be to use `MonadWriter` and accumulate debugging information in a `Monoid` +like a list: + +``` +type Error = String +type ScraperWithError a = ScraperT String (Writer [Error]) a + +scrapeStringOrError :: String -> ScraperWithError a -> (Maybe a, [Error]) +scrapeStringOrError html scraper = runWriter . 
scrapeStringLikeT html $ scraper +``` + +Then to log an error you can use `tell`: + +``` +comment :: ScraperWithError Comment +comment = textComment <|> imageComment <|> (tell ["Unknown comment type"] >> empty) +``` + +You can also retrieve the current HTML being parsed with `html anySelector` and +incorporate that into your log message: + +``` +logError :: String -> ScraperWithError a +logError message = do + currentHtml <- html anySelector + tell [message ++ currentHtml] + empty + +comment :: ScraperWithError Comment +comment = textComment <|> imageComment <|> logError "Unknown comment type: " +``` + +For the full source for this approach, see +[error-handling-with-writer](https://github.com/fimad/scalpel/tree/master/examples/error-handling-with-writer/) +in the examples directory. + ### scalpel-core The `scalpel` package depends on 'http-client' and 'http-client-tls' to provide diff --git a/examples/error-handling-with-writer/Main.hs b/examples/error-handling-with-writer/Main.hs new file mode 100644 index 0000000..f3bd9e3 --- /dev/null +++ b/examples/error-handling-with-writer/Main.hs @@ -0,0 +1,72 @@ +{-# LANGUAGE OverloadedStrings #-} + +import Text.HTML.Scalpel +import Control.Applicative +import Control.Monad.Writer.Class (tell) +import Control.Monad.Writer.Strict (Writer, runWriter) + + +exampleHtml :: String +exampleHtml = "\ +\ \ +\
\ +\
\ +\ Sally\ +\
Woo hoo!
\ +\
\ +\
\ +\ Bill\ +\ \ +\
\ +\
\ +\ Susan\ +\
WTF!?!
\ +\
\ +\
\ +\ Susan\ +\
A video? That's new!
\ +\
\ +\
\ +\ \ +\" + +type Error = String + +type Author = String + +data Comment + = TextComment Author String + | ImageComment Author URL + deriving (Show, Eq) + +type ScraperWithError a = ScraperT String (Writer [Error]) a + +scrapeStringOrError :: String -> ScraperWithError a -> (Maybe a, [Error]) +scrapeStringOrError html scraper = runWriter $ scrapeStringLikeT html scraper + +main :: IO () +main = print $ scrapeStringOrError exampleHtml comments + where + comments :: ScraperWithError [Comment] + comments = chroots ("div" @: [hasClass "container"]) comment + + logError :: String -> ScraperWithError a + logError message = do + currentHtml <- html anySelector + tell [message ++ currentHtml] + empty + + comment :: ScraperWithError Comment + comment = textComment <|> imageComment <|> logError "Unknown comment type: " + + textComment :: ScraperWithError Comment + textComment = do + author <- text $ "span" @: [hasClass "author"] + commentText <- text $ "div" @: [hasClass "text"] + return $ TextComment author commentText + + imageComment :: ScraperWithError Comment + imageComment = do + author <- text $ "span" @: [hasClass "author"] + imageURL <- attr "src" $ "img" @: [hasClass "image"] + return $ ImageComment author imageURL diff --git a/examples/error-handling/Main.hs b/examples/error-handling/Main.hs new file mode 100644 index 0000000..c4c0251 --- /dev/null +++ b/examples/error-handling/Main.hs @@ -0,0 +1,71 @@ +{-# LANGUAGE OverloadedStrings #-} + +import Text.HTML.Scalpel +import Control.Applicative +import Control.Monad.Error.Class (throwError) +import Control.Monad.Writer.Class (tell) + + +exampleHtml :: String +exampleHtml = "\ +\ \ +\
\ +\
\ +\ Sally\ +\
Woo hoo!
\ +\
\ +\
\ +\ Bill\ +\ \ +\
\ +\
\ +\ Susan\ +\
WTF!?!
\ +\
\ +\
\ +\ Susan\ +\
A video? That's new!
\ +\
\ +\
\ +\ \ +\" + +type Error = String + +type Author = String + +data Comment + = TextComment Author String + | ImageComment Author URL + deriving (Show, Eq) + +type ScraperWithError a = ScraperT String (Either Error) a + +scrapeStringOrError :: String -> ScraperWithError a -> Either Error a +scrapeStringOrError html scraper + | Left error <- result = Left error + | Right Nothing <- result = Left "Unknown error" + | Right (Just a) <- result = Right a + where + result = scrapeStringLikeT html scraper + +main :: IO () +main = print $ scrapeStringOrError exampleHtml comments + where + comments :: ScraperWithError [Comment] + comments = chroots ("div" @: [hasClass "container"]) comment + + comment :: ScraperWithError Comment + comment = textComment <|> imageComment <|> throwError "Unknown comment type" + + textComment :: ScraperWithError Comment + textComment = do + author <- text $ "span" @: [hasClass "author"] + commentText <- text $ "div" @: [hasClass "text"] + return $ TextComment author commentText + + imageComment :: ScraperWithError Comment + imageComment = do + author <- text $ "span" @: [hasClass "author"] + imageURL <- attr "src" $ "img" @: [hasClass "image"] + return $ ImageComment author imageURL diff --git a/examples/scalpel-examples.cabal b/examples/scalpel-examples.cabal index 8d2e122..59f4ce2 100644 --- a/examples/scalpel-examples.cabal +++ b/examples/scalpel-examples.cabal @@ -19,6 +19,24 @@ executable complex-predicates , scalpel >= 0.2.0 ghc-options: -W +executable error-handling + default-language: Haskell2010 + main-is: error-handling/Main.hs + build-depends: + base >= 4.6 && < 5 + , scalpel >= 0.2.0 + , mtl + ghc-options: -W + +executable error-handling-with-writer + default-language: Haskell2010 + main-is: error-handling-with-writer/Main.hs + build-depends: + base >= 4.6 && < 5 + , scalpel >= 0.2.0 + , mtl + ghc-options: -W + executable example-from-documentation default-language: Haskell2010 main-is: example-from-documentation/Main.hs