Skip to content

Commit

Permalink
Merge pull request #103 from fimad/error-handling-examples
Browse files Browse the repository at this point in the history
Error handling examples
  • Loading branch information
fimad authored Dec 8, 2023
2 parents 134db02 + 7298732 commit 88980e6
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 1 deletion.
78 changes: 78 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,84 @@ For the full source of this example, see

For more documentation on monad transformers, see the [hackage page](https://hackage.haskell.org/package/transformers)

### Explicit error handling

`ScraperT` is an instance of `MonadError` which allows you to throw errors from
within parsing code to stop parsing and return an error.

When doing error handling in this way, there are 3 cases to consider:
1. An explicitly thrown error
2. A failed scraping without a thrown error
3. A valid result

This can be implemented for `String` valued errors as follows:

```
type Error = String
type ScraperWithError a = ScraperT String (Either Error) a

-- | Run a scraper over an HTML string, folding a scrape that produced no
-- value into the error channel.
scrapeStringOrError :: String -> ScraperWithError a -> Either Error a
scrapeStringOrError source scraper =
    case scrapeStringLikeT source scraper of
        Left err       -> Left err             -- 1. an explicitly thrown error
        Right Nothing  -> Left "Unknown error" -- 2. failed without a thrown error
        Right (Just a) -> Right a              -- 3. a valid result
```

To throw an explicit error, use the `<|>` operator from `Alternative` to fall
back to `throwError` when every other alternative fails:

```
-- Falls back to throwError when neither the text nor the image scraper matches.
comment :: ScraperWithError Comment
comment = textComment <|> imageComment <|> throwError "Unknown comment type"
```

With this approach, throwing an error stops all parsing. So if you have an
expression `a <|> b` and there is a nested `throwError` in `a`, then parsing
fails with that error, even if `b` would have been successful.

For the full source for this approach, see
[error-handling](https://github.com/fimad/scalpel/tree/master/examples/error-handling/)
in the examples directory.

Another approach that would let you accumulate errors without stopping parsing
would be to use `MonadWriter` and accumulate debugging information in a `Monoid`
like a list:

```
type Error = String
type ScraperWithError a = ScraperT String (Writer [Error]) a

-- | Run a scraper over an HTML string, returning the scraped value (if any)
-- together with every error message logged along the way.
scrapeStringOrError :: String -> ScraperWithError a -> (Maybe a, [Error])
scrapeStringOrError html scraper = runWriter $ scrapeStringLikeT html scraper
```

Then to log an error you can use `tell`:

```
-- When neither scraper matches, record a message with tell and then fail with
-- empty so that no Comment is produced for this element.
comment :: ScraperWithError Comment
comment = textComment <|> imageComment <|> (tell ["Unknown comment type"] >> empty)
```

You can also retrieve the current HTML being parsed with `html anySelector` and
incorporate that into your log message:

```
-- | Log the given message followed by the HTML currently being scraped, then
-- fail so that no value is produced.
logError :: String -> ScraperWithError a
logError message = do
    currentHtml <- html anySelector
    -- Use the caller's message and the bound HTML, not a hard-coded prefix.
    tell [message ++ currentHtml]
    empty

comment :: ScraperWithError Comment
comment = textComment <|> imageComment <|> logError "Unknown comment type: "
```

For the full source for this approach, see
[error-handling-with-writer](https://github.com/fimad/scalpel/tree/master/examples/error-handling-with-writer/)
in the examples directory.

### scalpel-core

The `scalpel` package depends on 'http-client' and 'http-client-tls' to provide
Expand Down
72 changes: 72 additions & 0 deletions examples/error-handling-with-writer/Main.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{-# LANGUAGE OverloadedStrings #-}

import Text.HTML.Scalpel
import Control.Applicative
import Control.Monad.Writer.Class (tell)
import Control.Monad.Writer.Strict (Writer, runWriter)


-- | Sample document scraped by 'main': four comment containers holding two
-- text comments, one image comment, and one comment whose body carries a
-- @video@ class rather than @text@ or @image@.
exampleHtml :: String
exampleHtml = "<html>\
\ <body>\
\ <div class='comments'>\
\ <div class='comment container'>\
\ <span class='comment author'>Sally</span>\
\ <div class='comment text'>Woo hoo!</div>\
\ </div>\
\ <div class='comment container'>\
\ <span class='comment author'>Bill</span>\
\ <img class='comment image' src='http://example.com/cat.gif' />\
\ </div>\
\ <div class='comment container'>\
\ <span class='comment author'>Susan</span>\
\ <div class='comment text'>WTF!?!</div>\
\ </div>\
\ <div class='comment container'>\
\ <span class='comment author'>Susan</span>\
\ <div class='comment video'>A video? That's new!</div>\
\ </div>\
\ </div>\
\ </body>\
\</html>"

-- | A logged error message.
type Error = String

-- | The author of a comment.
type Author = String

-- | A scraped comment: either text or an image URL, each with its author.
data Comment
    = TextComment Author String
    | ImageComment Author URL
    deriving (Show, Eq)

-- | A scraper over 'String' input whose underlying monad is a 'Writer' that
-- accumulates a list of 'Error' messages.
type ScraperWithError a = ScraperT String (Writer [Error]) a

-- | Scrape an HTML string, returning the scraped value (if any) paired with
-- every message written to the 'Writer' log.
scrapeStringOrError :: String -> ScraperWithError a -> (Maybe a, [Error])
scrapeStringOrError source scraper =
    runWriter (scrapeStringLikeT source scraper)

-- | Scrape every comment container in 'exampleHtml' and print the result
-- together with the logged errors.
main :: IO ()
main = print (scrapeStringOrError exampleHtml allComments)
  where
    -- Run 'comment' against each div carrying the "container" class.
    allComments :: ScraperWithError [Comment]
    allComments = chroots ("div" @: [hasClass "container"]) comment

    -- Log the message followed by the HTML in scope, then fail.
    logError :: String -> ScraperWithError a
    logError message =
        html anySelector >>= \currentHtml ->
            tell [message ++ currentHtml] >> empty

    -- Try each known comment shape in turn; log when none applies.
    comment :: ScraperWithError Comment
    comment = textComment <|> imageComment <|> logError "Unknown comment type: "

    -- Author span followed by a div with the "text" class.
    textComment :: ScraperWithError Comment
    textComment =
        TextComment
            <$> text ("span" @: [hasClass "author"])
            <*> text ("div" @: [hasClass "text"])

    -- Author span followed by the src attribute of an img with the "image" class.
    imageComment :: ScraperWithError Comment
    imageComment =
        ImageComment
            <$> text ("span" @: [hasClass "author"])
            <*> attr "src" ("img" @: [hasClass "image"])
71 changes: 71 additions & 0 deletions examples/error-handling/Main.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{-# LANGUAGE OverloadedStrings #-}

import Text.HTML.Scalpel
import Control.Applicative
import Control.Monad.Error.Class (throwError)
import Control.Monad.Writer.Class (tell)


-- | Sample document scraped by 'main': four comment containers holding two
-- text comments, one image comment, and one comment whose body carries a
-- @video@ class rather than @text@ or @image@.
exampleHtml :: String
exampleHtml = "<html>\
\ <body>\
\ <div class='comments'>\
\ <div class='comment container'>\
\ <span class='comment author'>Sally</span>\
\ <div class='comment text'>Woo hoo!</div>\
\ </div>\
\ <div class='comment container'>\
\ <span class='comment author'>Bill</span>\
\ <img class='comment image' src='http://example.com/cat.gif' />\
\ </div>\
\ <div class='comment container'>\
\ <span class='comment author'>Susan</span>\
\ <div class='comment text'>WTF!?!</div>\
\ </div>\
\ <div class='comment container'>\
\ <span class='comment author'>Susan</span>\
\ <div class='comment video'>A video? That's new!</div>\
\ </div>\
\ </div>\
\ </body>\
\</html>"

-- | An error message carried in the 'Left' of the scrape result.
type Error = String

-- | The author of a comment.
type Author = String

-- | A scraped comment: either text or an image URL, each with its author.
data Comment
    = TextComment Author String
    | ImageComment Author URL
    deriving (Show, Eq)

-- | A scraper over 'String' input whose underlying monad is 'Either' 'Error'.
type ScraperWithError a = ScraperT String (Either Error) a

-- | Scrape an HTML string, collapsing both failure modes into 'Left': an
-- explicitly thrown error is passed through, and a scrape that yields no
-- value is reported as @"Unknown error"@.
scrapeStringOrError :: String -> ScraperWithError a -> Either Error a
scrapeStringOrError source scraper =
    case scrapeStringLikeT source scraper of
        Left err       -> Left err
        Right Nothing  -> Left "Unknown error"
        Right (Just a) -> Right a

-- | Scrape every comment container in 'exampleHtml' and print either the
-- scraped comments or the error.
main :: IO ()
main = print (scrapeStringOrError exampleHtml allComments)
  where
    -- Run 'comment' against each div carrying the "container" class.
    allComments :: ScraperWithError [Comment]
    allComments = chroots ("div" @: [hasClass "container"]) comment

    -- Try each known comment shape in turn; throw when none applies.
    comment :: ScraperWithError Comment
    comment = textComment <|> imageComment <|> throwError "Unknown comment type"

    -- Author span followed by a div with the "text" class.
    textComment :: ScraperWithError Comment
    textComment =
        TextComment
            <$> text ("span" @: [hasClass "author"])
            <*> text ("div" @: [hasClass "text"])

    -- Author span followed by the src attribute of an img with the "image" class.
    imageComment :: ScraperWithError Comment
    imageComment =
        ImageComment
            <$> text ("span" @: [hasClass "author"])
            <*> attr "src" ("img" @: [hasClass "image"])
18 changes: 18 additions & 0 deletions examples/scalpel-examples.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,24 @@ executable complex-predicates
, scalpel >= 0.2.0
ghc-options: -W

-- Example executable built from error-handling/Main.hs.
-- NOTE(review): mtl is presumably listed for Control.Monad.Error.Class, which
-- that file imports; confirm.
executable error-handling
  default-language: Haskell2010
  main-is: error-handling/Main.hs
  build-depends:
      base >= 4.6 && < 5
    , scalpel >= 0.2.0
    , mtl
  ghc-options: -W

-- Example executable built from error-handling-with-writer/Main.hs.
-- NOTE(review): mtl is presumably listed for the Control.Monad.Writer modules
-- that file imports; confirm.
executable error-handling-with-writer
  default-language: Haskell2010
  main-is: error-handling-with-writer/Main.hs
  build-depends:
      base >= 4.6 && < 5
    , scalpel >= 0.2.0
    , mtl
  ghc-options: -W

executable example-from-documentation
default-language: Haskell2010
main-is: example-from-documentation/Main.hs
Expand Down
2 changes: 1 addition & 1 deletion stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ packages:
- scalpel/
- scalpel-core/
- examples/
resolver: lts-13.7
resolver: lts-18.28

0 comments on commit 88980e6

Please sign in to comment.