aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs21
1 files changed, 14 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 02ff07e73..52358e51e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,4 +1,5 @@
-{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses #-}
+{-# LANGUAGE FlexibleContexts, FlexibleInstances, MultiParamTypeClasses,
+ViewPatterns#-}
{-
Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
@@ -43,7 +44,7 @@ import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, renderTags'
- , escapeURI, safeRead )
+ , escapeURI, safeRead, mapLeft )
import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
, Extension (Ext_epub_html_exts,
Ext_native_divs, Ext_native_spans))
@@ -62,15 +63,18 @@ import Text.TeXMath (readMathML, writeTeX)
import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
+import Text.Pandoc.Error
+
+import Text.Parsec.Error
+
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Pandoc
+ -> Either PandocError Pandoc
readHtml opts inp =
- case flip runReader def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags of
- Left err' -> error $ "\nError at " ++ show err'
- Right result -> result
+ mapLeft (ParseFailure . getError) . flip runReader def $
+ runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
@@ -78,6 +82,9 @@ readHtml opts inp =
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
return $ Pandoc meta bs'
+ getError (errorMessages -> ms) = case ms of
+ [] -> ""
+ (m:_) -> messageString m
replaceNotes :: [Block] -> TagParser [Block]
replaceNotes = walkM replaceNotes'
@@ -880,7 +887,7 @@ htmlTag :: Monad m
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
- lookAhead $ char '<' >> (oneOf "/!?" <|> letter)
+ lookAhead $ char '<' >> ((oneOf "/!?" >> nonspaceChar) <|> letter)
(next : _) <- getInput >>= return . canonicalizeTags . parseTags
guard $ f next
-- advance the parser