aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs53
1 files changed, 45 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 32a1ba5a6..b06e07a80 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Definition
+import Text.Pandoc.Readers.LaTeX (rawLaTeXInline)
+import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
- Ext_native_spans, Ext_raw_html, Ext_line_blocks),
+ Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
@@ -102,7 +104,8 @@ readHtml opts inp = do
(m:_) -> messageString m
result <- flip runReaderT def $
runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
+ (HTMLState def{ stateOptions = opts }
+ [] Nothing Set.empty M.empty [] M.empty)
"source" tags
case result of
Right doc -> return doc
@@ -124,7 +127,8 @@ data HTMLState =
baseHref :: Maybe URI,
identifiers :: Set.Set String,
headerMap :: M.Map Inlines String,
- logMessages :: [LogMessage]
+ logMessages :: [LogMessage],
+ macros :: M.Map Text Macro
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -659,6 +663,7 @@ inline = choice
, pCode
, pSpan
, pMath False
+ , pScriptMath
, pRawHtmlInline
]
@@ -745,18 +750,18 @@ pLink = try $ do
let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
maybeFromAttrib "id" tag
let cls = words $ T.unpack $ fromAttrib "class" tag
- lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
+ lab <- mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
Nothing ->
- return $ B.spanWith (uid, cls, []) lab
+ return $ extractSpaces (B.spanWith (uid, cls, [])) lab
Just url' -> do
mbBaseHref <- baseHref <$> getState
let url = case (parseURIReference url', mbBaseHref) of
(Just rel, Just bs) ->
show (rel `nonStrictRelativeTo` bs)
_ -> url'
- return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
+ return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
@@ -818,6 +823,17 @@ toStringAttr :: [(Text, Text)] -> [(String, String)]
toStringAttr = map go
where go (x,y) = (T.unpack x, T.unpack y)
+pScriptMath :: PandocMonad m => TagParser m Inlines
+pScriptMath = try $ do
+ TagOpen _ attr' <- pSatisfy $ tagOpen (=="script") (const True)
+ isdisplay <- case lookup "type" attr' of
+ Just x | "math/tex" `T.isPrefixOf` x
+ -> return $ "display" `T.isSuffixOf` x
+ _ -> mzero
+ contents <- T.unpack . innerText <$>
+ manyTill pAnyTag (pSatisfy (matchTagClose "script"))
+ return $ (if isdisplay then B.displayMath else B.math) contents
+
pMath :: PandocMonad m => Bool -> TagParser m Inlines
pMath inCase = try $ do
open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True)
@@ -852,7 +868,7 @@ pInTags' tagtype tagtest parser = try $ do
pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
--- parses p, preceeded by an optional opening tag
+-- parses p, preceded by an optional opening tag
-- and followed by an optional closing tags
pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a
pOptInTag tagtype p = try $ do
@@ -907,9 +923,25 @@ pTagContents =
<|> pStr
<|> pSpace
<|> smartPunctuation pTagContents
+ <|> pRawTeX
<|> pSymbol
<|> pBad
+pRawTeX :: PandocMonad m => InlinesParser m Inlines
+pRawTeX = do
+ lookAhead $ try $ do
+ char '\\'
+ choice $ map (try . string) ["begin", "eqref", "ref"]
+ guardEnabled Ext_raw_tex
+ inp <- getInput
+ st <- getState
+ res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
+ case res of
+ Left _ -> mzero
+ Right (contents, raw) -> do
+ _ <- count (length raw) anyChar
+ return $ B.rawInline "tex" contents
+
pStr :: PandocMonad m => InlinesParser m Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
@@ -923,6 +955,7 @@ isSpecial '\'' = True
isSpecial '.' = True
isSpecial '-' = True
isSpecial '$' = True
+isSpecial '\\' = True
isSpecial '\8216' = True
isSpecial '\8217' = True
isSpecial '\8220' = True
@@ -1249,6 +1282,10 @@ isSpace _ = False
-- Instances
+instance HasMacros HTMLState where
+ extractMacros = macros
+ updateMacros f st = st{ macros = f $ macros st }
+
instance HasIdentifierList HTMLState where
extractIdentifierList = identifiers
updateIdentifierList f s = s{ identifiers = f (identifiers s) }
@@ -1281,7 +1318,7 @@ instance HasLastStrPosition HTMLState where
setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)}
getLastStrPos = getLastStrPos . parserState
--- For now we need a special verison here; the one in Shared has String type
+-- For now we need a special version here; the one in Shared has String type
renderTags' :: [Tag Text] -> Text
renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",