diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 53 |
1 files changed, 45 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 32a1ba5a6..b06e07a80 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Definition +import Text.Pandoc.Readers.LaTeX (rawLaTeXInline) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options ( Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, - Ext_native_spans, Ext_raw_html, Ext_line_blocks), + Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex), ReaderOptions (readerExtensions, readerStripComments), extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) @@ -102,7 +104,8 @@ readHtml opts inp = do (m:_) -> messageString m result <- flip runReaderT def $ runParserT parseDoc - (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty []) + (HTMLState def{ stateOptions = opts } + [] Nothing Set.empty M.empty [] M.empty) "source" tags case result of Right doc -> return doc @@ -124,7 +127,8 @@ data HTMLState = baseHref :: Maybe URI, identifiers :: Set.Set String, headerMap :: M.Map Inlines String, - logMessages :: [LogMessage] + logMessages :: [LogMessage], + macros :: M.Map Text Macro } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -659,6 +663,7 @@ inline = choice , pCode , pSpan , pMath False + , pScriptMath , pRawHtmlInline ] @@ -745,18 +750,18 @@ pLink = try $ do let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ maybeFromAttrib "id" tag let cls = words $ T.unpack $ fromAttrib "class" tag - lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") + lab <- mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of Nothing -> - return $ B.spanWith (uid, cls, []) lab + return $ extractSpaces (B.spanWith (uid, cls, [])) lab Just url' -> do mbBaseHref <- baseHref <$> getState let url = case (parseURIReference url', mbBaseHref) of (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) _ -> url' - return $ B.linkWith (uid, cls, []) (escapeURI url) title lab + return $ extractSpaces (B.linkWith (uid, cls, []) (escapeURI url) title) lab pImage :: PandocMonad m => TagParser m Inlines pImage = do @@ -818,6 +823,17 @@ toStringAttr :: [(Text, Text)] -> [(String, String)] toStringAttr = map go where go (x,y) = (T.unpack x, T.unpack y) +pScriptMath :: PandocMonad m => TagParser m Inlines +pScriptMath = try $ do + TagOpen _ attr' <- pSatisfy $ tagOpen (=="script") (const True) + isdisplay <- case lookup "type" attr' of + Just x | "math/tex" `T.isPrefixOf` x + -> return $ "display" `T.isSuffixOf` x + _ -> mzero + contents <- T.unpack . innerText <$> + manyTill pAnyTag (pSatisfy (matchTagClose "script")) + return $ (if isdisplay then B.displayMath else B.math) contents + pMath :: PandocMonad m => Bool -> TagParser m Inlines pMath inCase = try $ do open@(TagOpen _ attr') <- pSatisfy $ tagOpen (=="math") (const True) @@ -852,7 +868,7 @@ pInTags' tagtype tagtest parser = try $ do pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t) mconcat <$> manyTill parser (pCloses tagtype <|> eof) --- parses p, preceeded by an optional opening tag +-- parses p, preceded by an optional opening tag -- and followed by an optional closing tags pOptInTag :: PandocMonad m => Text -> TagParser m a -> TagParser m a pOptInTag tagtype p = try $ do @@ -907,9 +923,25 @@ pTagContents = <|> pStr <|> pSpace <|> smartPunctuation pTagContents + <|> pRawTeX <|> pSymbol <|> pBad +pRawTeX :: PandocMonad m => InlinesParser m Inlines +pRawTeX = do + lookAhead $ try $ do + char '\\' + choice $ map (try . string) ["begin", "eqref", "ref"] + guardEnabled Ext_raw_tex + inp <- getInput + st <- getState + res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp) + case res of + Left _ -> mzero + Right (contents, raw) -> do + _ <- count (length raw) anyChar + return $ B.rawInline "tex" contents + pStr :: PandocMonad m => InlinesParser m Inlines pStr = do result <- many1 $ satisfy $ \c -> @@ -923,6 +955,7 @@ isSpecial '\'' = True isSpecial '.' = True isSpecial '-' = True isSpecial '$' = True +isSpecial '\\' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True @@ -1249,6 +1282,10 @@ isSpace _ = False -- Instances +instance HasMacros HTMLState where + extractMacros = macros + updateMacros f st = st{ macros = f $ macros st } + instance HasIdentifierList HTMLState where extractIdentifierList = identifiers updateIdentifierList f s = s{ identifiers = f (identifiers s) } @@ -1281,7 +1318,7 @@ instance HasLastStrPosition HTMLState where setLastStrPos s st = st {parserState = setLastStrPos s (parserState st)} getLastStrPos = getLastStrPos . parserState --- For now we need a special verison here; the one in Shared has String type +-- For now we need a special version here; the one in Shared has String type renderTags' :: [Tag Text] -> Text renderTags' = renderTagsOptions renderOptions{ optMinimize = matchTags ["hr", "br", "img", |