From 01fed75b8f755bf4de3d4ec2b720e4dd9e66951a Mon Sep 17 00:00:00 2001 From: MinRK Date: Thu, 7 Nov 2013 22:25:44 -0800 Subject: recognize svg tag in HTML Reader avoids adding lots of `

` tags in embedded SVG content, for instance in markdown to HTML. --- src/Text/Pandoc/Readers/HTML.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers/HTML.hs') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 80279bf61..d691c9878 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -557,7 +557,7 @@ blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button", "noframes", "noscript", "object", "ol", "output", "p", "pre", "progress", "section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd", "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script", "style", "video"] + "th", "thead", "tr", "script", "style", "svg", "video"] -- We want to allow raw docbook in markdown documents, so we -- include docbook block tags here too. -- cgit v1.2.3 From def05d3504bfc90c31c1621438f7a51c95079ad2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Dec 2013 16:43:59 -0800 Subject: HTML reader: Parse LaTeX math if appropriate options are set. * Moved inlineMath, displayMath from Markdown reader to Parsing. * Export them from Parsing. (API change.) * Generalize their types. --- src/Text/Pandoc/Parsing.hs | 35 +++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/HTML.hs | 9 ++++++++- src/Text/Pandoc/Readers/LaTeX.hs | 3 ++- src/Text/Pandoc/Readers/Markdown.hs | 33 --------------------------------- 4 files changed, 45 insertions(+), 35 deletions(-) (limited to 'src/Text/Pandoc/Readers/HTML.hs') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 9687d7712..e15854333 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -48,6 +48,8 @@ module Text.Pandoc.Parsing ( (>>~), romanNumeral, emailAddress, uri, + mathInline, + mathDisplay, withHorizDisplacement, withRaw, escaped, @@ -455,6 +457,39 @@ uri = try $ do let uri' = scheme ++ ":" ++ fromEntities str' return (uri', escapeURI uri') +mathInlineWith :: String -> String -> Parser [Char] st String +mathInlineWith op cl = try $ do + string op + notFollowedBy space + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' + +mathDisplayWith :: String -> String -> Parser [Char] st String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + -- | Applies a parser, returns tuple of its results and its horizontal -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d691c9878..e758f712f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -467,7 +467,13 @@ pBlank = try $ do pTagContents :: Parser [Char] ParserState Inline pTagContents = - pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad + Math InlineMath `fmap` mathInline + <|> Math DisplayMath `fmap` mathDisplay + <|> pStr + <|> pSpace + <|> smartPunctuation pTagContents + <|> pSymbol + <|> pBad pStr :: Parser [Char] ParserState Inline pStr = do @@ -482,6 +488,7 @@ isSpecial '"' = True isSpecial '\'' = True isSpecial '.' = True isSpecial '-' = True +isSpecial '$' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 75e29ebb9..509cb5d74 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,7 +38,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, + mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4cb75d86c..11168bc09 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1408,39 +1408,6 @@ math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . B.math <$> (mathInline >>= applyMacros')) -mathDisplay :: MarkdownParser String -mathDisplay = - (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathDisplayWith "\\[" "\\]") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathDisplayWith "\\\\[" "\\\\]") - -mathDisplayWith :: String -> String -> MarkdownParser String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) - -mathInline :: MarkdownParser String -mathInline = - (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathInlineWith "\\(" "\\)") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathInlineWith "\\\\(" "\\\\)") - -mathInlineWith :: String -> String -> MarkdownParser String -mathInlineWith op cl = try $ do - string op - notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) - notFollowedBy digit -- to prevent capture of $5 - return $ concat words' - -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char -- cgit v1.2.3 From 3d70059a4896c66cab59832a3ed9eb8334b84a2f Mon Sep 17 00:00:00 2001 From: Henry de Valence Date: Thu, 19 Dec 2013 21:07:09 -0500 Subject: HLint: use fromMaybe Replace uses of `maybe x id` with `fromMaybe x`. --- src/Text/Pandoc/PDF.hs | 3 ++- src/Text/Pandoc/Readers/DocBook.hs | 7 ++++--- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- src/Text/Pandoc/Readers/Markdown.hs | 4 ++-- src/Text/Pandoc/Readers/MediaWiki.hs | 7 ++++--- src/Text/Pandoc/Writers/Docx.hs | 6 ++++-- src/Text/Pandoc/Writers/EPUB.hs | 12 ++++++------ src/Text/Pandoc/Writers/HTML.hs | 4 ++-- src/Text/Pandoc/Writers/LaTeX.hs | 3 ++- src/Text/Pandoc/Writers/ODT.hs | 3 ++- 10 files changed, 30 insertions(+), 23 deletions(-) (limited to 'src/Text/Pandoc/Readers/HTML.hs') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index e8683b98f..360338f8f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -41,6 +41,7 @@ import System.Directory import System.Environment import Control.Monad (unless) import Data.List (isInfixOf) +import Data.Maybe (fromMaybe) import qualified Data.ByteString.Base64 as B64 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition @@ -87,7 +88,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do res <- fetchItem baseURL src case res of Right (contents, Just mime) -> do - let ext = maybe (takeExtension src) id $ + let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime let basename = UTF8.toString $ B64.encode $ UTF8.fromString src let fname = tmpdir basename <.> ext diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 03c6140ac..56cb16b20 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,6 +12,7 @@ import Data.Char (isSpace) import Control.Monad.State import Control.Applicative ((<$>)) import Data.List (intersperse) +import Data.Maybe (fromMaybe) {- @@ -683,7 +684,7 @@ parseBlock (Elem e) = "lowerroman" -> LowerRoman "upperroman" -> UpperRoman _ -> Decimal - let start = maybe 1 id $ + let start = fromMaybe 1 $ (attrValue "override" <$> filterElement (named "listitem") e) >>= safeRead orderedListWith (start,listStyle,DefaultDelim) @@ -779,7 +780,7 @@ parseBlock (Elem e) = caption <- case filterChild isCaption e of Just t -> getInlines t Nothing -> return mempty - let e' = maybe e id $ filterChild (named "tgroup") e + let e' = fromMaybe e $ filterChild (named "tgroup") e let isColspec x = named "colspec" x || named "col" x let colspecs = case filterChild (named "colgroup") e' of Just c -> filterChildren isColspec c @@ -801,7 +802,7 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of - Just w -> maybe 0 id + Just w -> fromMaybe 0 $ safeRead $ '0': filter (\x -> (x >= '0' && x <= '9') || x == '.') w diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e758f712f..4b44a3a21 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -207,7 +207,7 @@ pHeader = try $ do let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] let level = read (drop 1 tagtype) contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof) - let ident = maybe "" id $ lookup "id" attr + let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] return $ if bodyTitle @@ -257,7 +257,7 @@ pCol = try $ do skipMany pBlank return $ case lookup "width" attribs of Just x | not (null x) && last x == '%' -> - maybe 0.0 id $ safeRead ('0':'.':init x) + fromMaybe 0.0 $ safeRead ('0':'.':init x) _ -> 0.0 pColgroup :: TagParser [Double] diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 166c524ef..3a5d29b4e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1722,7 +1722,7 @@ spanHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) - let ident = maybe "" id $ lookup "id" attrs + let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.spanWith (ident, classes, keyvals) <$> contents @@ -1732,7 +1732,7 @@ divHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" []) contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div")) - let ident = maybe "" id $ lookup "id" attrs + let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 1c074e3de..8d8ea0199 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -54,6 +54,7 @@ import Data.Sequence (viewl, ViewL(..), (<|)) import qualified Data.Foldable as F import qualified Data.Map as M import Data.Char (isDigit, isSpace) +import Data.Maybe (fromMaybe) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options @@ -204,7 +205,7 @@ table = do tableStart styles <- option [] parseAttrs <* blankline let tableWidth = case lookup "width" styles of - Just w -> maybe 1.0 id $ parseWidth w + Just w -> fromMaybe 1.0 $ parseWidth w Nothing -> 1.0 caption <- option mempty tableCaption optional rowsep @@ -285,7 +286,7 @@ tableCell = try $ do Just "center" -> AlignCenter _ -> AlignDefault let width = case lookup "width" attrs of - Just xs -> maybe 0.0 id $ parseWidth xs + Just xs -> fromMaybe 0.0 $ parseWidth xs Nothing -> 0.0 return ((align, width), bs) @@ -387,7 +388,7 @@ orderedList = spaces items <- many (listItem '#' <|> li) optional (htmlTag (~== TagClose "ol")) - let start = maybe 1 id $ safeRead $ fromAttrib "start" tag + let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items definitionList :: MWParser Blocks diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 5c7341b69..32ba7715a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,6 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where +import Data.Maybe (fromMaybe) import Data.List ( intercalate, groupBy ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL @@ -130,7 +131,8 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType) + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) let overrides = map mkOverrideNode [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -322,7 +324,7 @@ mkNum markers marker numid = NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = maybe 0 id $ M.lookup marker markers + where absnumid = fromMaybe 0 $ M.lookup marker markers mkAbstractNum :: (ListMarker,Int) -> IO Element mkAbstractNum (marker,numid) = do diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 36ead0b8f..4daa9609e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -176,8 +176,8 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md , titleFileAs = getAttr "file-as" , titleType = getAttr "type" } : epubTitle md } - | name == "date" = md{ epubDate = maybe "" id $ normalizeDate' - $ strContent e } + | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate' + $ strContent e } | name == "language" = md{ epubLanguage = strContent e } | name == "creator" = md{ epubCreator = Creator{ creatorText = strContent e @@ -271,7 +271,7 @@ metadataFromMeta opts meta = EPUBMetadata{ } where identifiers = getIdentifier meta titles = getTitle meta - date = maybe "" id $ + date = fromMaybe "" $ (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate' language = maybe "" metaValueToString $ lookupMeta "language" meta `mplus` lookupMeta "lang" meta @@ -297,7 +297,7 @@ writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString writeEPUB opts doc@(Pandoc meta _) = do - let version = maybe EPUB2 id (writerEpubVersion opts) + let version = fromMaybe EPUB2 (writerEpubVersion opts) let epub3 = version == EPUB3 epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content @@ -401,7 +401,7 @@ writeEPUB opts doc@(Pandoc meta _) = do chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) $ renderHtml $ writeHtml opts'{ writerNumberOffset = - maybe [] id mbnum } + fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> Pandoc (setMeta "title" (fromList xs) nullMeta) bs @@ -436,7 +436,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let fontNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), - ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () let plainTitle = case docTitle meta of [] -> case epubTitle metadata of [] -> "UNTITLED" diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 2c6435457..129776363 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -45,7 +45,7 @@ import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes ) +import Data.Maybe ( catMaybes, fromMaybe ) import Control.Monad.State import Text.Blaze.Html hiding(contents) import Text.Blaze.Internal(preEscapedString) @@ -118,7 +118,7 @@ pandocToHtml opts (Pandoc meta blocks) = do let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta - let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts let sects = hierarchicalize $ if writerSlideVariant opts == NoSlides then blocks diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 1deacecb4..a76d6d82b 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -40,6 +40,7 @@ import Network.URI ( isURI, unEscapeString ) import Data.List ( (\\), isSuffixOf, isInfixOf, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) +import Data.Maybe ( fromMaybe ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -240,7 +241,7 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents toSlides :: [Block] -> State WriterState [Block] toSlides bs = do opts <- gets stOptions - let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts let bs' = prepSlides slideLevel bs concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index cc0a06243..25cd5ae13 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -31,6 +31,7 @@ Conversion of 'Pandoc' documents to ODT. module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef import Data.List ( isPrefixOf ) +import Data.Maybe ( fromMaybe ) import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip @@ -127,7 +128,7 @@ transformPic opts entriesRef (Image lab (src,_)) = do return $ Emph lab Right (img, _) -> do let size = imageSize img - let (w,h) = maybe (0,0) id $ sizeInPoints `fmap` size + let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src -- cgit v1.2.3 From f3ee82373b4ad8e955db12d3c2c2159a2bea53a0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 1 Jan 2014 09:22:37 -0800 Subject: HTML reader: Parse name/content pairs from meta tags as metadata. Closes #1106. --- src/Text/Pandoc/Readers/HTML.hs | 11 ++++++++++- tests/html-reader.native | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/HTML.hs') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4b44a3a21..506fe7770 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -76,9 +76,18 @@ pBody :: TagParser [Block] pBody = pInTags "body" block pHead :: TagParser [Block] -pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) + pMetaTag = do + mt <- pSatisfy (~== TagOpen "meta" []) + let name = fromAttrib "name" mt + if null name + then return [] + else do + let content = fromAttrib "content" mt + updateState $ B.setMeta name (B.text content) + return [] block :: TagParser [Block] block = choice diff --git a/tests/html-reader.native b/tests/html-reader.native index 794512426..e80905729 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] -- cgit v1.2.3 From 6c59f060a769c7f312137d5c1a49cf878ea5b2a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 20 Jan 2014 11:09:44 -0800 Subject: HTML reader: Fixed bug reading inline math with `$$`. See #225. --- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/HTML.hs') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 506fe7770..d1e4d0024 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -476,8 +476,8 @@ pBlank = try $ do pTagContents :: Parser [Char] ParserState Inline pTagContents = - Math InlineMath `fmap` mathInline - <|> Math DisplayMath `fmap` mathDisplay + Math DisplayMath `fmap` mathDisplay + <|> Math InlineMath `fmap` mathInline <|> pStr <|> pSpace <|> smartPunctuation pTagContents -- cgit v1.2.3