diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/CommonMark.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 26 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 29 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 57 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 30 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 18 |
9 files changed, 117 insertions, 87 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 51a35c8ad..7f752c446 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -113,7 +113,7 @@ addInline (Node _ EMPH nodes) = addInline (Node _ STRONG nodes) = (Strong (addInlines nodes) :) addInline (Node _ (LINK url title) nodes) = - (Link (addInlines nodes) (unpack url, unpack title) :) + (Link nullAttr (addInlines nodes) (unpack url, unpack title) :) addInline (Node _ (IMAGE url title) nodes) = - (Image (addInlines nodes) (unpack url, unpack title) :) + (Image nullAttr (addInlines nodes) (unpack url, unpack title) :) addInline _ = id diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index f679ddb57..e8fe92e27 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -635,11 +635,20 @@ addToStart toadd bs = -- A DocBook mediaobject is a wrapper around a set of alternative presentations getMediaobject :: Element -> DB Inlines getMediaobject e = do - imageUrl <- case filterChild (named "imageobject") e of - Nothing -> return mempty - Just z -> case filterChild (named "imagedata") z of - Nothing -> return mempty - Just i -> return $ attrValue "fileref" i + (imageUrl, attr) <- + case filterChild (named "imageobject") e of + Nothing -> return (mempty, nullAttr) + Just z -> case filterChild (named "imagedata") z of + Nothing -> return (mempty, nullAttr) + Just i -> let atVal a = attrValue a i + w = case atVal "width" of + "" -> [] + d -> [("width", d)] + h = case atVal "depth" of + "" -> [] + d -> [("height", d)] + atr = (atVal "id", words $ atVal "role", w ++ h) + in return (atVal "fileref", atr) let getCaption el = case filterChild (\x -> named "caption" x || named "textobject" x || named "alt" x) el of @@ -649,7 +658,7 @@ getMediaobject e = do let (caption, title) = if isNull figTitle then (getCaption e, "") else (return figTitle, "fig:") - liftM (image imageUrl title) caption + liftM (imageWith attr imageUrl title) caption getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) @@ -968,7 +977,8 @@ parseInline (Elem e) = Just h -> h _ -> ('#' : attrValue "linkend" e) let ils' = if ils == mempty then str href else ils - return $ link href "" ils' + let attr = (attrValue "id" e, words $ attrValue "role" e, []) + return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of "bold" -> strong <$> innerInlines diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9f1c7af0a..439e2d3e4 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -539,10 +539,10 @@ bodyPartToBlocks (OMathPara e) = do -- replace targets with generated anchors. rewriteLink' :: Inline -> DocxContext Inline -rewriteLink' l@(Link ils ('#':target, title)) = do +rewriteLink' l@(Link attr ils ('#':target, title)) = do anchorMap <- gets docxAnchorMap return $ case M.lookup target anchorMap of - Just newTarget -> (Link ils ('#':newTarget, title)) + Just newTarget -> (Link attr ils ('#':newTarget, title)) Nothing -> l rewriteLink' il = return il diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index b8698fe26..79aa540f6 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -100,12 +100,12 @@ fetchImages mimes root arc (query iq -> links) = <$> findEntryByPath abslink arc iq :: Inline -> [FilePath] -iq (Image _ (url, _)) = [url] +iq (Image _ _ (url, _)) = [url] iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline -renameImages root (Image a (url, b)) = Image a (collapseFilePath (root </> url), b) +renameImages root (Image attr a (url, b)) = Image attr a (collapseFilePath (root </> url), b) renameImages _ x = x imageToPandoc :: FilePath -> Pandoc @@ -190,14 +190,14 @@ fixInlineIRs s (Span as v) = Span (fixAttrs s as) v fixInlineIRs s (Code as code) = Code (fixAttrs s as) code -fixInlineIRs s (Link t ('#':url, tit)) = - Link t (addHash s url, tit) +fixInlineIRs s (Link attr t ('#':url, tit)) = + Link attr t (addHash s url, tit) fixInlineIRs _ v = v prependHash :: [String] -> Inline -> Inline -prependHash ps l@(Link is (url, tit)) +prependHash ps l@(Link attr is (url, tit)) | or [s `isPrefixOf` url | s <- ps] = - Link is ('#':url, tit) + Link attr is ('#':url, tit) | otherwise = l prependHash _ i = i diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index ce10a289e..85e9a0743 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -601,16 +601,8 @@ pLineBreak = do return B.linebreak pLink :: TagParser Inlines -pLink = pRelLink <|> pAnchor - -pAnchor :: TagParser Inlines -pAnchor = try $ do - tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "id")) - return $ B.spanWith (fromAttrib "id" tag , [], []) mempty - -pRelLink :: TagParser Inlines -pRelLink = try $ do - tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) +pLink = try $ do + tag <- pSatisfy $ tagOpenLit "a" (const True) mbBaseHref <- baseHref <$> getState let url' = fromAttrib "href" tag let url = case (isURI url', mbBaseHref) of @@ -618,11 +610,9 @@ pRelLink = try $ do _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag - let spanC = case uid of - [] -> id - s -> B.spanWith (s, [], []) + let cls = words $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ spanC $ B.link (escapeURI url) title lab + return $ B.linkWith (uid, cls, []) (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -634,7 +624,13 @@ pImage = do _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - return $ B.image (escapeURI url) title (B.text alt) + let uid = fromAttrib "id" tag + let cls = words $ fromAttrib "class" tag + let getAtt k = case fromAttrib k tag of + "" -> [] + v -> [(k, v)] + let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"] + return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) pCode :: TagParser Inlines pCode = try $ do diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index b9645d034..673deba14 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -54,6 +54,7 @@ import Data.List (intercalate) import qualified Data.Map as M import qualified Control.Exception as E import Text.Pandoc.Highlighting (fromListingsLanguage) +import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Error -- | Parse LaTeX from string and return 'Pandoc' document. @@ -398,7 +399,8 @@ inlineCommand = try $ do star <- option "" (string "*") let name' = name ++ star let raw = do - rawcommand <- getRawCommand name' + rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced) + let rawcommand = '\\' : name ++ star ++ snd rawargs transformed <- applyMacros' rawcommand if transformed /= rawcommand then parseFromString inlines transformed @@ -528,7 +530,9 @@ inlineCommands = M.fromList $ , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> tok >>= \lab -> pure (link url "" lab)) - , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage) + , ("includegraphics", do options <- option [] keyvals + src <- unescapeURL <$> braced + mkImage options src) , ("enquote", enquote) , ("cite", citation "cite" AuthorInText False) , ("Cite", citation "cite" AuthorInText False) @@ -590,14 +594,19 @@ inlineCommands = M.fromList $ -- in which case they will appear as raw latex blocks: [ "index" ] -mkImage :: String -> LP Inlines -mkImage src = do +mkImage :: [(String, String)] -> String -> LP Inlines +mkImage options src = do + let replaceTextwidth (k,v) = case numUnit v of + Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + _ -> (k, v) + let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options + let attr = ("",[], kvs) let alt = str "image" case takeExtension src of "" -> do defaultExt <- getOption readerDefaultImageExtension - return $ image (addExtension src defaultExt) "" alt - _ -> return $ image src "" alt + return $ imageWith attr (addExtension src defaultExt) "" alt + _ -> return $ imageWith attr src "" alt inNote :: Inlines -> Inlines inNote ils = @@ -978,7 +987,7 @@ readFileFromDirs (d:ds) f = keyval :: LP (String, String) keyval = try $ do key <- many1 alphaNum - val <- option "" $ char '=' >> many1 alphaNum + val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\') skipMany spaceChar optional (char ',') skipMany spaceChar @@ -1005,11 +1014,11 @@ rawLaTeXInline = do addImageCaption :: Blocks -> LP Blocks addImageCaption = walkM go - where go (Image alt (src,tit)) = do + where go (Image attr alt (src,tit)) = do mbcapt <- stateCaption <$> getState return $ case mbcapt of - Just ils -> Image (toList ils) (src, "fig:") - Nothing -> Image alt (src,tit) + Just ils -> Image attr (toList ils) (src, "fig:") + Nothing -> Image attr alt (src,tit) go x = return x addTableCaption :: Blocks -> LP Blocks diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 7e811a966..fd16a5f75 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -368,23 +368,26 @@ referenceKey = try $ do let sourceURL = liftM unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle + notFollowedBy' $ guardEnabled Ext_common_link_attributes >> attributes notFollowedBy' (() <$ reference) many1 $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> manyTill litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle - -- currently we just ignore MMD-style link/image attributes - _kvs <- option [] $ guardEnabled Ext_link_attributes - >> many (try $ spnl >> keyValAttr) + attr <- option nullAttr $ try $ + guardEnabled Ext_common_link_attributes >> skipSpaces >> attributes + addKvs <- option [] $ guardEnabled Ext_link_attributes + >> many (try $ spnl >> keyValAttr) blanklines - let target = (escapeURI $ trimr src, tit) + let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs + target = (escapeURI $ trimr src, tit) st <- getState let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () - updateState $ \s -> s { stateKeys = M.insert key target oldkeys } + updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty referenceTitle :: MarkdownParser String @@ -517,9 +520,9 @@ atxHeader = try $ do (text, raw) <- withRaw $ trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw ident + <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -560,16 +563,16 @@ setextHeader = try $ do many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw ident + <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: String -> String -> MarkdownParser () -registerImplicitHeader raw ident = do +registerImplicitHeader :: String -> Attr -> MarkdownParser () +registerImplicitHeader raw attr@(ident, _, _) = do let key = toKey $ "[" ++ raw ++ "]" updateState (\s -> s { stateHeaderKeys = - M.insert key ('#':ident,"") (stateHeaderKeys s) }) + M.insert key (('#':ident,""), attr) (stateHeaderKeys s) }) -- -- hrule block @@ -980,11 +983,11 @@ para = try $ do return $ do result' <- result case B.toList result' of - [Image alt (src,tit)] + [Image attr alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton - $ Image alt (src,'f':'i':'g':':':tit) + $ Image attr alt (src,'f':'i':'g':':':tit) _ -> return $ B.para result' plain :: MarkdownParser (F Blocks) @@ -1719,16 +1722,18 @@ link = try $ do setState $ st{ stateAllowLinks = False } (lab,raw) <- reference setState $ st{ stateAllowLinks = True } - regLink B.link lab <|> referenceLink B.link (lab,raw) + regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -regLink :: (String -> String -> Inlines -> Inlines) +regLink :: (Attr -> String -> String -> Inlines -> Inlines) -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source - return $ constructor src tit <$> lab + attr <- option nullAttr $ + guardEnabled Ext_common_link_attributes >> attributes + return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (String -> String -> Inlines -> Inlines) +referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1740,7 +1745,7 @@ referenceLink constructor (lab, raw) = do let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references let makeFallback = do @@ -1757,10 +1762,10 @@ referenceLink constructor (lab, raw) = do then do headerKeys <- asksF stateHeaderKeys case M.lookup key headerKeys of - Just (src, tit) -> constructor src tit <$> lab - Nothing -> makeFallback + Just ((src, tit), _) -> constructor nullAttr src tit <$> lab + Nothing -> makeFallback else makeFallback - Just (src,tit) -> constructor src tit <$> lab + Just ((src,tit), attr) -> constructor attr src tit <$> lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1794,9 +1799,9 @@ image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor src = case takeExtension src of - "" -> B.image (addExtension src defaultExt) - _ -> B.image src + let constructor attr' src = case takeExtension src of + "" -> B.imageWith attr' (addExtension src defaultExt) + _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) note :: MarkdownParser (F Inlines) @@ -1947,7 +1952,7 @@ textualCite = try $ do spc | null spaces' = mempty | otherwise = B.space lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' - fallback <- referenceLink B.link (lab,raw') + fallback <- referenceLink B.linkWith (lab,raw') return $ do fallback' <- fallback cs' <- cs diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index ffac51e7b..24b3f5c7e 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -576,21 +576,29 @@ image = try $ do sym "[[" choice imageIdentifiers fname <- many1 (noneOf "|]") - _ <- many (try $ char '|' *> imageOption) + _ <- many imageOption + dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px") + <|> return [] + _ <- many imageOption + let kvs = case dims of + w:[] -> [("width", w)] + w:(h:[]) -> [("width", w), ("height", h)] + _ -> [] + let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.image fname ("fig:" ++ stringify caption) caption + return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption imageOption :: MWParser String -imageOption = - try (oneOfStrings [ "border", "thumbnail", "frameless" - , "thumb", "upright", "left", "right" - , "center", "none", "baseline", "sub" - , "super", "top", "text-top", "middle" - , "bottom", "text-bottom" ]) - <|> try (string "frame") - <|> try (many1 (oneOf "x0123456789") <* string "px") - <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) +imageOption = try $ char '|' *> opt + where + opt = try (oneOfStrings [ "border", "thumbnail", "frameless" + , "thumb", "upright", "left", "right" + , "center", "none", "baseline", "sub" + , "super", "top", "text-top", "middle" + , "bottom", "text-bottom" ]) + <|> try (string "frame") + <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) collapseUnderscores :: String -> String collapseUnderscores [] = [] diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 199e7f3f8..0e5bb2a87 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -812,9 +812,9 @@ substKey = try $ do res <- B.toList <$> directive' il <- case res of -- use alt unless :alt: attribute on image: - [Para [Image [Str "image"] (src,tit)]] -> + [Para [Image _ [Str "image"] (src,tit)]] -> return $ B.image src tit alt - [Para [Link [Image [Str "image"] (src,tit)] (src',tit')]] -> + [Para [Link _ [Image _ [Str "image"] (src,tit)] (src',tit')]] -> return $ B.link src' tit' (B.image src tit alt) [Para ils] -> return $ B.fromList ils _ -> mzero @@ -827,7 +827,8 @@ anonymousKey = try $ do src <- targetURI pos <- getPosition let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) - updateState $ \s -> s { stateKeys = M.insert key (src,"") $ stateKeys s } + --TODO: parse width, height, class and name attributes + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } stripTicks :: String -> String stripTicks = reverse . stripTick . reverse . stripTick @@ -841,7 +842,8 @@ regularKey = try $ do char ':' src <- targetURI let key = toKey $ stripTicks ref - updateState $ \s -> s { stateKeys = M.insert key (src,"") $ stateKeys s } + --TODO: parse width, height, class and name attributes + updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -- -- tables @@ -1131,12 +1133,12 @@ referenceLink = try $ do if null anonKeys then mzero else return (head anonKeys) - (src,tit) <- case M.lookup key keyTable of - Nothing -> fail "no corresponding key" - Just target -> return target + ((src,tit), attr) <- case M.lookup key keyTable of + Nothing -> fail "no corresponding key" + Just val -> return val -- if anonymous link, remove key so it won't be used again when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ B.link src tit label' + return $ B.linkWith attr src tit label' autoURI :: RSTParser Inlines autoURI = do |