about | summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--  src/Text/Pandoc/Readers/CommonMark.hs |  4
-rw-r--r--  src/Text/Pandoc/Readers/DocBook.hs    | 24
-rw-r--r--  src/Text/Pandoc/Readers/Docx.hs       |  4
-rw-r--r--  src/Text/Pandoc/Readers/EPUB.hs       | 12
-rw-r--r--  src/Text/Pandoc/Readers/HTML.hs       | 26
-rw-r--r--  src/Text/Pandoc/Readers/LaTeX.hs      | 29
-rw-r--r--  src/Text/Pandoc/Readers/Markdown.hs   | 57
-rw-r--r--  src/Text/Pandoc/Readers/MediaWiki.hs  | 30
-rw-r--r--  src/Text/Pandoc/Readers/RST.hs        | 18
9 files changed, 117 insertions, 87 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 51a35c8ad..7f752c446 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -113,7 +113,7 @@ addInline (Node _ EMPH nodes) =
addInline (Node _ STRONG nodes) =
(Strong (addInlines nodes) :)
addInline (Node _ (LINK url title) nodes) =
- (Link (addInlines nodes) (unpack url, unpack title) :)
+ (Link nullAttr (addInlines nodes) (unpack url, unpack title) :)
addInline (Node _ (IMAGE url title) nodes) =
- (Image (addInlines nodes) (unpack url, unpack title) :)
+ (Image nullAttr (addInlines nodes) (unpack url, unpack title) :)
addInline _ = id
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index f679ddb57..e8fe92e27 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -635,11 +635,20 @@ addToStart toadd bs =
-- A DocBook mediaobject is a wrapper around a set of alternative presentations
getMediaobject :: Element -> DB Inlines
getMediaobject e = do
- imageUrl <- case filterChild (named "imageobject") e of
- Nothing -> return mempty
- Just z -> case filterChild (named "imagedata") z of
- Nothing -> return mempty
- Just i -> return $ attrValue "fileref" i
+ (imageUrl, attr) <-
+ case filterChild (named "imageobject") e of
+ Nothing -> return (mempty, nullAttr)
+ Just z -> case filterChild (named "imagedata") z of
+ Nothing -> return (mempty, nullAttr)
+ Just i -> let atVal a = attrValue a i
+ w = case atVal "width" of
+ "" -> []
+ d -> [("width", d)]
+ h = case atVal "depth" of
+ "" -> []
+ d -> [("height", d)]
+ atr = (atVal "id", words $ atVal "role", w ++ h)
+ in return (atVal "fileref", atr)
let getCaption el = case filterChild (\x -> named "caption" x
|| named "textobject" x
|| named "alt" x) el of
@@ -649,7 +658,7 @@ getMediaobject e = do
let (caption, title) = if isNull figTitle
then (getCaption e, "")
else (return figTitle, "fig:")
- liftM (image imageUrl title) caption
+ liftM (imageWith attr imageUrl title) caption
getBlocks :: Element -> DB Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
@@ -968,7 +977,8 @@ parseInline (Elem e) =
Just h -> h
_ -> ('#' : attrValue "linkend" e)
let ils' = if ils == mempty then str href else ils
- return $ link href "" ils'
+ let attr = (attrValue "id" e, words $ attrValue "role" e, [])
+ return $ linkWith attr href "" ils'
"foreignphrase" -> emph <$> innerInlines
"emphasis" -> case attrValue "role" e of
"bold" -> strong <$> innerInlines
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 9f1c7af0a..439e2d3e4 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -539,10 +539,10 @@ bodyPartToBlocks (OMathPara e) = do
-- replace targets with generated anchors.
rewriteLink' :: Inline -> DocxContext Inline
-rewriteLink' l@(Link ils ('#':target, title)) = do
+rewriteLink' l@(Link attr ils ('#':target, title)) = do
anchorMap <- gets docxAnchorMap
return $ case M.lookup target anchorMap of
- Just newTarget -> (Link ils ('#':newTarget, title))
+ Just newTarget -> (Link attr ils ('#':newTarget, title))
Nothing -> l
rewriteLink' il = return il
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index b8698fe26..79aa540f6 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -100,12 +100,12 @@ fetchImages mimes root arc (query iq -> links) =
<$> findEntryByPath abslink arc
iq :: Inline -> [FilePath]
-iq (Image _ (url, _)) = [url]
+iq (Image _ _ (url, _)) = [url]
iq _ = []
-- Remove relative paths
renameImages :: FilePath -> Inline -> Inline
-renameImages root (Image a (url, b)) = Image a (collapseFilePath (root </> url), b)
+renameImages root (Image attr a (url, b)) = Image attr a (collapseFilePath (root </> url), b)
renameImages _ x = x
imageToPandoc :: FilePath -> Pandoc
@@ -190,14 +190,14 @@ fixInlineIRs s (Span as v) =
Span (fixAttrs s as) v
fixInlineIRs s (Code as code) =
Code (fixAttrs s as) code
-fixInlineIRs s (Link t ('#':url, tit)) =
- Link t (addHash s url, tit)
+fixInlineIRs s (Link attr t ('#':url, tit)) =
+ Link attr t (addHash s url, tit)
fixInlineIRs _ v = v
prependHash :: [String] -> Inline -> Inline
-prependHash ps l@(Link is (url, tit))
+prependHash ps l@(Link attr is (url, tit))
| or [s `isPrefixOf` url | s <- ps] =
- Link is ('#':url, tit)
+ Link attr is ('#':url, tit)
| otherwise = l
prependHash _ i = i
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index ce10a289e..85e9a0743 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -601,16 +601,8 @@ pLineBreak = do
return B.linebreak
pLink :: TagParser Inlines
-pLink = pRelLink <|> pAnchor
-
-pAnchor :: TagParser Inlines
-pAnchor = try $ do
- tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "id"))
- return $ B.spanWith (fromAttrib "id" tag , [], []) mempty
-
-pRelLink :: TagParser Inlines
-pRelLink = try $ do
- tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
+pLink = try $ do
+ tag <- pSatisfy $ tagOpenLit "a" (const True)
mbBaseHref <- baseHref <$> getState
let url' = fromAttrib "href" tag
let url = case (isURI url', mbBaseHref) of
@@ -618,11 +610,9 @@ pRelLink = try $ do
_ -> url'
let title = fromAttrib "title" tag
let uid = fromAttrib "id" tag
- let spanC = case uid of
- [] -> id
- s -> B.spanWith (s, [], [])
+ let cls = words $ fromAttrib "class" tag
lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
- return $ spanC $ B.link (escapeURI url) title lab
+ return $ B.linkWith (uid, cls, []) (escapeURI url) title lab
pImage :: TagParser Inlines
pImage = do
@@ -634,7 +624,13 @@ pImage = do
_ -> url'
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
- return $ B.image (escapeURI url) title (B.text alt)
+ let uid = fromAttrib "id" tag
+ let cls = words $ fromAttrib "class" tag
+ let getAtt k = case fromAttrib k tag of
+ "" -> []
+ v -> [(k, v)]
+ let kvs = concat $ map getAtt ["width", "height", "sizes", "srcset"]
+ return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
pCode :: TagParser Inlines
pCode = try $ do
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index b9645d034..673deba14 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -54,6 +54,7 @@ import Data.List (intercalate)
import qualified Data.Map as M
import qualified Control.Exception as E
import Text.Pandoc.Highlighting (fromListingsLanguage)
+import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Error
-- | Parse LaTeX from string and return 'Pandoc' document.
@@ -398,7 +399,8 @@ inlineCommand = try $ do
star <- option "" (string "*")
let name' = name ++ star
let raw = do
- rawcommand <- getRawCommand name'
+ rawargs <- withRaw (skipopts *> option "" dimenarg *> many braced)
+ let rawcommand = '\\' : name ++ star ++ snd rawargs
transformed <- applyMacros' rawcommand
if transformed /= rawcommand
then parseFromString inlines transformed
@@ -528,7 +530,9 @@ inlineCommands = M.fromList $
, ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
tok >>= \lab ->
pure (link url "" lab))
- , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage)
+ , ("includegraphics", do options <- option [] keyvals
+ src <- unescapeURL <$> braced
+ mkImage options src)
, ("enquote", enquote)
, ("cite", citation "cite" AuthorInText False)
, ("Cite", citation "cite" AuthorInText False)
@@ -590,14 +594,19 @@ inlineCommands = M.fromList $
-- in which case they will appear as raw latex blocks:
[ "index" ]
-mkImage :: String -> LP Inlines
-mkImage src = do
+mkImage :: [(String, String)] -> String -> LP Inlines
+mkImage options src = do
+ let replaceTextwidth (k,v) = case numUnit v of
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ _ -> (k, v)
+ let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options
+ let attr = ("",[], kvs)
let alt = str "image"
case takeExtension src of
"" -> do
defaultExt <- getOption readerDefaultImageExtension
- return $ image (addExtension src defaultExt) "" alt
- _ -> return $ image src "" alt
+ return $ imageWith attr (addExtension src defaultExt) "" alt
+ _ -> return $ imageWith attr src "" alt
inNote :: Inlines -> Inlines
inNote ils =
@@ -978,7 +987,7 @@ readFileFromDirs (d:ds) f =
keyval :: LP (String, String)
keyval = try $ do
key <- many1 alphaNum
- val <- option "" $ char '=' >> many1 alphaNum
+ val <- option "" $ char '=' >> many1 (alphaNum <|> char '.' <|> char '\\')
skipMany spaceChar
optional (char ',')
skipMany spaceChar
@@ -1005,11 +1014,11 @@ rawLaTeXInline = do
addImageCaption :: Blocks -> LP Blocks
addImageCaption = walkM go
- where go (Image alt (src,tit)) = do
+ where go (Image attr alt (src,tit)) = do
mbcapt <- stateCaption <$> getState
return $ case mbcapt of
- Just ils -> Image (toList ils) (src, "fig:")
- Nothing -> Image alt (src,tit)
+ Just ils -> Image attr (toList ils) (src, "fig:")
+ Nothing -> Image attr alt (src,tit)
go x = return x
addTableCaption :: Blocks -> LP Blocks
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7e811a966..fd16a5f75 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -368,23 +368,26 @@ referenceKey = try $ do
let sourceURL = liftM unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
+ notFollowedBy' $ guardEnabled Ext_common_link_attributes >> attributes
notFollowedBy' (() <$ reference)
many1 $ notFollowedBy space >> litChar
let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
- -- currently we just ignore MMD-style link/image attributes
- _kvs <- option [] $ guardEnabled Ext_link_attributes
- >> many (try $ spnl >> keyValAttr)
+ attr <- option nullAttr $ try $
+ guardEnabled Ext_common_link_attributes >> skipSpaces >> attributes
+ addKvs <- option [] $ guardEnabled Ext_link_attributes
+ >> many (try $ spnl >> keyValAttr)
blanklines
- let target = (escapeURI $ trimr src, tit)
+ let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
+ target = (escapeURI $ trimr src, tit)
st <- getState
let oldkeys = stateKeys st
let key = toKey raw
case M.lookup key oldkeys of
Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'"
Nothing -> return ()
- updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
referenceTitle :: MarkdownParser String
@@ -517,9 +520,9 @@ atxHeader = try $ do
(text, raw) <- withRaw $
trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
attr <- atxClosing
- attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+ attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
- <|> registerImplicitHeader raw ident
+ <|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
atxClosing :: MarkdownParser Attr
@@ -560,16 +563,16 @@ setextHeader = try $ do
many (char underlineChar)
blanklines
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+ attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
- <|> registerImplicitHeader raw ident
+ <|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-registerImplicitHeader :: String -> String -> MarkdownParser ()
-registerImplicitHeader raw ident = do
+registerImplicitHeader :: String -> Attr -> MarkdownParser ()
+registerImplicitHeader raw attr@(ident, _, _) = do
let key = toKey $ "[" ++ raw ++ "]"
updateState (\s -> s { stateHeaderKeys =
- M.insert key ('#':ident,"") (stateHeaderKeys s) })
+ M.insert key (('#':ident,""), attr) (stateHeaderKeys s) })
--
-- hrule block
@@ -980,11 +983,11 @@ para = try $ do
return $ do
result' <- result
case B.toList result' of
- [Image alt (src,tit)]
+ [Image attr alt (src,tit)]
| Ext_implicit_figures `Set.member` exts ->
-- the fig: at beginning of title indicates a figure
return $ B.para $ B.singleton
- $ Image alt (src,'f':'i':'g':':':tit)
+ $ Image attr alt (src,'f':'i':'g':':':tit)
_ -> return $ B.para result'
plain :: MarkdownParser (F Blocks)
@@ -1719,16 +1722,18 @@ link = try $ do
setState $ st{ stateAllowLinks = False }
(lab,raw) <- reference
setState $ st{ stateAllowLinks = True }
- regLink B.link lab <|> referenceLink B.link (lab,raw)
+ regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
-regLink :: (String -> String -> Inlines -> Inlines)
+regLink :: (Attr -> String -> String -> Inlines -> Inlines)
-> F Inlines -> MarkdownParser (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
- return $ constructor src tit <$> lab
+ attr <- option nullAttr $
+ guardEnabled Ext_common_link_attributes >> attributes
+ return $ constructor attr src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: (String -> String -> Inlines -> Inlines)
+referenceLink :: (Attr -> String -> String -> Inlines -> Inlines)
-> (F Inlines, String) -> MarkdownParser (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
@@ -1740,7 +1745,7 @@ referenceLink constructor (lab, raw) = do
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
- fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
let makeFallback = do
@@ -1757,10 +1762,10 @@ referenceLink constructor (lab, raw) = do
then do
headerKeys <- asksF stateHeaderKeys
case M.lookup key headerKeys of
- Just (src, tit) -> constructor src tit <$> lab
- Nothing -> makeFallback
+ Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
+ Nothing -> makeFallback
else makeFallback
- Just (src,tit) -> constructor src tit <$> lab
+ Just ((src,tit), attr) -> constructor attr src tit <$> lab
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
@@ -1794,9 +1799,9 @@ image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
- let constructor src = case takeExtension src of
- "" -> B.image (addExtension src defaultExt)
- _ -> B.image src
+ let constructor attr' src = case takeExtension src of
+ "" -> B.imageWith attr' (addExtension src defaultExt)
+ _ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
note :: MarkdownParser (F Inlines)
@@ -1947,7 +1952,7 @@ textualCite = try $ do
spc | null spaces' = mempty
| otherwise = B.space
lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
- fallback <- referenceLink B.link (lab,raw')
+ fallback <- referenceLink B.linkWith (lab,raw')
return $ do
fallback' <- fallback
cs' <- cs
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index ffac51e7b..24b3f5c7e 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -576,21 +576,29 @@ image = try $ do
sym "[["
choice imageIdentifiers
fname <- many1 (noneOf "|]")
- _ <- many (try $ char '|' *> imageOption)
+ _ <- many imageOption
+ dims <- try (char '|' *> (sepBy (many digit) (char 'x')) <* string "px")
+ <|> return []
+ _ <- many imageOption
+ let kvs = case dims of
+ w:[] -> [("width", w)]
+ w:(h:[]) -> [("width", w), ("height", h)]
+ _ -> []
+ let attr = ("", [], kvs)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
- return $ B.image fname ("fig:" ++ stringify caption) caption
+ return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
imageOption :: MWParser String
-imageOption =
- try (oneOfStrings [ "border", "thumbnail", "frameless"
- , "thumb", "upright", "left", "right"
- , "center", "none", "baseline", "sub"
- , "super", "top", "text-top", "middle"
- , "bottom", "text-bottom" ])
- <|> try (string "frame")
- <|> try (many1 (oneOf "x0123456789") <* string "px")
- <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
+imageOption = try $ char '|' *> opt
+ where
+ opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
+ , "thumb", "upright", "left", "right"
+ , "center", "none", "baseline", "sub"
+ , "super", "top", "text-top", "middle"
+ , "bottom", "text-bottom" ])
+ <|> try (string "frame")
+ <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
collapseUnderscores :: String -> String
collapseUnderscores [] = []
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 199e7f3f8..0e5bb2a87 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -812,9 +812,9 @@ substKey = try $ do
res <- B.toList <$> directive'
il <- case res of
-- use alt unless :alt: attribute on image:
- [Para [Image [Str "image"] (src,tit)]] ->
+ [Para [Image _ [Str "image"] (src,tit)]] ->
return $ B.image src tit alt
- [Para [Link [Image [Str "image"] (src,tit)] (src',tit')]] ->
+ [Para [Link _ [Image _ [Str "image"] (src,tit)] (src',tit')]] ->
return $ B.link src' tit' (B.image src tit alt)
[Para ils] -> return $ B.fromList ils
_ -> mzero
@@ -827,7 +827,8 @@ anonymousKey = try $ do
src <- targetURI
pos <- getPosition
let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
- updateState $ \s -> s { stateKeys = M.insert key (src,"") $ stateKeys s }
+ --TODO: parse width, height, class and name attributes
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
stripTicks :: String -> String
stripTicks = reverse . stripTick . reverse . stripTick
@@ -841,7 +842,8 @@ regularKey = try $ do
char ':'
src <- targetURI
let key = toKey $ stripTicks ref
- updateState $ \s -> s { stateKeys = M.insert key (src,"") $ stateKeys s }
+ --TODO: parse width, height, class and name attributes
+ updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s }
--
-- tables
@@ -1131,12 +1133,12 @@ referenceLink = try $ do
if null anonKeys
then mzero
else return (head anonKeys)
- (src,tit) <- case M.lookup key keyTable of
- Nothing -> fail "no corresponding key"
- Just target -> return target
+ ((src,tit), attr) <- case M.lookup key keyTable of
+ Nothing -> fail "no corresponding key"
+ Just val -> return val
-- if anonymous link, remove key so it won't be used again
when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
- return $ B.link src tit label'
+ return $ B.linkWith attr src tit label'
autoURI :: RSTParser Inlines
autoURI = do