aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs146
1 files changed, 95 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index f2b0872bb..7b1341af4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -39,6 +39,8 @@ import Data.Ord ( comparing )
import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
+import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Generic (bottomUp)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Yaml as Yaml
@@ -50,6 +52,7 @@ import qualified Data.Vector as V
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import Text.Pandoc.Options
import Text.Pandoc.Shared
+import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
@@ -62,6 +65,7 @@ import Text.HTML.TagSoup.Match (tagOpen)
import qualified Data.Set as Set
import Text.Printf (printf)
import Debug.Trace (trace)
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
type MarkdownParser = Parser [Char] ParserState
@@ -326,23 +330,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
mmdTitleBlock :: MarkdownParser ()
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
- kvPairs <- many1 kvPair
+ firstPair <- kvPair False
+ restPairs <- many (kvPair True)
+ let kvPairs = firstPair : restPairs
blanklines
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
-kvPair :: MarkdownParser (String, MetaValue)
-kvPair = try $ do
+kvPair :: Bool -> MarkdownParser (String, MetaValue)
+kvPair allowEmpty = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
- skipMany1 spaceNoNewline
- val <- manyTill anyChar
+ val <- trim <$> manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
- guard $ not . null . trim $ val
+ guard $ allowEmpty || not (null val)
let key' = concat $ words $ map toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val
+ let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val
return (key',val')
- where
- spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r'))
parseMarkdown :: MarkdownParser Pandoc
parseMarkdown = do
@@ -355,7 +358,19 @@ parseMarkdown = do
st <- getState
let meta = runF (stateMeta' st) st
let Pandoc _ bs = B.doc $ runF blocks st
- return $ Pandoc meta bs
+ eastAsianLineBreaks <- option False $
+ True <$ guardEnabled Ext_east_asian_line_breaks
+ return $ (if eastAsianLineBreaks
+ then bottomUp softBreakFilter
+ else id) $ Pandoc meta bs
+
+softBreakFilter :: [Inline] -> [Inline]
+softBreakFilter (x:SoftBreak:y:zs) =
+ case (stringify x, stringify y) of
+ (xs@(_:_), (c:_))
+ | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
+ _ -> x:SoftBreak:y:zs
+softBreakFilter xs = xs
referenceKey :: MarkdownParser (F Blocks)
referenceKey = try $ do
@@ -367,23 +382,26 @@ referenceKey = try $ do
let sourceURL = liftM unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
+ notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
notFollowedBy' (() <$ reference)
many1 $ notFollowedBy space >> litChar
let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
- -- currently we just ignore MMD-style link/image attributes
- _kvs <- option [] $ guardEnabled Ext_link_attributes
- >> many (try $ spnl >> keyValAttr)
+ attr <- option nullAttr $ try $
+ guardEnabled Ext_link_attributes >> skipSpaces >> attributes
+ addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
+ >> many (try $ spnl >> keyValAttr)
blanklines
- let target = (escapeURI $ trimr src, tit)
+ let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
+ target = (escapeURI $ trimr src, tit)
st <- getState
let oldkeys = stateKeys st
let key = toKey raw
case M.lookup key oldkeys of
Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'"
Nothing -> return ()
- updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
referenceTitle :: MarkdownParser String
@@ -466,7 +484,6 @@ block = do
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
- , guardEnabled Ext_latex_macros *> (macro >>= return . return)
-- note: bulletList needs to be before header because of
-- the possibility of empty list items: -
, bulletList
@@ -476,6 +493,7 @@ block = do
, htmlBlock
, table
, codeBlockIndented
+ , guardEnabled Ext_latex_macros *> (macro >>= return . return)
, rawTeXBlock
, lineBlock
, blockQuote
@@ -516,9 +534,9 @@ atxHeader = try $ do
(text, raw) <- withRaw $
trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
attr <- atxClosing
- attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+ attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
- <|> registerImplicitHeader raw ident
+ <|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
atxClosing :: MarkdownParser Attr
@@ -559,16 +577,16 @@ setextHeader = try $ do
many (char underlineChar)
blanklines
let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
- attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+ attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
- <|> registerImplicitHeader raw ident
+ <|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-registerImplicitHeader :: String -> String -> MarkdownParser ()
-registerImplicitHeader raw ident = do
+registerImplicitHeader :: String -> Attr -> MarkdownParser ()
+registerImplicitHeader raw attr@(ident, _, _) = do
let key = toKey $ "[" ++ raw ++ "]"
updateState (\s -> s { stateHeaderKeys =
- M.insert key ('#':ident,"") (stateHeaderKeys s) })
+ M.insert key (('#':ident,""), attr) (stateHeaderKeys s) })
--
-- hrule block
@@ -979,11 +997,11 @@ para = try $ do
return $ do
result' <- result
case B.toList result' of
- [Image alt (src,tit)]
+ [Image attr alt (src,tit)]
| Ext_implicit_figures `Set.member` exts ->
-- the fig: at beginning of title indicates a figure
return $ B.para $ B.singleton
- $ Image alt (src,'f':'i':'g':':':tit)
+ $ Image attr alt (src,'f':'i':'g':':':tit)
_ -> return $ B.para result'
plain :: MarkdownParser (F Blocks)
@@ -1321,7 +1339,7 @@ removeOneLeadingSpace xs =
gridTableFooter :: MarkdownParser [Char]
gridTableFooter = blanklines
-pipeBreak :: MarkdownParser [Alignment]
+pipeBreak :: MarkdownParser ([Alignment], [Int])
pipeBreak = try $ do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
@@ -1331,16 +1349,23 @@ pipeBreak = try $ do
guard $ not (null rest && not openPipe)
optional (char '|')
blankline
- return (first:rest)
+ return $ unzip (first:rest)
pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
- (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak
- lines' <- sequence <$> many pipeTableRow
- let widths = replicate (length aligns) 0.0
- return $ (aligns, widths, heads, lines')
+ (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
+ lines' <- many pipeTableRow
+ let maxlength = maximum $
+ map (\x -> length . stringify $ runF x def) (heads : lines')
+ numColumns <- getOption readerColumns
+ let widths = if maxlength > numColumns
+ then map (\len ->
+ fromIntegral (len + 1) / fromIntegral numColumns)
+ seplengths
+ else replicate (length aligns) 0.0
+ return $ (aligns, widths, heads, sequence lines')
sepPipe :: MarkdownParser ()
sepPipe = try $ do
@@ -1369,19 +1394,20 @@ pipeTableRow = do
ils' | B.isNull ils' -> mempty
| otherwise -> B.plain $ ils') cells'
-pipeTableHeaderPart :: Parser [Char] st Alignment
+pipeTableHeaderPart :: Parser [Char] st (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
- many1 (char '-')
+ pipe <- many1 (char '-')
right <- optionMaybe (char ':')
skipMany spaceChar
+ let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
return $
- case (left,right) of
- (Nothing,Nothing) -> AlignDefault
- (Just _,Nothing) -> AlignLeft
- (Nothing,Just _) -> AlignRight
- (Just _,Just _) -> AlignCenter
+ ((case (left,right) of
+ (Nothing,Nothing) -> AlignDefault
+ (Just _,Nothing) -> AlignLeft
+ (Nothing,Just _) -> AlignRight
+ (Just _,Just _) -> AlignCenter), len)
-- Succeed only if current line contains a pipe.
scanForPipe :: Parser [Char] st ()
@@ -1460,6 +1486,7 @@ inline = choice [ whitespace
, exampleRef
, smart
, return . B.singleton <$> charRef
+ , emoji
, symbol
, ltSign
] <?> "inline"
@@ -1666,7 +1693,7 @@ endline = try $ do
(eof >> return mempty)
<|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
- <|> (return $ return B.space)
+ <|> (skipMany spaceChar >> return (return B.softbreak))
--
-- links
@@ -1710,16 +1737,18 @@ link = try $ do
setState $ st{ stateAllowLinks = False }
(lab,raw) <- reference
setState $ st{ stateAllowLinks = True }
- regLink B.link lab <|> referenceLink B.link (lab,raw)
+ regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
-regLink :: (String -> String -> Inlines -> Inlines)
+regLink :: (Attr -> String -> String -> Inlines -> Inlines)
-> F Inlines -> MarkdownParser (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
- return $ constructor src tit <$> lab
+ attr <- option nullAttr $
+ guardEnabled Ext_link_attributes >> attributes
+ return $ constructor attr src tit <$> lab
-- a link like [this][ref] or [this][] or [this]
-referenceLink :: (String -> String -> Inlines -> Inlines)
+referenceLink :: (Attr -> String -> String -> Inlines -> Inlines)
-> (F Inlines, String) -> MarkdownParser (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
@@ -1731,7 +1760,7 @@ referenceLink constructor (lab, raw) = do
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString (mconcat <$> many inline) raw'
- fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
+ fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
let makeFallback = do
@@ -1748,10 +1777,10 @@ referenceLink constructor (lab, raw) = do
then do
headerKeys <- asksF stateHeaderKeys
case M.lookup key headerKeys of
- Just (src, tit) -> constructor src tit <$> lab
- Nothing -> makeFallback
+ Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
+ Nothing -> makeFallback
else makeFallback
- Just (src,tit) -> constructor src tit <$> lab
+ Just ((src,tit), attr) -> constructor attr src tit <$> lab
dropBrackets :: String -> String
dropBrackets = reverse . dropRB . reverse . dropLB
@@ -1785,9 +1814,9 @@ image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
- let constructor src = case takeExtension src of
- "" -> B.image (addExtension src defaultExt)
- _ -> B.image src
+ let constructor attr' src = case takeExtension src of
+ "" -> B.imageWith attr' (addExtension src defaultExt)
+ _ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
note :: MarkdownParser (F Inlines)
@@ -1891,6 +1920,21 @@ rawHtmlInline = do
else not . isTextTag
return $ return $ B.rawInline "html" result
+-- Emoji
+
+emojiChars :: [Char]
+emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
+
+emoji :: MarkdownParser (F Inlines)
+emoji = try $ do
+ guardEnabled Ext_emoji
+ char ':'
+ emojikey <- many1 (oneOf emojiChars)
+ char ':'
+ case M.lookup emojikey emojis of
+ Just s -> return (return (B.str s))
+ Nothing -> mzero
+
-- Citations
cite :: MarkdownParser (F Inlines)
@@ -1923,7 +1967,7 @@ textualCite = try $ do
spc | null spaces' = mempty
| otherwise = B.space
lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw'
- fallback <- referenceLink B.link (lab,raw')
+ fallback <- referenceLink B.linkWith (lab,raw')
return $ do
fallback' <- fallback
cs' <- cs