diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 146 |
1 files changed, 95 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index f2b0872bb..7b1341af4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -39,6 +39,8 @@ import Data.Ord ( comparing ) import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Generic (bottomUp) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -50,6 +52,7 @@ import qualified Data.Vector as V import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) @@ -62,6 +65,7 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error type MarkdownParser = Parser [Char] ParserState @@ -326,23 +330,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block - kvPairs <- many1 kvPair + firstPair <- kvPair False + restPairs <- many (kvPair True) + let kvPairs = firstPair : restPairs blanklines updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: MarkdownParser (String, MetaValue) -kvPair = try $ do +kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - skipMany1 spaceNoNewline - val <- manyTill anyChar + val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ not . null . trim $ val + guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val return (key',val') - where - spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -355,7 +358,19 @@ parseMarkdown = do st <- getState let meta = runF (stateMeta' st) st let Pandoc _ bs = B.doc $ runF blocks st - return $ Pandoc meta bs + eastAsianLineBreaks <- option False $ + True <$ guardEnabled Ext_east_asian_line_breaks + return $ (if eastAsianLineBreaks + then bottomUp softBreakFilter + else id) $ Pandoc meta bs + +softBreakFilter :: [Inline] -> [Inline] +softBreakFilter (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), (c:_)) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs +softBreakFilter xs = xs referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do @@ -367,23 +382,26 @@ referenceKey = try $ do let sourceURL = liftM unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle + notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes notFollowedBy' (() <$ reference) many1 $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> manyTill litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle - -- currently we just ignore MMD-style link/image attributes - _kvs <- option [] $ guardEnabled Ext_link_attributes - >> many (try $ spnl >> keyValAttr) + attr <- option nullAttr $ try $ + guardEnabled Ext_link_attributes >> skipSpaces >> attributes + addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes + >> many (try $ spnl >> keyValAttr) blanklines - let target = (escapeURI $ trimr src, tit) + let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs + target = (escapeURI $ trimr src, tit) st <- getState let oldkeys = stateKeys st let key = toKey raw case M.lookup key oldkeys of Just _ -> addWarning (Just pos) $ "Duplicate link reference `" ++ raw ++ "'" Nothing -> return () - updateState $ \s -> s { stateKeys = M.insert key target oldkeys } + updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty referenceTitle :: MarkdownParser String @@ -466,7 +484,6 @@ block = do res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock - , guardEnabled Ext_latex_macros *> (macro >>= return . return) -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList @@ -476,6 +493,7 @@ block = do , htmlBlock , table , codeBlockIndented + , guardEnabled Ext_latex_macros *> (macro >>= return . return) , rawTeXBlock , lineBlock , blockQuote @@ -516,9 +534,9 @@ atxHeader = try $ do (text, raw) <- withRaw $ trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw ident + <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -559,16 +577,16 @@ setextHeader = try $ do many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + attr' <- registerHeader attr (runF text defaultParserState) guardDisabled Ext_implicit_header_references - <|> registerImplicitHeader raw ident + <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: String -> String -> MarkdownParser () -registerImplicitHeader raw ident = do +registerImplicitHeader :: String -> Attr -> MarkdownParser () +registerImplicitHeader raw attr@(ident, _, _) = do let key = toKey $ "[" ++ raw ++ "]" updateState (\s -> s { stateHeaderKeys = - M.insert key ('#':ident,"") (stateHeaderKeys s) }) + M.insert key (('#':ident,""), attr) (stateHeaderKeys s) }) -- -- hrule block @@ -979,11 +997,11 @@ para = try $ do return $ do result' <- result case B.toList result' of - [Image alt (src,tit)] + [Image attr alt (src,tit)] | Ext_implicit_figures `Set.member` exts -> -- the fig: at beginning of title indicates a figure return $ B.para $ B.singleton - $ Image alt (src,'f':'i':'g':':':tit) + $ Image attr alt (src,'f':'i':'g':':':tit) _ -> return $ B.para result' plain :: MarkdownParser (F Blocks) @@ -1321,7 +1339,7 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines -pipeBreak :: MarkdownParser [Alignment] +pipeBreak :: MarkdownParser ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1331,16 +1349,23 @@ pipeBreak = try $ do guard $ not (null rest && not openPipe) optional (char '|') blankline - return (first:rest) + return $ unzip (first:rest) pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar - (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak - lines' <- sequence <$> many pipeTableRow - let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + lines' <- many pipeTableRow + let maxlength = maximum $ + map (\x -> length . stringify $ runF x def) (heads : lines') + numColumns <- getOption readerColumns + let widths = if maxlength > numColumns + then map (\len -> + fromIntegral (len + 1) / fromIntegral numColumns) + seplengths + else replicate (length aligns) 0.0 + return $ (aligns, widths, heads, sequence lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1369,19 +1394,20 @@ pipeTableRow = do ils' | B.isNull ils' -> mempty | otherwise -> B.plain $ ils') cells' -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') - many1 (char '-') + pipe <- many1 (char '-') right <- optionMaybe (char ':') skipMany spaceChar + let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right return $ - case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter + ((case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter), len) -- Succeed only if current line contains a pipe. scanForPipe :: Parser [Char] st () @@ -1460,6 +1486,7 @@ inline = choice [ whitespace , exampleRef , smart , return . B.singleton <$> charRef + , emoji , symbol , ltSign ] <?> "inline" @@ -1666,7 +1693,7 @@ endline = try $ do (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (return $ return B.space) + <|> (skipMany spaceChar >> return (return B.softbreak)) -- -- links @@ -1710,16 +1737,18 @@ link = try $ do setState $ st{ stateAllowLinks = False } (lab,raw) <- reference setState $ st{ stateAllowLinks = True } - regLink B.link lab <|> referenceLink B.link (lab,raw) + regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -regLink :: (String -> String -> Inlines -> Inlines) +regLink :: (Attr -> String -> String -> Inlines -> Inlines) -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source - return $ constructor src tit <$> lab + attr <- option nullAttr $ + guardEnabled Ext_link_attributes >> attributes + return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (String -> String -> Inlines -> Inlines) +referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1731,7 +1760,7 @@ referenceLink constructor (lab, raw) = do let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' parsedRaw <- parseFromString (mconcat <$> many inline) raw' - fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw implicitHeaderRefs <- option False $ True <$ guardEnabled Ext_implicit_header_references let makeFallback = do @@ -1748,10 +1777,10 @@ referenceLink constructor (lab, raw) = do then do headerKeys <- asksF stateHeaderKeys case M.lookup key headerKeys of - Just (src, tit) -> constructor src tit <$> lab - Nothing -> makeFallback + Just ((src, tit), _) -> constructor nullAttr src tit <$> lab + Nothing -> makeFallback else makeFallback - Just (src,tit) -> constructor src tit <$> lab + Just ((src,tit), attr) -> constructor attr src tit <$> lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1785,9 +1814,9 @@ image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor src = case takeExtension src of - "" -> B.image (addExtension src defaultExt) - _ -> B.image src + let constructor attr' src = case takeExtension src of + "" -> B.imageWith attr' (addExtension src defaultExt) + _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) note :: MarkdownParser (F Inlines) @@ -1891,6 +1920,21 @@ rawHtmlInline = do else not . isTextTag return $ return $ B.rawInline "html" result +-- Emoji + +emojiChars :: [Char] +emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] + +emoji :: MarkdownParser (F Inlines) +emoji = try $ do + guardEnabled Ext_emoji + char ':' + emojikey <- many1 (oneOf emojiChars) + char ':' + case M.lookup emojikey emojis of + Just s -> return (return (B.str s)) + Nothing -> mzero + -- Citations cite :: MarkdownParser (F Inlines) @@ -1923,7 +1967,7 @@ textualCite = try $ do spc | null spaces' = mempty | otherwise = B.space lab <- parseFromString (mconcat <$> many inline) $ dropBrackets raw' - fallback <- referenceLink B.link (lab,raw') + fallback <- referenceLink B.linkWith (lab,raw') return $ do fallback' <- fallback cs' <- cs |