diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 110 |
1 files changed, 72 insertions, 38 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ebca7e83d..fd16a5f75 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -39,6 +39,7 @@ import Data.Ord ( comparing ) import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import Text.Pandoc.Emoji (emojis) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -47,7 +48,7 @@ import qualified Data.HashMap.Strict as H import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.Vector as V -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) @@ -55,8 +56,6 @@ import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) -import Data.Monoid (mconcat, mempty) -import Control.Applicative ((<$>), (<*), (*>), (<$), (<*>)) import Control.Monad import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup @@ -64,6 +63,7 @@ import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set import Text.Printf (printf) import Debug.Trace (trace) +import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error type MarkdownParser = Parser [Char] ParserState @@ -328,23 +328,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block - kvPairs <- many1 kvPair + firstPair <- kvPair False + restPairs <- many (kvPair True) + let kvPairs = firstPair : restPairs blanklines updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: MarkdownParser (String, MetaValue) -kvPair = try $ do +kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - skipMany1 spaceNoNewline - val <- manyTill anyChar + val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ not . null . trim $ val + guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val return (key',val') - where - spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -506,9 +505,15 @@ block = do header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader <?> "header" +atxChar :: MarkdownParser Char +atxChar = do + exts <- getOption readerExtensions + return $ if Set.member Ext_literate_haskell exts + then '=' else '#' + atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do - level <- many1 (char '#') >>= return . length + level <- atxChar >>= many1 . char >>= return . length notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces @@ -524,7 +529,7 @@ atxClosing :: MarkdownParser Attr atxClosing = try $ do attr' <- option nullAttr (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier) - skipMany (char '#') + skipMany . char =<< atxChar skipSpaces attr <- option attr' (guardEnabled Ext_header_attributes >> attributes) @@ -636,7 +641,11 @@ keyValAttr = try $ do val <- enclosed (char '"') (char '"') litChar <|> enclosed (char '\'') (char '\'') litChar <|> many (escapedChar' <|> noneOf " \t\n\r}") - return $ \(id',cs,kvs) -> (id',cs,kvs ++ [(key,val)]) + return $ \(id',cs,kvs) -> + case key of + "id" -> (val,cs,kvs) + "class" -> (id',cs ++ words val,kvs) + _ -> (id',cs,kvs ++ [(key,val)]) specialAttr :: MarkdownParser (Attr -> Attr) specialAttr = do @@ -1316,7 +1325,7 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines -pipeBreak :: MarkdownParser [Alignment] +pipeBreak :: MarkdownParser ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1326,14 +1335,22 @@ pipeBreak = try $ do guard $ not (null rest && not openPipe) optional (char '|') blankline - return (first:rest) + return $ unzip (first:rest) pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do - (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak - lines' <- sequence <$> many pipeTableRow - let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + nonindentSpaces + lookAhead nonspaceChar + (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + (lines', rawRows) <- unzip <$> many (withRaw pipeTableRow) + let maxlength = maximum $ map length rawRows + numColumns <- getOption readerColumns + let widths = if maxlength > numColumns + then map (\len -> + fromIntegral (len + 1) / fromIntegral numColumns) + seplengths + else replicate (length aligns) 0.0 + return $ (aligns, widths, heads, sequence lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1343,7 +1360,7 @@ sepPipe = try $ do -- parse a row, also returning probable alignments for org-table cells pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do - nonindentSpaces + skipMany spaceChar openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> many (notFollowedBy (blankline <|> char '|') >> inline) @@ -1362,19 +1379,20 @@ pipeTableRow = do ils' | B.isNull ils' -> mempty | otherwise -> B.plain $ ils') cells' -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') - many1 (char '-') + pipe <- many1 (char '-') right <- optionMaybe (char ':') skipMany spaceChar + let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right return $ - case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter + ((case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter), len) -- Succeed only if current line contains a pipe. scanForPipe :: Parser [Char] st () @@ -1453,6 +1471,7 @@ inline = choice [ whitespace , exampleRef , smart , return . B.singleton <$> charRef + , emoji , symbol , ltSign ] <?> "inline" @@ -1652,7 +1671,7 @@ endline = try $ do notFollowedBy (inList >> listStart) guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart - guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header + guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar) -- atx header guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) notFollowedByHtmlCloser @@ -1705,16 +1724,16 @@ link = try $ do setState $ st{ stateAllowLinks = True } regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) -regLink :: (String -> String -> Attr -> Inlines -> Inlines) +regLink :: (Attr -> String -> String -> Inlines -> Inlines) -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source attr <- option nullAttr $ guardEnabled Ext_common_link_attributes >> attributes - return $ constructor src tit attr <$> lab + return $ constructor attr src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: (String -> String -> Attr -> Inlines -> Inlines) +referenceLink :: (Attr -> String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1743,10 +1762,10 @@ referenceLink constructor (lab, raw) = do then do headerKeys <- asksF stateHeaderKeys case M.lookup key headerKeys of - Just ((src, tit), _) -> constructor src tit nullAttr <$> lab + Just ((src, tit), _) -> constructor nullAttr src tit <$> lab Nothing -> makeFallback else makeFallback - Just ((src,tit), attr) -> constructor src tit attr <$> lab + Just ((src,tit), attr) -> constructor attr src tit <$> lab dropBrackets :: String -> String dropBrackets = reverse . dropRB . reverse . dropLB @@ -1780,9 +1799,9 @@ image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor src = case takeExtension src of - "" -> B.imageWith (addExtension src defaultExt) - _ -> B.imageWith src + let constructor attr' src = case takeExtension src of + "" -> B.imageWith attr' (addExtension src defaultExt) + _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) note :: MarkdownParser (F Inlines) @@ -1886,6 +1905,21 @@ rawHtmlInline = do else not . isTextTag return $ return $ B.rawInline "html" result +-- Emoji + +emojiChars :: [Char] +emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] + +emoji :: MarkdownParser (F Inlines) +emoji = try $ do + guardEnabled Ext_emoji + char ':' + emojikey <- many1 (oneOf emojiChars) + char ':' + case M.lookup emojikey emojis of + Just s -> return (return (B.str s)) + Nothing -> mzero + -- Citations cite :: MarkdownParser (F Inlines) |