Merge remote-tracking branch 'jgm/master' into dokuwiki

author: Clare Macrae <github@cfmacrae.fastmail.co.uk> 2014-06-29 19:22:31 +0100
committer: Clare Macrae <github@cfmacrae.fastmail.co.uk> 2014-06-29 19:22:31 +0100
commit: 717e16660d1ee83f690b35d0aa9b60c8ac9d6b61 (patch)
tree: aa850d4ee99fa0b14da9ba0396ba6aa67e2037e3 /src/Text/Pandoc/Readers/Markdown.hs
parent: fccfc8429cf4d002df37977f03508c9aae457416 (diff)
parent: ce69021e42d7bf50deccba2a52ed4717f6ddac10 (diff)
download: pandoc-717e16660d1ee83f690b35d0aa9b60c8ac9d6b61.tar.gz
1 files changed, 264 insertions, 236 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a3500fbcf..690256224 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,6 +1,6 @@
 {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-2013 John MacFarlane
+   Copyright   : Copyright (C) 2006-2014 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown,
 
 import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate )
 import qualified Data.Map as M
+import Data.Scientific (coefficient, base10Exponent)
 import Data.Ord ( comparing )
 import Data.Char ( isAlphaNum, toLower )
 import Data.Maybe
@@ -49,13 +50,10 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
 import Text.Pandoc.Options
 import Text.Pandoc.Shared
 import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Asciify (toAsciiChar)
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
                                   isTextTag, isCommentTag )
-import Text.Pandoc.Biblio (processBiblio)
-import qualified Text.CSL as CSL
 import Data.Monoid (mconcat, mempty)
 import Control.Applicative ((<$>), (<*), (*>), (<$))
 import Control.Monad
@@ -63,6 +61,8 @@ import System.FilePath (takeExtension, addExtension)
 import Text.HTML.TagSoup
 import Text.HTML.TagSoup.Match (tagOpen)
 import qualified Data.Set as Set
+import Text.Printf (printf)
+import Debug.Trace (trace)
 
 type MarkdownParser = Parser [Char] ParserState
 
@@ -203,13 +203,10 @@ dateLine = try $ do
   skipSpaces
   trimInlinesF . mconcat <$> manyTill inline newline
 
-titleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
-titleBlock = pandocTitleBlock
-         <|> yamlTitleBlock
-         <|> mmdTitleBlock
-         <|> return (return id)
+titleBlock :: MarkdownParser ()
+titleBlock = pandocTitleBlock <|> mmdTitleBlock
 
-pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
+pandocTitleBlock :: MarkdownParser ()
 pandocTitleBlock = try $ do
   guardEnabled Ext_pandoc_title_block
   lookAhead (char '%')
@@ -217,49 +214,61 @@ pandocTitleBlock = try $ do
   author <- option (return []) authorsLine
   date <- option mempty dateLine
   optional blanklines
-  return $ do
-    title' <- title
-    author' <- author
-    date' <- date
-    return $ B.setMeta "title" title'
-           . B.setMeta "author" author'
-           . B.setMeta "date" date'
-
-yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
-yamlTitleBlock = try $ do
+  let meta' = do title' <- title
+                 author' <- author
+                 date' <- date
+                 return $
+                     (if B.isNull title' then id else B.setMeta "title" title')
+                   . (if null author' then id else B.setMeta "author" author')
+                   . (if B.isNull date' then id else B.setMeta "date" date')
+                   $ nullMeta
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+
+yamlMetaBlock :: MarkdownParser (F Blocks)
+yamlMetaBlock = try $ do
   guardEnabled Ext_yaml_metadata_block
   pos <- getPosition
   string "---"
   blankline
-  rawYaml <- unlines <$> manyTill anyLine stopLine
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
   optional blanklines
   opts <- stateOptions <$> getState
-  case Yaml.decodeEither' $ UTF8.fromString rawYaml of
-       Right (Yaml.Object hashmap) -> return $ return $
-                H.foldrWithKey (\k v f ->
-                     if ignorable k
-                        then f
-                        else B.setMeta (T.unpack k) (yamlToMeta opts v) . f)
-                  id hashmap
-       Right _ -> do
-                   addWarning (Just pos) "YAML header is not an object"
-                   return $ return id
-       Left err' -> do
-                case err' of
-                   InvalidYaml (Just YamlParseException{
-                               yamlProblem = problem
-                             , yamlContext = _ctxt
-                             , yamlProblemMark = Yaml.YamlMark {
-                                   yamlLine = yline
-                                 , yamlColumn = ycol
-                             }}) ->
-                        addWarning (Just $ setSourceLine
-                           (setSourceColumn pos (sourceColumn pos + ycol))
-                           (sourceLine pos + 1 + yline))
-                           $ "Could not parse YAML header: " ++ problem
-                   _ -> addWarning (Just pos)
-                           $ "Could not parse YAML header: " ++ show err'
-                return $ return id
+  meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of
+                Right (Yaml.Object hashmap) -> return $ return $
+                         H.foldrWithKey (\k v m ->
+                              if ignorable k
+                                 then m
+                                 else B.setMeta (T.unpack k)
+                                            (yamlToMeta opts v) m)
+                           nullMeta hashmap
+                Right Yaml.Null -> return $ return nullMeta
+                Right _ -> do
+                            addWarning (Just pos) "YAML header is not an object"
+                            return $ return nullMeta
+                Left err' -> do
+                         case err' of
+                            InvalidYaml (Just YamlParseException{
+                                        yamlProblem = problem
+                                      , yamlContext = _ctxt
+                                      , yamlProblemMark = Yaml.YamlMark {
+                                            yamlLine = yline
+                                          , yamlColumn = ycol
+                                      }}) ->
+                                 addWarning (Just $ setSourceLine
+                                    (setSourceColumn pos
+                                       (sourceColumn pos + ycol))
+                                    (sourceLine pos + 1 + yline))
+                                    $ "Could not parse YAML header: " ++
+                                        problem
+                            _ -> addWarning (Just pos)
+                                    $ "Could not parse YAML header: " ++
+                                        show err'
+                         return $ return nullMeta
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+  return mempty
 
 -- ignore fields ending with _
 ignorable :: Text -> Bool
@@ -277,8 +286,12 @@ toMetaValue opts x =
 
 yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
 yamlToMeta opts (Yaml.String t) = toMetaValue opts t
-yamlToMeta _    (Yaml.Number n) = MetaString $ show n
-yamlToMeta _    (Yaml.Bool b) = MetaString $ map toLower $ show b
+yamlToMeta _    (Yaml.Number n)
+  -- avoid decimal points for numbers that don't need them:
+  | base10Exponent n >= 0     = MetaString $ show
+                                $ coefficient n * (10 ^ base10Exponent n)
+  | otherwise                 = MetaString $ show n
+yamlToMeta _    (Yaml.Bool b) = MetaBool b
 yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
                                                 $ V.toList xs
 yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
@@ -292,13 +305,13 @@ yamlToMeta _ _ = MetaString ""
 stopLine :: MarkdownParser ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
 
-mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
+mmdTitleBlock :: MarkdownParser ()
 mmdTitleBlock = try $ do
   guardEnabled Ext_mmd_title_block
   kvPairs <- many1 kvPair
   blanklines
-  return $ return $ \(Pandoc m bs) ->
-             Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+                             return (Meta $ M.fromList kvPairs) }
 
 kvPair :: MarkdownParser (String, MetaValue)
 kvPair = try $ do
@@ -315,15 +328,12 @@ parseMarkdown = do
   updateState $ \state -> state { stateOptions =
                 let oldOpts = stateOptions state in
                     oldOpts{ readerParseRaw = True } }
-  titleTrans <- option (return id) titleBlock
+  optional titleBlock
   blocks <- parseBlocks
   st <- getState
-  mbsty <- getOption readerCitationStyle
-  refs <- getOption readerReferences
-  return $ processBiblio mbsty refs
-         $ runF titleTrans st
-         $ B.doc
-         $ runF blocks st
+  let meta = runF (stateMeta' st) st
+  let Pandoc _ bs = B.doc $ runF blocks st
+  return $ Pandoc meta bs
 
 addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
 addWarning mbpos msg =
@@ -339,10 +349,8 @@ referenceKey = try $ do
   char ':'
   skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
   let sourceURL = liftM unwords $ many $ try $ do
-                    notFollowedBy' referenceTitle
-                    skipMany spaceChar
-                    optional $ newline >> notFollowedBy blankline
                     skipMany spaceChar
+                    notFollowedBy' referenceTitle
                     notFollowedBy' (() <$ reference)
                     many1 $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >>
@@ -351,7 +359,7 @@ referenceKey = try $ do
   tit <- option "" referenceTitle
   -- currently we just ignore MMD-style link/image attributes
   _kvs <- option [] $ guardEnabled Ext_link_attributes
-                      >> many (spnl >> keyValAttr)
+                      >> many (try $ spnl >> keyValAttr)
   blanklines
   let target = (escapeURI $ trimr src,  tit)
   st <- getState
@@ -437,19 +445,26 @@ parseBlocks :: MarkdownParser (F Blocks)
 parseBlocks = mconcat <$> manyTill block eof
 
 block :: MarkdownParser (F Blocks)
-block = choice [ mempty <$ blanklines
+block = do
+  tr <- getOption readerTrace
+  pos <- getPosition
+  res <- choice [ mempty <$ blanklines
                , codeBlockFenced
+               , yamlMetaBlock
                , guardEnabled Ext_latex_macros *> (macro >>= return . return)
+               -- note: bulletList needs to be before header because of
+               -- the possibility of empty list items: -
+               , bulletList
                , header
                , lhsCodeBlock
                , rawTeXBlock
+               , divHtml
                , htmlBlock
                , table
                , lineBlock
                , codeBlockIndented
                , blockQuote
                , hrule
-               , bulletList
                , orderedList
                , definitionList
                , noteBlock
@@ -458,6 +473,11 @@ block = choice [ mempty <$ blanklines
                , para
                , plain
                ] <?> "block"
+  when tr $ do
+    st <- getState
+    trace (printf "line %d: %s" (sourceLine pos)
+           (take 60 $ show $ B.toList $ runF res st)) (return ())
+  return res
 
 --
 -- header blocks
@@ -466,39 +486,15 @@ block = choice [ mempty <$ blanklines
 header :: MarkdownParser (F Blocks)
 header = setextHeader <|> atxHeader <?> "header"
 
---  returns unique identifier
-addToHeaderList :: Attr -> F Inlines -> MarkdownParser Attr
-addToHeaderList (ident,classes,kvs) text = do
-  let header' = runF text defaultParserState
-  exts <- getOption readerExtensions
-  let insert' = M.insertWith (\_new old -> old)
-  if null ident && Ext_auto_identifiers `Set.member` exts
-     then do
-       ids <- stateIdentifiers `fmap` getState
-       let id' = uniqueIdent (B.toList header') ids
-       let id'' = if Ext_ascii_identifiers `Set.member` exts
-                     then catMaybes $ map toAsciiChar id'
-                     else id'
-       updateState $ \st -> st{
-              stateIdentifiers = if id' == id''
-                                    then id' : ids
-                                    else id' : id'' : ids,
-              stateHeaders = insert' header' id' $ stateHeaders st }
-       return (id'',classes,kvs)
-     else do
-        unless (null ident) $
-          updateState $ \st -> st{
-               stateHeaders = insert' header' ident $ stateHeaders st }
-        return (ident,classes,kvs)
-
 atxHeader :: MarkdownParser (F Blocks)
 atxHeader = try $ do
   level <- many1 (char '#') >>= return . length
-  notFollowedBy (char '.' <|> char ')') -- this would be a list
+  notFollowedBy $ guardEnabled Ext_fancy_lists >>
+                  (char '.' <|> char ')') -- this would be a list
   skipSpaces
   text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
   attr <- atxClosing
-  attr' <- addToHeaderList attr text
+  attr' <- registerHeader attr (runF text defaultParserState)
   return $ B.headerWith attr' level <$> text
 
 atxClosing :: MarkdownParser Attr
@@ -537,7 +533,7 @@ setextHeader = try $ do
   many (char underlineChar)
   blanklines
   let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
-  attr' <- addToHeaderList attr text
+  attr' <- registerHeader attr (runF text defaultParserState)
   return $ B.headerWith attr' level <$> text
 
 --
@@ -622,12 +618,19 @@ codeBlockFenced = try $ do
   skipMany spaceChar
   attr <- option ([],[],[]) $
             try (guardEnabled Ext_fenced_code_attributes >> attributes)
-           <|> ((\x -> ("",[x],[])) <$> identifier)
+           <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)
   blankline
   contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
   blanklines
   return $ return $ B.codeBlockWith attr $ intercalate "\n" contents
 
+-- correctly handle github language identifiers
+toLanguageId :: String -> String
+toLanguageId = map toLower . go
+  where go "c++" = "cpp"
+        go "objective-c" = "objectivec"
+        go x = x
+
 codeBlockIndented :: MarkdownParser (F Blocks)
 codeBlockIndented = do
   contents <- many1 (indentedLine <|>
@@ -718,7 +721,7 @@ bulletListStart = try $ do
   skipNonindentSpaces
   notFollowedBy' (() <$ hrule)     -- because hrules start out just like lists
   satisfy isBulletListMarker
-  spaceChar
+  spaceChar <|> lookAhead newline
   skipSpaces
 
 anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim)
@@ -746,11 +749,16 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ())
 -- parse a line of a list item (start = parser for beginning of list item)
 listLine :: MarkdownParser String
 listLine = try $ do
-  notFollowedBy blankline
   notFollowedBy' (do indentSpaces
-                     many (spaceChar)
+                     many spaceChar
                      listStart)
-  chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
+  notFollowedBy' $ htmlTag (~== TagClose "div")
+  optional (() <$ indentSpaces)
+  chunks <- manyTill
+              (  many1 (satisfy $ \c -> c /= '\n' && c /= '<')
+             <|> liftM snd (htmlTag isCommentTag)
+             <|> count 1 anyChar
+              ) newline
   return $ concat chunks
 
 -- parse raw text for one list item, excluding start marker and continuations
@@ -759,7 +767,7 @@ rawListItem :: MarkdownParser a
 rawListItem start = try $ do
   start
   first <- listLine
-  rest <- many (notFollowedBy listStart >> listLine)
+  rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine)
   blanks <- many blankline
   return $ unlines (first:rest) ++ blanks
 
@@ -777,6 +785,7 @@ listContinuationLine :: MarkdownParser String
 listContinuationLine = try $ do
   notFollowedBy blankline
   notFollowedBy' listStart
+  notFollowedBy' $ htmlTag (~== TagClose "div")
   optional indentSpaces
   result <- anyLine
   return $ result ++ "\n"
@@ -801,8 +810,8 @@ listItem start = try $ do
 orderedList :: MarkdownParser (F Blocks)
 orderedList = try $ do
   (start, style, delim) <- lookAhead anyOrderedListStart
-  unless ((style == DefaultStyle || style == Decimal || style == Example) &&
-          (delim == DefaultDelim || delim == Period)) $
+  unless (style `elem` [DefaultStyle, Decimal, Example] &&
+          delim `elem` [DefaultDelim, Period]) $
     guardEnabled Ext_fancy_lists
   when (style == Example) $ guardEnabled Ext_example_lists
   items <- fmap sequence $ many1 $ listItem
@@ -863,22 +872,6 @@ definitionList = do
   items <- fmap sequence $ many1 definitionListItem
   return $ B.definitionList <$> fmap compactify'DL items
 
-compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
-compactify'DL items =
-  let defs = concatMap snd items
-      defBlocks = reverse $ concatMap B.toList defs
-      isPara (Para _) = True
-      isPara _        = False
-  in  case defBlocks of
-           (Para x:_) -> if not $ any isPara (drop 1 defBlocks)
-                            then let (t,ds) = last items
-                                     lastDef = B.toList $ last ds
-                                     ds' = init ds ++
-                                          [B.fromList $ init lastDef ++ [Plain x]]
-                                  in init items ++ [(t, ds')]
-                            else items
-           _          -> items
-
 --
 -- paragraph block
 --
@@ -891,8 +884,11 @@ para = try $ do
     $ try $ do
             newline
             (blanklines >> return mempty)
-              <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
-              <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
+              <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
+              <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
+              <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
+              <|> (guardEnabled Ext_lists_without_preceding_blankline >>
+                       () <$ lookAhead listStart)
             return $ do
               result' <- result
               case B.toList result' of
@@ -911,7 +907,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline
 --
 
 htmlElement :: MarkdownParser String
-htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
+htmlElement = rawVerbatimBlock
+          <|> strictHtmlBlock
+          <|> liftM snd (htmlTag isBlockTag)
 
 htmlBlock :: MarkdownParser (F Blocks)
 htmlBlock = do
@@ -932,8 +930,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag)
 
 rawVerbatimBlock :: MarkdownParser String
 rawVerbatimBlock = try $ do
-  (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
-                              t == "pre" || t == "style" || t == "script")
+  (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem
+                                                  ["pre", "style", "script"])
                               (const True))
   contents <- manyTill anyChar (htmlTag (~== TagClose tag))
   return $ open ++ contents ++ renderTags [TagClose tag]
@@ -941,8 +939,10 @@ rawVerbatimBlock = try $ do
 rawTeXBlock :: MarkdownParser (F Blocks)
 rawTeXBlock = do
   guardEnabled Ext_raw_tex
-  result <- (B.rawBlock "latex" <$> rawLaTeXBlock)
-        <|> (B.rawBlock "context" <$> rawConTeXtEnvironment)
+  result <- (B.rawBlock "latex" . concat <$>
+                  rawLaTeXBlock `sepEndBy1` blankline)
+        <|> (B.rawBlock "context" . concat <$>
+                  rawConTeXtEnvironment `sepEndBy1` blankline)
   spaces
   return $ return result
 
@@ -951,6 +951,8 @@ rawHtmlBlocks = do
   htmlBlocks <- many1 $ try $ do
                           s <- rawVerbatimBlock <|> try (
                                 do (t,raw) <- htmlTag isBlockTag
+                                   guard $ t ~/= TagOpen "div" [] &&
+                                           t ~/= TagClose "div"
                                    exts <- getOption readerExtensions
                                    -- if open tag, need markdown="1" if
                                    -- markdown_attributes extension is set
@@ -1117,13 +1119,11 @@ multilineTable headless =
 multilineTableHeader :: Bool -- ^ Headerless table
                      -> MarkdownParser (F [Blocks], [Alignment], [Int])
 multilineTableHeader headless = try $ do
-  if headless
-     then return '\n'
-     else tableSep >>~ notFollowedBy blankline
+  unless headless $
+     tableSep >> notFollowedBy blankline
   rawContent  <- if headless
                     then return $ repeat ""
-                    else many1
-                         (notFollowedBy tableSep >> many1Till anyChar newline)
+                    else many1 $ notFollowedBy tableSep >> anyLine
   initSp      <- nonindentSpaces
   dashes      <- many1 (dashedLine '-')
   newline
@@ -1133,12 +1133,12 @@ multilineTableHeader headless = try $ do
                      then liftM (map (:[]) . tail .
                               splitStringByIndices (init indices)) $ lookAhead anyLine
                      else return $ transpose $ map
-                           (\ln -> tail $ splitStringByIndices (init indices) ln)
+                           (tail . splitStringByIndices (init indices))
                            rawContent
   let aligns   = zipWith alignType rawHeadsList lengths
   let rawHeads = if headless
                     then replicate (length dashes) ""
-                    else map unwords rawHeadsList
+                    else map (unlines . map trim) rawHeadsList
   heads <- fmap sequence $
            mapM (parseFromString (mconcat <$> many plain)) $
              map trim rawHeads
@@ -1195,7 +1195,7 @@ gridTableHeader headless = try $ do
   -- RST does not have a notion of alignments
   let rawHeads = if headless
                     then replicate (length dashes) ""
-                    else map unwords $ transpose
+                    else map (unlines . map trim) $ transpose
                        $ map (gridTableSplitLine indices) rawContent
   heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads
   return (heads, aligns, indices)
@@ -1227,11 +1227,20 @@ removeOneLeadingSpace xs =
 gridTableFooter :: MarkdownParser [Char]
 gridTableFooter = blanklines
 
+pipeBreak :: MarkdownParser [Alignment]
+pipeBreak = try $ do
+  nonindentSpaces
+  openPipe <- (True <$ char '|') <|> return False
+  first <- pipeTableHeaderPart
+  rest <- many $ sepPipe *> pipeTableHeaderPart
+  -- surrounding pipes needed for a one-column table:
+  guard $ not (null rest && not openPipe)
+  optional (char '|')
+  blankline
+  return (first:rest)
+
 pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]])
 pipeTable = try $ do
-  let pipeBreak = nonindentSpaces *> optional (char '|') *>
-                      pipeTableHeaderPart `sepBy1` sepPipe <*
-                      optional (char '|') <* blankline
   (heads,aligns) <- try ( pipeBreak >>= \als ->
                      return (return $ replicate (length als) mempty, als))
                   <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als ->
@@ -1250,12 +1259,13 @@ sepPipe = try $ do
 pipeTableRow :: MarkdownParser (F [Blocks])
 pipeTableRow = do
   nonindentSpaces
-  optional (char '|')
+  openPipe <- (True <$ char '|') <|> return False
   let cell = mconcat <$>
                  many (notFollowedBy (blankline <|> char '|') >> inline)
   first <- cell
-  sepPipe
-  rest <- cell `sepBy1` sepPipe
+  rest <- many $ sepPipe *> cell
+  -- surrounding pipes needed for a one-column table:
+  guard $ not (null rest && not openPipe)
   optional (char '|')
   blankline
   let cells  = sequence (first:rest)
@@ -1340,19 +1350,18 @@ inline = choice [ whitespace
                 , str
                 , endline
                 , code
-                , fours
-                , strong
-                , emph
+                , strongOrEmph
                 , note
                 , cite
                 , link
                 , image
                 , math
                 , strikeout
-                , superscript
                 , subscript
+                , superscript
                 , inlineNote  -- after superscript because of ^[link](/foo)^
                 , autoLink
+                , spanHtml
                 , rawHtmlInline
                 , escapedChar
                 , rawLaTeXInline'
@@ -1382,7 +1391,7 @@ ltSign :: MarkdownParser (F Inlines)
 ltSign = do
   guardDisabled Ext_raw_html
     <|> guardDisabled Ext_markdown_in_html_blocks
-    <|> (notFollowedBy' rawHtmlBlocks >> return ())
+    <|> (notFollowedBy' (htmlTag isBlockTag) >> return ())
   char '<'
   return $ return $ B.str "<"
 
@@ -1422,47 +1431,57 @@ math :: MarkdownParser (F Inlines)
 math =  (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
      <|> (return . B.math <$> (mathInline >>= applyMacros'))
 
-mathDisplay :: MarkdownParser String
-mathDisplay =
-      (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
-  <|> (guardEnabled Ext_tex_math_single_backslash >>
-       mathDisplayWith "\\[" "\\]")
-  <|> (guardEnabled Ext_tex_math_double_backslash >>
-       mathDisplayWith "\\\\[" "\\\\]")
-
-mathDisplayWith :: String -> String -> MarkdownParser String
-mathDisplayWith op cl = try $ do
-  string op
-  many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl)
-
-mathInline :: MarkdownParser String
-mathInline =
-      (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
-  <|> (guardEnabled Ext_tex_math_single_backslash >>
-       mathInlineWith "\\(" "\\)")
-  <|> (guardEnabled Ext_tex_math_double_backslash >>
-       mathInlineWith "\\\\(" "\\\\)")
-
-mathInlineWith :: String -> String -> MarkdownParser String
-mathInlineWith op cl = try $ do
-  string op
-  notFollowedBy space
-  words' <- many1Till (count 1 (noneOf "\n\\")
-                   <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
-                   <|> count 1 newline <* notFollowedBy' blankline
-                       *> return " ")
-              (try $ string cl)
-  notFollowedBy digit  -- to prevent capture of $5
-  return $ concat words'
-
--- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row
--- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub
-fours :: Parser [Char] st (F Inlines)
-fours = try $ do
-  x <- char '*' <|> char '_' <|> char '~' <|> char '^'
-  count 2 $ satisfy (==x)
-  rest <- many1 (satisfy (==x))
-  return $ return $ B.str (x:x:x:rest)
+-- Parses material enclosed in *s, **s, _s, or __s.
+-- Designed to avoid backtracking.
+enclosure :: Char
+          -> MarkdownParser (F Inlines)
+enclosure c = do
+  cs <- many1 (char c)
+  (return (B.str cs) <>) <$> whitespace
+    <|> case length cs of
+             3  -> three c
+             2  -> two   c mempty
+             1  -> one   c mempty
+             _  -> return (return $ B.str cs)
+
+-- Parse inlines til you hit one c or a sequence of two cs.
+-- If one c, emit emph and then parse two.
+-- If two cs, emit strong and then parse one.
+-- Otherwise, emit ccc then the results.
+three :: Char -> MarkdownParser (F Inlines)
+three c = do
+  contents <- mconcat <$> many (notFollowedBy (char c) >> inline)
+  (try (string [c,c,c]) >> return ((B.strong . B.emph) <$> contents))
+    <|> (try (string [c,c]) >> one c (B.strong <$> contents))
+    <|> (char c >> two c (B.emph <$> contents))
+    <|> return (return (B.str [c,c,c]) <> contents)
+
+-- Parse inlines til you hit two c's, and emit strong.
+-- If you never do hit two cs, emit ** plus inlines parsed.
+two :: Char -> F Inlines -> MarkdownParser (F Inlines)
+two c prefix' = do
+  let ender = try $ string [c,c]
+  contents <- mconcat <$> many (try $ notFollowedBy ender >> inline)
+  (ender >> return (B.strong <$> (prefix' <> contents)))
+    <|> return (return (B.str [c,c]) <> (prefix' <> contents))
+
+-- Parse inlines til you hit a c, and emit emph.
+-- If you never hit a c, emit * plus inlines parsed.
+one :: Char -> F Inlines -> MarkdownParser (F Inlines)
+one c prefix' = do
+  contents <- mconcat <$> many (  (notFollowedBy (char c) >> inline)
+                           <|> try (string [c,c] >>
+                                    notFollowedBy (char c) >>
+                                    two c mempty) )
+  (char c >> return (B.emph <$> (prefix' <> contents)))
+    <|> return (return (B.str [c]) <> (prefix' <> contents))
+
+strongOrEmph :: MarkdownParser (F Inlines)
+strongOrEmph =  enclosure '*' <|> (checkIntraword >> enclosure '_')
+  where  checkIntraword = do
+           exts <- getOption readerExtensions
+           when (Ext_intraword_underscores `Set.member` exts) $ do
+             guard =<< notAfterString
 
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (Show b)
@@ -1474,28 +1493,6 @@ inlinesBetween start end =
     where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
           innerSpace = try $ whitespace >>~ notFollowedBy' end
 
-emph :: MarkdownParser (F Inlines)
-emph = fmap B.emph <$> nested
-  (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
-    where starStart = char '*' >> lookAhead nonspaceChar
-          starEnd   = notFollowedBy' (() <$ strong) >> char '*'
-          ulStart   = checkIntraword >> char '_' >> lookAhead nonspaceChar
-          ulEnd     = notFollowedBy' (() <$ strong) >> char '_'
-          checkIntraword = do
-            exts <- getOption readerExtensions
-            when (Ext_intraword_underscores `Set.member` exts) $ do
-              pos <- getPosition
-              lastStrPos <- stateLastStrPos <$> getState
-              guard $ lastStrPos /= Just pos
-
-strong :: MarkdownParser (F Inlines)
-strong = fmap B.strong <$> nested
-  (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
-    where starStart = string "**" >> lookAhead nonspaceChar
-          starEnd   = try $ string "**"
-          ulStart   = string "__" >> lookAhead nonspaceChar
-          ulEnd     = try $ string "__"
-
 strikeout :: MarkdownParser (F Inlines)
 strikeout = fmap B.strikeout <$>
  (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
@@ -1526,8 +1523,7 @@ nonEndline = satisfy (/='\n')
 str :: MarkdownParser (F Inlines)
 str = do
   result <- many1 alphaNum
-  pos <- getPosition
-  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  updateLastStrPos
   let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
   isSmart <- getOption readerSmart
   if isSmart
@@ -1558,14 +1554,17 @@ endline :: MarkdownParser (F Inlines)
 endline = try $ do
   newline
   notFollowedBy blankline
-  guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
-  guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
   -- parse potential list-starts differently if in a list:
   st <- getState
-  when (stateParserContext st == ListItemState) $ do
-     notFollowedBy' bulletListStart
-     notFollowedBy' anyOrderedListStart
-  (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+  when (stateParserContext st == ListItemState) $ notFollowedBy listStart
+  guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
+  guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
+  guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
+  guardDisabled Ext_backtick_code_blocks <|>
+     notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
+  (eof >> return mempty)
+    <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
+    <|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
     <|> (return $ return B.space)
 
 --
@@ -1660,6 +1659,7 @@ bareURL :: MarkdownParser (F Inlines)
 bareURL = try $ do
   guardEnabled Ext_autolink_bare_uris
   (orig, src) <- uri <|> emailAddress
+  notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
   return $ return $ B.link src "" (B.str orig)
 
 autoLink :: MarkdownParser (F Inlines)
@@ -1730,6 +1730,38 @@ inBrackets parser = do
   char ']'
   return $ "[" ++ contents ++ "]"
 
+spanHtml :: MarkdownParser (F Inlines)
+spanHtml = try $ do
+  guardEnabled Ext_markdown_in_html_blocks
+  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
+  contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
+  let ident = fromMaybe "" $ lookup "id" attrs
+  let classes = maybe [] words $ lookup "class" attrs
+  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+  case lookup "style" keyvals of
+       Just s | null ident && null classes &&
+            map toLower (filter (`notElem` " \t;") s) ==
+                 "font-variant:small-caps"
+         -> return $ B.smallcaps <$> contents
+       _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents
+
+divHtml :: MarkdownParser (F Blocks)
+divHtml = try $ do
+  guardEnabled Ext_markdown_in_html_blocks
+  (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+  bls <- option "" (blankline >> option "" blanklines)
+  contents <- mconcat <$>
+              many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
+  closed <- option False (True <$ htmlTag (~== TagClose "div"))
+  if closed
+     then do
+       let ident = fromMaybe "" $ lookup "id" attrs
+       let classes = maybe [] words $ lookup "class" attrs
+       let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+       return $ B.divWith (ident, classes, keyvals) <$> contents
+     else -- avoid backtracing
+       return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+
 rawHtmlInline :: MarkdownParser (F Inlines)
 rawHtmlInline = do
   guardEnabled Ext_raw_html
@@ -1745,11 +1777,12 @@ rawHtmlInline = do
 cite :: MarkdownParser (F Inlines)
 cite = do
   guardEnabled Ext_citations
-  getOption readerReferences >>= guard . not . null
-  citations <- textualCite <|> normalCite
-  return $ flip B.cite mempty <$> citations
+  citations <- textualCite
+            <|> do (cs, raw) <- withRaw normalCite
+                   return $ (flip B.cite (B.text raw)) <$> cs
+  return citations
 
-textualCite :: MarkdownParser (F [Citation])
+textualCite :: MarkdownParser (F Inlines)
 textualCite = try $ do
   (_, key) <- citeKey
   let first = Citation{ citationId      = key
@@ -1759,10 +1792,18 @@ textualCite = try $ do
                       , citationNoteNum = 0
                       , citationHash    = 0
                       }
-  mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite
+  mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
   case mbrest of
-       Just rest  -> return $ (first:) <$> rest
-       Nothing    -> option (return [first]) $ bareloc first
+       Just (rest, raw) ->
+         return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
+               <$> rest
+       Nothing   ->
+         (do (cs, raw) <- withRaw $ bareloc first
+             return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs)
+         <|> return (do st <- askF
+                        return $ case M.lookup key (stateExamples st) of
+                                 Just n -> B.str (show n)
+                                 _      -> B.cite [first] $ B.str $ '@':key)
 
 bareloc :: Citation -> MarkdownParser (F [Citation])
 bareloc c = try $ do
@@ -1786,18 +1827,6 @@ normalCite = try $ do
   char ']'
   return citations
 
-citeKey :: MarkdownParser (Bool, String)
-citeKey = try $ do
-  suppress_author <- option False (char '-' >> return True)
-  char '@'
-  first <- letter
-  let internal p = try $ p >>~ lookAhead (letter <|> digit)
-  rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/")
-  let key = first:rest
-  citations' <- map CSL.refId <$> getOption readerReferences
-  guard $ key `elem` citations'
-  return (suppress_author, key)
-
 suffix :: MarkdownParser (F Inlines)
 suffix = try $ do
   hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
@@ -1836,7 +1865,7 @@ smart :: MarkdownParser (F Inlines)
 smart = do
   getOption readerSmart >>= guard
   doubleQuoted <|> singleQuoted <|>
-    choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses])
+    choice (map (return <$>) [apostrophe, dash, ellipses])
 
 singleQuoted :: MarkdownParser (F Inlines)
 singleQuoted = try $ do
@@ -1855,4 +1884,3 @@ doubleQuoted = try $ do
   (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return
        (fmap B.doubleQuoted . trimInlinesF $ contents))
    <|> (return $ return (B.str "\8220") <> contents)
-
author	Clare Macrae <github@cfmacrae.fastmail.co.uk>	2014-06-29 19:22:31 +0100
committer	Clare Macrae <github@cfmacrae.fastmail.co.uk>	2014-06-29 19:22:31 +0100
commit	717e16660d1ee83f690b35d0aa9b60c8ac9d6b61 (patch)
tree	aa850d4ee99fa0b14da9ba0396ba6aa67e2037e3 /src/Text/Pandoc/Readers/Markdown.hs
parent	fccfc8429cf4d002df37977f03508c9aae457416 (diff)
parent	ce69021e42d7bf50deccba2a52ed4717f6ddac10 (diff)
download	pandoc-717e16660d1ee83f690b35d0aa9b60c8ac9d6b61.tar.gz