{- Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE RecordWildCards #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Parsers for Org-mode block elements. -} module Text.Pandoc.Readers.Org.Blocks ( blockList , meta ) where import Text.Pandoc.Readers.Org.BlockStarts import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks) import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, originalLang, translateLang) import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) import Control.Monad (foldM, guard, mzero, void) import Data.Char (isSpace, toLower, toUpper) import Data.Default (Default) import Data.List (foldl', isPrefixOf) import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) -- -- parsing blocks -- -- | Get a list of blocks. blockList :: PandocMonad m => OrgParser m [Block] blockList = do headlines <- documentTree blocks inline st <- getState headlineBlocks <- headlineToBlocks $ runF headlines st -- ignore first headline, it's the document's title return . drop 1 . B.toList $ headlineBlocks -- | Get the meta information saved in the state. meta :: Monad m => OrgParser m Meta meta = do meta' <- metaExport runF meta' <$> getState blocks :: PandocMonad m => OrgParser m (F Blocks) blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) block :: PandocMonad m => OrgParser m (F Blocks) block = choice [ mempty <$ blanklines , table , orgBlock , figure , example , genericDrawer , include , specialLine , horizontalRule , list , latexFragment , noteBlock , paraOrPlain ] <?> "block" -- | Parse a horizontal rule into a block element horizontalRule :: Monad m => OrgParser m (F Blocks) horizontalRule = return B.horizontalRule <$ try hline -- -- Block Attributes -- -- | Attributes that may be added to figures (like a name or caption). data BlockAttributes = BlockAttributes { blockAttrName :: Maybe String , blockAttrLabel :: Maybe String , blockAttrCaption :: Maybe (F Inlines) , blockAttrKeyValues :: [(String, String)] } -- | Convert BlockAttributes into pandoc Attr attrFromBlockAttributes :: BlockAttributes -> Attr attrFromBlockAttributes BlockAttributes{..} = let ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of Nothing -> [] Just clsStr -> words clsStr kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) stringyMetaAttribute :: Monad m => OrgParser m (String, String) stringyMetaAttribute = try $ do metaLineStart attrName <- map toUpper <$> many1Till nonspaceChar (char ':') skipSpaces attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) blockAttributes :: PandocMonad m => OrgParser m BlockAttributes blockAttributes = try $ do kv <- many stringyMetaAttribute guard $ all (attrCheck . fst) kv let caption = foldl' (appendValues "CAPTION") Nothing kv let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv let label = lookup "LABEL" kv caption' <- case caption of Nothing -> return Nothing Just s -> Just <$> parseFromString inlines (s ++ "\n") kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs return BlockAttributes { blockAttrName = name , blockAttrLabel = label , blockAttrCaption = caption' , blockAttrKeyValues = kvAttrs' } where attrCheck :: String -> Bool attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"] appendValues :: String -> Maybe String -> (String, String) -> Maybe String appendValues attrName accValue (key, value) = if key /= attrName then accValue else case accValue of Just acc -> Just $ acc ++ ' ':value Nothing -> Just value -- | Parse key-value pairs for HTML attributes keyValues :: Monad m => OrgParser m [(String, String)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where key :: Monad m => OrgParser m String key = try $ skipSpaces *> char ':' *> many1 nonspaceChar value :: Monad m => OrgParser m String value = skipSpaces *> manyTill anyChar endOfValue endOfValue :: Monad m => OrgParser m () endOfValue = lookAhead $ (() <$ try (many1 spaceChar <* key)) <|> () <$ newline -- -- Org Blocks (#+BEGIN_... / #+END_...) -- -- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE. orgBlock :: PandocMonad m => OrgParser m (F Blocks) orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ case map toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) "latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) "ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) "example" -> rawBlockLines (return . exampleCode) "quote" -> parseBlockLines (fmap B.blockQuote) "verse" -> verseBlock "src" -> codeBlock blockAttrs _ -> parseBlockLines $ let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where blockHeaderStart :: Monad m => OrgParser m String blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord lowercase :: String -> String lowercase = map toLower rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType parseFromString blocks (raw ++ "\n") -- | Read the raw string content of a block rawBlockContent :: Monad m => String -> OrgParser m String rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop return . unlines . stripIndent . map (tabsToSpaces tabLen . commaEscaped) $ blkLines where rawLine :: Monad m => OrgParser m String rawLine = try $ ("" <$ blankline) <|> anyLine blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) stripIndent :: [String] -> [String] stripIndent strs = map (drop (shortestIndent strs)) strs shortestIndent :: [String] -> Int shortestIndent = foldr (min . length . takeWhile isSpace) maxBound . filter (not . null) tabsToSpaces :: Int -> String -> String tabsToSpaces _ [] = [] tabsToSpaces tabLen cs'@(c:cs) = case c of ' ' -> ' ':tabsToSpaces tabLen cs '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs _ -> cs' commaEscaped :: String -> String commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs commaEscaped (' ':cs) = ' ':commaEscaped cs commaEscaped ('\t':cs) = '\t':commaEscaped cs commaEscaped cs = cs -- | Read but ignore all remaining block headers. ignHeaders :: Monad m => OrgParser m () ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. exportBlock :: Monad m => String -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType returnF (B.rawBlock (map toLower exportType) contents) verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType fmap B.lineBlock . sequence <$> mapM parseVerseLine (lines content) where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) parseVerseLine cs = do let (initialSpaces, indentedLine) = span isSpace cs let nbspIndent = if null initialSpaces then mempty else B.str $ map (const '\160') initialSpaces line <- parseFromString inlines (indentedLine ++ "\n") return (trimInlinesF $ pure nbspIndent <> line) -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) content <- rawBlockContent blockType resultsContent <- option mempty babelResultsBlock let id' = fromMaybe mempty $ blockAttrName blockAttrs let codeBlck = B.codeBlockWith ( id', classes, kv ) content let labelledBlck = maybe (pure codeBlck) (labelDiv codeBlck) (blockAttrCaption blockAttrs) return $ (if exportsCode kv then labelledBlck else mempty) <> (if exportsResults kv then resultsContent else mempty) where labelDiv :: Blocks -> F Inlines -> F Blocks labelDiv blk value = B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk) labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) exportsCode :: [(String, String)] -> Bool exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" exportsResults :: [(String, String)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -- | Parse the result of an evaluated babel code block. babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks) babelResultsBlock = try $ do blanklines resultsMarker <|> (lookAhead . void . try $ manyTill (metaLineStart *> anyLineNewline) resultsMarker) block where resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes parameters <- manyTill blockOption newline return ( translateLang language : switchClasses , originalLang language <> switchKv <> parameters ) switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) switchesAsAttributes = try $ do switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where addToAttr :: (Char, Maybe String, SwitchPolarity) -> ([String], [(String, String)]) -> ([String], [(String, String)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of Just num -> ("startFrom", num):kv Nothing -> kv cls' = case pol of SwitchPlus -> "continuedSourceBlock":cls SwitchMinus -> cls in ("numberLines":cls', kv') addToAttr _ x = x -- | Whether a switch flag is specified with @+@ or @-@. data SwitchPolarity = SwitchPlus | SwitchMinus deriving (Show, Eq) -- | Parses a switch's polarity. switchPolarity :: Monad m => OrgParser m SwitchPolarity switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -- | Parses a source block switch option. switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) switch = try $ lineNumberSwitch <|> labelSwitch <|> simpleSwitch where simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter labelSwitch = genericSwitch 'l' $ char '"' *> many1Till nonspaceChar (char '"') -- | Generic source block switch-option parser. genericSwitch :: Monad m => Char -> OrgParser m String -> OrgParser m (Char, Maybe String, SwitchPolarity) genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p return (c, arg, polarity) -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) lineNumberSwitch = genericSwitch 'n' (many digit) blockOption :: Monad m => OrgParser m (String, String) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) orgParamValue :: Monad m => OrgParser m String orgParamValue = try $ skipSpaces *> notFollowedBy orgArgKey *> noneOf "\n\r" `many1Till` endOfValue <* skipSpaces where endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r") <|> try (skipSpaces1 <* orgArgKey) -- -- Drawers -- -- | A generic drawer which has no special meaning for org-mode. -- Whether or not this drawer is included in the output depends on the drawers -- export setting. genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do name <- map toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) state <- getState -- Include drawer if it is explicitly included in or not explicitly excluded -- from the list of drawers that should be exported. PROPERTIES drawers are -- never exported. case exportDrawers . orgStateExportSettings $ state of _ | name == "PROPERTIES" -> return mempty Left names | name `elem` names -> return mempty Right names | name `notElem` names -> return mempty _ -> drawerDiv name <$> parseLines content where parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) parseLines = parseFromString blocks . (++ "\n") . unlines drawerDiv :: String -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) drawerLine :: Monad m => OrgParser m String drawerLine = anyLine drawerEnd :: Monad m => OrgParser m String drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline -- -- Figures -- -- | Figures or an image paragraph (i.e. an image on a line by itself). Only -- images with a caption attribute are interpreted as figures. figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph case cleanLinkString src of Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where selfTarget :: PandocMonad m => OrgParser m String selfTarget = try $ char '[' *> linkTarget <* char ']' imageBlock :: Bool -> BlockAttributes -> String -> F Blocks imageBlock isFigure figAttrs imgSrc = let figName = fromMaybe mempty $ blockAttrName figAttrs figLabel = fromMaybe mempty $ blockAttrLabel figAttrs figCaption = fromMaybe mempty $ blockAttrCaption figAttrs figKeyVals = blockAttrKeyValues figAttrs attr = (figLabel, mempty, figKeyVals) figTitle = (if isFigure then withFigPrefix else id) figName in B.para . B.imageWith attr imgSrc figTitle <$> figCaption withFigPrefix :: String -> String withFigPrefix cs = if "fig:" `isPrefixOf` cs then cs else "fig:" ++ cs -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- -- Examples -- -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine where exampleLine :: Monad m => OrgParser m String exampleLine = try $ exampleLineStart *> anyLine exampleCode :: String -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) -- -- Comments, Options and Metadata -- specialLine :: PandocMonad m => OrgParser m (F Blocks) specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine -- | Include the content of a file. include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget blockType <- optionMaybe $ skipSpaces *> many1 alphaNum blocksParser <- case blockType of Just "example" -> return $ pure . B.codeBlock <$> parseRaw Just "export" -> do format <- skipSpaces *> many (noneOf "\n\r\t ") return $ pure . B.rawBlock format <$> parseRaw Just "src" -> do language <- skipSpaces *> many (noneOf "\n\r\t ") let attr = (mempty, [language], mempty) return $ pure . B.codeBlockWith attr <$> parseRaw _ -> return $ pure . B.fromList <$> blockList anyLine insertIncludedFileF blocksParser ["."] filename where includeTarget :: PandocMonad m => OrgParser m FilePath includeTarget = do char '"' manyTill (noneOf "\n\r\t") (char '"') parseRaw :: PandocMonad m => OrgParser m String parseRaw = many anyChar rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart key <- metaKey if key `elem` ["latex", "html", "texinfo", "beamer"] then B.rawBlock key <$> anyLine else mzero commentLine :: Monad m => OrgParser m Blocks commentLine = commentLineStart *> anyLine *> pure mempty -- -- Tables -- data ColumnProperty = ColumnProperty { columnAlignment :: Maybe Alignment , columnRelWidth :: Maybe Int } deriving (Show, Eq) instance Default ColumnProperty where def = ColumnProperty Nothing Nothing data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [ColumnProperty] | OrgHlineRow -- OrgTable is strongly related to the pandoc table ADT. Using the same -- (i.e. pandoc-global) ADT would mean that the reader would break if the -- global structure was to be changed, which would be bad. The final table -- should be generated using a builder function. data OrgTable = OrgTable { orgTableColumnProperties :: [ColumnProperty] , orgTableHeader :: [Blocks] , orgTableRows :: [[Blocks]] } table :: PandocMonad m => OrgParser m (F Blocks) table = gridTableWith blocks True <|> orgTable -- | A normal org table orgTable :: PandocMonad m => OrgParser m (F Blocks) orgTable = try $ do -- don't allow a table on the first line of a list item; org requires that -- tables start at first non-space character on the line let isFirstInListItem st = orgStateParserContext st == ListItemState && isNothing (orgStateLastPreCharPos st) guard =<< not . isFirstInListItem <$> getState blockAttrs <- blockAttributes lookAhead tableStart do rows <- tableRows let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable -> Inlines -> Blocks orgToPandocTable (OrgTable colProps heads lns) caption = let totalWidth = if any isJust (map columnRelWidth colProps) then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps else Nothing in B.table caption (map (convertColProp totalWidth) colProps) heads lns where convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double) convertColProp totalWidth colProp = let align' = fromMaybe AlignDefault $ columnAlignment colProp width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) <$> columnRelWidth colProp <*> totalWidth in (align', width') tableRows :: PandocMonad m => OrgParser m [OrgTableRow] tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: PandocMonad m => OrgParser m OrgTableRow tableContentRow = try $ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) tableContentCell :: PandocMonad m => OrgParser m (F Blocks) tableContentCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell tableAlignRow :: Monad m => OrgParser m OrgTableRow tableAlignRow = try $ do tableStart colProps <- many1Till columnPropertyCell newline -- Empty rows are regular (i.e. content) rows, not alignment rows. guard $ any (/= def) colProps return $ OrgAlignRow colProps columnPropertyCell :: Monad m => OrgParser m ColumnProperty columnPropertyCell = emptyCell <|> propCell <?> "alignment info" where emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell) propCell = try $ ColumnProperty <$> (skipSpaces *> char '<' *> optionMaybe tableAlignFromChar) <*> (optionMaybe (many1 digit >>= safeRead) <* char '>' <* emptyCell) tableAlignFromChar :: Monad m => OrgParser m Alignment tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft , char 'c' *> return AlignCenter , char 'r' *> return AlignRight ] tableHline :: Monad m => OrgParser m OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) endOfCell :: Monad m => OrgParser m Char endOfCell = try $ char '|' <|> lookAhead newline rowsToTable :: [OrgTableRow] -> F OrgTable rowsToTable = foldM rowToContent emptyTable where emptyTable = OrgTable mempty mempty mempty normalizeTable :: OrgTable -> OrgTable normalizeTable (OrgTable colProps heads rows) = OrgTable colProps' heads rows where refRow = if heads /= mempty then heads else case rows of (r:_) -> r _ -> mempty cols = length refRow fillColumns base padding = take cols $ base ++ repeat padding colProps' = fillColumns colProps def -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTable -> OrgTableRow -> F OrgTable rowToContent tbl row = case row of OrgHlineRow -> return singleRowPromotedToHeader OrgAlignRow props -> return . setProperties $ props OrgContentRow cs -> appendToBody cs where singleRowPromotedToHeader :: OrgTable singleRowPromotedToHeader = case tbl of OrgTable{ orgTableHeader = [], orgTableRows = [b] } -> tbl{ orgTableHeader = b , orgTableRows = [] } _ -> tbl setProperties :: [ColumnProperty] -> OrgTable setProperties ps = tbl{ orgTableColumnProperties = ps } appendToBody :: F [Blocks] -> F OrgTable appendToBody frow = do newRow <- frow let oldRows = orgTableRows tbl -- NOTE: This is an inefficient O(n) operation. This should be changed -- if performance ever becomes a problem. return tbl{ orgTableRows = oldRows ++ [newRow] } -- -- LaTeX fragments -- latexFragment :: Monad m => OrgParser m (F Blocks) latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) returnF $ B.rawBlock "latex" (content `inLatexEnv` envName) where c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" , c , "\\end{", e, "}\n" ] latexEnd :: Monad m => String -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces <* string ("\\end{" ++ envName ++ "}") <* blankline -- -- Footnote defintions -- noteBlock :: PandocMonad m => OrgParser m (F Blocks) noteBlock = try $ do ref <- noteMarker <* skipSpaces <* updateLastPreCharPos content <- mconcat <$> blocksTillHeaderOrNote addToNotesTable (ref, content) return mempty where blocksTillHeaderOrNote = many1Till block (eof <|> () <$ lookAhead noteMarker <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) paraOrPlain = try $ do -- Make sure we are not looking at a headline notFollowedBy' (char '*' *> oneOf " *") ils <- inlines nl <- option False (newline *> return True) -- Read block as paragraph, except if we are in a list context and the block -- is directly followed by a list item, in which case the block is read as -- plain text. try (guard nl *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) *> return (B.para <$> ils)) <|> return (B.plain <$> ils) -- -- list blocks -- list :: PandocMonad m => OrgParser m (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" definitionList :: PandocMonad m => OrgParser m (F Blocks) definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) fmap (B.definitionList . compactifyDL) . sequence <$> many1 (definitionListItem $ bulletListStart' (Just n)) bulletList :: PandocMonad m => OrgParser m (F Blocks) bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) fmap (B.bulletList . compactify) . sequence <$> many1 (listItem (bulletListStart' $ Just n)) orderedList :: PandocMonad m => OrgParser m (F Blocks) orderedList = fmap (B.orderedList . compactify) . sequence <$> many1 (listItem orderedListStart) bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int -- returns length of bulletList prefix, inclusive of marker bulletListStart' Nothing = do ind <- length <$> many spaceChar oneOf (bullets $ ind == 0) skipSpaces1 return (ind + 1) bulletListStart' (Just n) = do count (n-1) spaceChar oneOf (bullets $ n == 1) many1 spaceChar return n -- Unindented lists are legal, but they can't use '*' bullets. -- We return n to maintain compatibility with the generic listItem. bullets :: Bool -> String bullets unindented = if unindented then "+-" else "*+-" definitionListItem :: PandocMonad m => OrgParser m Int -> OrgParser m (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inlines term contents' <- parseFromString blocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' where definitionMarker = spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) -- parse raw text for one list item, excluding start marker and continuations listItem :: PandocMonad m => OrgParser m Int -> OrgParser m (F Blocks) listItem start = try . withContext ListItemState $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) parseFromString blocks $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Monad m => Int -> OrgParser m String listContinuation markerLength = try $ notFollowedBy' blankline *> (mappend <$> (concat <$> many1 listLine) <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline