aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-05-25 22:37:14 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2016-05-25 22:37:14 -0700
commitf54873d5ea757787ec816336c61c24a813799554 (patch)
treebf65c0eb2bccfa5177514e0169b81dcdf0843baa /src
parentcc937eea2fbd8a7bb07672bfed3b924de8573646 (diff)
parenteea6d6568f99eda689b93210a22692c7f79b4bbf (diff)
downloadpandoc-f54873d5ea757787ec816336c61c24a813799554.tar.gz
Merge pull request #2946 from tarleb/org-modularization
Org-mode reader modularization
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Org.hs1638
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs112
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs891
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs715
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs7
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs201
6 files changed, 1935 insertions, 1629 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 621e7107f..1042b5a21 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,5 +1,3 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -29,32 +27,15 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Org ( readOrg ) where
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks )
+import Text.Pandoc.Readers.Org.Blocks ( blockList, meta )
+import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM )
+import Text.Pandoc.Readers.Org.ParserState ( OrgParserState (..) )
+
import Text.Pandoc.Definition
-import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
import Text.Pandoc.Options
-import qualified Text.Pandoc.Parsing as P
-import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
- , anyLine, blanklines, newline
- , orderedListMarker
- , parseFromString
- )
-import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Shared (compactify', compactify'DL)
-import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
-import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Control.Arrow (first)
-import Control.Monad (foldM, guard, mplus, mzero, when)
-import Control.Monad.Reader ( Reader, runReader )
-import Data.Char (isAlphaNum, isSpace, toLower, toUpper)
-import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
-import qualified Data.Map as M
-import Data.Maybe ( fromMaybe, isNothing )
-import Network.HTTP (urlEncode)
+import Control.Monad.Reader ( runReader )
-- | Parse org-mode string and return a Pandoc document.
@@ -63,165 +44,17 @@ readOrg :: ReaderOptions -- ^ Reader options
-> Either PandocError Pandoc
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
--- | The parser used to read org files.
-type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
-
---
--- Functions acting on the parser state
---
-recordAnchorId :: String -> OrgParser ()
-recordAnchorId i = updateState $ \s ->
- s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
-
-updateLastForbiddenCharPos :: OrgParser ()
-updateLastForbiddenCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-
-updateLastPreCharPos :: OrgParser ()
-updateLastPreCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
-
-pushToInlineCharStack :: Char -> OrgParser ()
-pushToInlineCharStack c = updateState $ \s ->
- s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
-
-popInlineCharStack :: OrgParser ()
-popInlineCharStack = updateState $ \s ->
- s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
-
-surroundingEmphasisChar :: OrgParser [Char]
-surroundingEmphasisChar =
- take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
-
-startEmphasisNewlinesCounting :: Int -> OrgParser ()
-startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
- s{ orgStateEmphasisNewlines = Just maxNewlines }
-
-decEmphasisNewlinesCount :: OrgParser ()
-decEmphasisNewlinesCount = updateState $ \s ->
- s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
-
-newlinesCountWithinLimits :: OrgParser Bool
-newlinesCountWithinLimits = do
- st <- getState
- return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
-
-resetEmphasisNewlines :: OrgParser ()
-resetEmphasisNewlines = updateState $ \s ->
- s{ orgStateEmphasisNewlines = Nothing }
-
-addLinkFormat :: String
- -> (String -> String)
- -> OrgParser ()
-addLinkFormat key formatter = updateState $ \s ->
- let fs = orgStateLinkFormatters s
- in s{ orgStateLinkFormatters = M.insert key formatter fs }
-
-addToNotesTable :: OrgNoteRecord -> OrgParser ()
-addToNotesTable note = do
- oldnotes <- orgStateNotes' <$> getState
- updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
-
---
--- Export Settings
---
-exportSetting :: OrgParser ()
-exportSetting = choice
- [ booleanSetting "^" setExportSubSuperscripts
- , ignoredSetting "'"
- , ignoredSetting "*"
- , ignoredSetting "-"
- , ignoredSetting ":"
- , ignoredSetting "<"
- , ignoredSetting "\\n"
- , ignoredSetting "arch"
- , ignoredSetting "author"
- , ignoredSetting "c"
- , ignoredSetting "creator"
- , complementableListSetting "d" setExportDrawers
- , ignoredSetting "date"
- , ignoredSetting "e"
- , ignoredSetting "email"
- , ignoredSetting "f"
- , ignoredSetting "H"
- , ignoredSetting "inline"
- , ignoredSetting "num"
- , ignoredSetting "p"
- , ignoredSetting "pri"
- , ignoredSetting "prop"
- , ignoredSetting "stat"
- , ignoredSetting "tags"
- , ignoredSetting "tasks"
- , ignoredSetting "tex"
- , ignoredSetting "timestamp"
- , ignoredSetting "title"
- , ignoredSetting "toc"
- , ignoredSetting "todo"
- , ignoredSetting "|"
- ] <?> "export setting"
-
-booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser ()
-booleanSetting settingIdentifier setter = try $ do
- string settingIdentifier
- char ':'
- value <- elispBoolean
- updateState $ modifyExportSettings setter value
-
--- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are
--- interpreted as true.
-elispBoolean :: OrgParser Bool
-elispBoolean = try $ do
- value <- many1 nonspaceChar
- return $ case map toLower value of
- "nil" -> False
- "{}" -> False
- "()" -> False
- _ -> True
-
--- | A list or a complement list (i.e. a list starting with `not`).
-complementableListSetting :: String
- -> ExportSettingSetter (Either [String] [String])
- -> OrgParser ()
-complementableListSetting settingIdentifier setter = try $ do
- _ <- string settingIdentifier <* char ':'
- value <- choice [ Left <$> complementStringList
- , Right <$> stringList
- , (\b -> if b then Left [] else Right []) <$> elispBoolean
- ]
- updateState $ modifyExportSettings setter value
- where
- -- Read a plain list of strings.
- stringList :: OrgParser [String]
- stringList = try $
- char '('
- *> sepBy elispString spaces
- <* char ')'
-
- -- Read an emacs lisp list specifying a complement set.
- complementStringList :: OrgParser [String]
- complementStringList = try $
- string "(not "
- *> sepBy elispString spaces
- <* char ')'
-
- elispString :: OrgParser String
- elispString = try $
- char '"'
- *> manyTill alphaNum (char '"')
-
-ignoredSetting :: String -> OrgParser ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
-
--
-- Parser
--
parseOrg :: OrgParser Pandoc
parseOrg = do
- blocks' <- parseBlocks
- st <- getState
- let meta = runF (orgStateMeta' st) st
- let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
- return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
+ blocks' <- blockList
+ meta' <- meta
+ return . Pandoc meta' $ removeUnwantedBlocks blocks'
+ where
+ removeUnwantedBlocks :: [Block] -> [Block]
+ removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
-- | Drop COMMENT headers and the document tree below those headers.
dropCommentTrees :: [Block] -> [Block]
@@ -256,1452 +89,3 @@ isHeaderLevelLowerEq n blk =
case blk of
(Header level _ _) -> n >= level
_ -> False
-
-
---
--- Adaptions and specializations of parsing utilities
---
-
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
-parseFromString :: OrgParser a -> String -> OrgParser a
-parseFromString parser str' = do
- oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
- updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
- result <- P.parseFromString parser str'
- updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
- return result
-
--- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
-newline :: OrgParser Char
-newline =
- P.newline
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: OrgParser [Char]
-blanklines =
- P.blanklines
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
-anyLine :: OrgParser String
-anyLine =
- P.anyLine
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Succeeds when we're in list context.
-inList :: OrgParser ()
-inList = do
- ctx <- orgStateParserContext <$> getState
- guard (ctx == ListItemState)
-
--- | Parse in different context
-withContext :: ParserContext -- ^ New parser context
- -> OrgParser a -- ^ Parser to run in that context
- -> OrgParser a
-withContext context parser = do
- oldContext <- orgStateParserContext <$> getState
- updateState $ \s -> s{ orgStateParserContext = context }
- result <- parser
- updateState $ \s -> s{ orgStateParserContext = oldContext }
- return result
-
---
--- parsing blocks
---
-
-parseBlocks :: OrgParser (F Blocks)
-parseBlocks = mconcat <$> manyTill block eof
-
-block :: OrgParser (F Blocks)
-block = choice [ mempty <$ blanklines
- , table
- , orgBlock
- , figure
- , example
- , genericDrawer
- , specialLine
- , header
- , return <$> hline
- , list
- , latexFragment
- , noteBlock
- , paraOrPlain
- ] <?> "block"
-
-
---
--- Block Attributes
---
-
--- | Attributes that may be added to figures (like a name or caption).
-data BlockAttributes = BlockAttributes
- { blockAttrName :: Maybe String
- , blockAttrCaption :: Maybe (F Inlines)
- , blockAttrKeyValues :: [(String, String)]
- }
-
-stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String)
-stringyMetaAttribute attrCheck = try $ do
- metaLineStart
- attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
- guard $ attrCheck attrName
- skipSpaces
- attrValue <- anyLine
- return (attrName, attrValue)
-
-blockAttributes :: OrgParser BlockAttributes
-blockAttributes = try $ do
- kv <- many (stringyMetaAttribute attrCheck)
- let caption = foldl' (appendValues "CAPTION") Nothing kv
- let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
- let name = lookup "NAME" kv
- caption' <- maybe (return Nothing)
- (fmap Just . parseFromString parseInlines)
- caption
- kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
- return $ BlockAttributes
- { blockAttrName = name
- , blockAttrCaption = caption'
- , blockAttrKeyValues = kvAttrs'
- }
- where
- attrCheck :: String -> Bool
- attrCheck attr =
- case attr of
- "NAME" -> True
- "CAPTION" -> True
- "ATTR_HTML" -> True
- _ -> False
-
- appendValues :: String -> Maybe String -> (String, String) -> Maybe String
- appendValues attrName accValue (key, value) =
- if key /= attrName
- then accValue
- else case accValue of
- Just acc -> Just $ acc ++ ' ':value
- Nothing -> Just value
-
-keyValues :: OrgParser [(String, String)]
-keyValues = try $
- manyTill ((,) <$> key <*> value) newline
- where
- key :: OrgParser String
- key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
-
- value :: OrgParser String
- value = skipSpaces *> manyTill anyChar endOfValue
-
- endOfValue :: OrgParser ()
- endOfValue =
- lookAhead $ (() <$ try (many1 spaceChar <* key))
- <|> () <$ P.newline
-
-
---
--- Org Blocks (#+BEGIN_... / #+END_...)
---
-
-type BlockProperties = (Int, String) -- (Indentation, Block-Type)
-
-updateIndent :: BlockProperties -> Int -> BlockProperties
-updateIndent (_, blkType) indent = (indent, blkType)
-
-orgBlock :: OrgParser (F Blocks)
-orgBlock = try $ do
- blockAttrs <- blockAttributes
- blockProp@(_, blkType) <- blockHeaderStart
- ($ blockProp) $
- case blkType of
- "comment" -> withRaw' (const mempty)
- "html" -> withRaw' (return . (B.rawBlock blkType))
- "latex" -> withRaw' (return . (B.rawBlock blkType))
- "ascii" -> withRaw' (return . (B.rawBlock blkType))
- "example" -> withRaw' (return . exampleCode)
- "quote" -> withParsed (fmap B.blockQuote)
- "verse" -> verseBlock
- "src" -> codeBlock blockAttrs
- _ -> withParsed (fmap $ divWithClass blkType)
-
-blockHeaderStart :: OrgParser (Int, String)
-blockHeaderStart = try $ (,) <$> indentation <*> blockType
- where
- blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
-
-indentation :: OrgParser Int
-indentation = try $ do
- tabStop <- getOption readerTabStop
- s <- many spaceChar
- return $ spaceLength tabStop s
-
-spaceLength :: Int -> String -> Int
-spaceLength tabStop s = (sum . map charLen) s
- where
- charLen ' ' = 1
- charLen '\t' = tabStop
- charLen _ = 0
-
-withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
-withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
-
-withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
-withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
-
-ignHeaders :: OrgParser ()
-ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-
-divWithClass :: String -> Blocks -> Blocks
-divWithClass cls = B.divWith ("", [cls], [])
-
-verseBlock :: BlockProperties -> OrgParser (F Blocks)
-verseBlock blkProp = try $ do
- ignHeaders
- content <- rawBlockContent blkProp
- fmap B.para . mconcat . intersperse (pure B.linebreak)
- <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content)
-
-exportsCode :: [(String, String)] -> Bool
-exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
- || ("rundoc-exports", "results") `elem` attrs)
-
-exportsResults :: [(String, String)] -> Bool
-exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
- || ("rundoc-exports", "both") `elem` attrs
-
-followingResultsBlock :: OrgParser (Maybe (F Blocks))
-followingResultsBlock =
- optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
- *> blankline
- *> block)
-
-codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks)
-codeBlock blockAttrs blkProp = do
- skipSpaces
- (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
- leadingIndent <- lookAhead indentation
- content <- rawBlockContent (updateIndent blkProp leadingIndent)
- resultsContent <- followingResultsBlock
- let id' = fromMaybe mempty $ blockAttrName blockAttrs
- let includeCode = exportsCode kv
- let includeResults = exportsResults kv
- let codeBlck = B.codeBlockWith ( id', classes, kv ) content
- let labelledBlck = maybe (pure codeBlck)
- (labelDiv codeBlck)
- (blockAttrCaption blockAttrs)
- let resultBlck = fromMaybe mempty resultsContent
- return $ (if includeCode then labelledBlck else mempty)
- <> (if includeResults then resultBlck else mempty)
- where
- labelDiv blk value =
- B.divWith nullAttr <$> (mappend <$> labelledBlock value
- <*> pure blk)
- labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-
-rawBlockContent :: BlockProperties -> OrgParser String
-rawBlockContent (indent, blockType) = try $
- unlines . map commaEscaped <$> manyTill indentedLine blockEnder
- where
- indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
- blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType)
-
-parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
-parsedBlockContent blkProps = try $ do
- raw <- rawBlockContent blkProps
- parseFromString parseBlocks (raw ++ "\n")
-
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> OrgParser String
-indentWith num = do
- tabStop <- getOption readerTabStop
- if num < tabStop
- then count num (char ' ')
- else choice [ try (count num (char ' '))
- , try (char '\t' >> count (num - tabStop) (char ' ')) ]
-
-type SwitchOption = (Char, Maybe String)
-
-orgArgWord :: OrgParser String
-orgArgWord = many1 orgArgWordChar
-
--- | Parse code block arguments
--- TODO: We currently don't handle switches.
-codeHeaderArgs :: OrgParser ([String], [(String, String)])
-codeHeaderArgs = try $ do
- language <- skipSpaces *> orgArgWord
- _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
- parameters <- manyTill blockOption newline
- let pandocLang = translateLang language
- return $
- if hasRundocParameters parameters
- then ( [ pandocLang, rundocBlockClass ]
- , map toRundocAttrib (("language", language) : parameters)
- )
- else ([ pandocLang ], parameters)
- where hasRundocParameters = not . null
-
-switch :: OrgParser SwitchOption
-switch = try $ simpleSwitch <|> lineNumbersSwitch
- where
- simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
- lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
- (string "-l \"" *> many1Till nonspaceChar (char '"'))
-
-translateLang :: String -> String
-translateLang "C" = "c"
-translateLang "C++" = "cpp"
-translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
-translateLang "js" = "javascript"
-translateLang "lisp" = "commonlisp"
-translateLang "R" = "r"
-translateLang "sh" = "bash"
-translateLang "sqlite" = "sql"
-translateLang cs = cs
-
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
-blockOption :: OrgParser (String, String)
-blockOption = try $ do
- argKey <- orgArgKey
- paramValue <- option "yes" orgParamValue
- return (argKey, paramValue)
-
-inlineBlockOption :: OrgParser (String, String)
-inlineBlockOption = try $ do
- argKey <- orgArgKey
- paramValue <- option "yes" orgInlineParamValue
- return (argKey, paramValue)
-
-orgArgKey :: OrgParser String
-orgArgKey = try $
- skipSpaces *> char ':'
- *> many1 orgArgWordChar
-
-orgParamValue :: OrgParser String
-orgParamValue = try $
- skipSpaces
- *> notFollowedBy (char ':' )
- *> many1 (noneOf "\t\n\r ")
- <* skipSpaces
-
-orgInlineParamValue :: OrgParser String
-orgInlineParamValue = try $
- skipSpaces
- *> notFollowedBy (char ':')
- *> many1 (noneOf "\t\n\r ]")
- <* skipSpaces
-
-orgArgWordChar :: OrgParser Char
-orgArgWordChar = alphaNum <|> oneOf "-_"
-
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first ("rundoc-" ++)
-
-commaEscaped :: String -> String
-commaEscaped (',':cs@('*':_)) = cs
-commaEscaped (',':cs@('#':'+':_)) = cs
-commaEscaped cs = cs
-
-example :: OrgParser (F Blocks)
-example = try $ do
- return . return . exampleCode =<< unlines <$> many1 exampleLine
-
-exampleCode :: String -> Blocks
-exampleCode = B.codeBlockWith ("", ["example"], [])
-
-exampleLine :: OrgParser String
-exampleLine = try $ skipSpaces *> string ": " *> anyLine
-
-
---
--- Drawers
---
-
--- | A generic drawer which has no special meaning for org-mode.
--- Whether or not this drawer is included in the output depends on the drawers
--- export setting.
-genericDrawer :: OrgParser (F Blocks)
-genericDrawer = try $ do
- name <- map toUpper <$> drawerStart
- content <- manyTill drawerLine (try drawerEnd)
- state <- getState
- -- Include drawer if it is explicitly included in or not explicitly excluded
- -- from the list of drawers that should be exported. PROPERTIES drawers are
- -- never exported.
- case (exportDrawers . orgStateExportSettings $ state) of
- _ | name == "PROPERTIES" -> return mempty
- Left names | name `elem` names -> return mempty
- Right names | name `notElem` names -> return mempty
- _ -> drawerDiv name <$> parseLines content
- where
- parseLines :: [String] -> OrgParser (F Blocks)
- parseLines = parseFromString parseBlocks . (++ "\n") . unlines
-
- drawerDiv :: String -> F Blocks -> F Blocks
- drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-
-drawerStart :: OrgParser String
-drawerStart = try $
- skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
-
-drawerLine :: OrgParser String
-drawerLine = anyLine
-
-drawerEnd :: OrgParser String
-drawerEnd = try $
- skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
-
--- | Read a :PROPERTIES: drawer and return the key/value pairs contained
--- within.
-propertiesDrawer :: OrgParser [(String, String)]
-propertiesDrawer = try $ do
- drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
- manyTill property (try drawerEnd)
- where
- property :: OrgParser (String, String)
- property = try $ (,) <$> key <*> value
-
- key :: OrgParser String
- key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
-
- value :: OrgParser String
- value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline)
-
-keyValuesToAttr :: [(String, String)] -> Attr
-keyValuesToAttr kvs =
- let
- lowerKvs = map (\(k, v) -> (map toLower k, v)) kvs
- id' = fromMaybe mempty . lookup "custom_id" $ lowerKvs
- cls = fromMaybe mempty . lookup "class" $ lowerKvs
- kvs' = filter (flip notElem ["custom_id", "class"] . fst) lowerKvs
- in
- (id', words cls, kvs')
-
-
---
--- Figures
---
-
--- | Figures (Image on a line by itself, preceded by name and/or caption)
-figure :: OrgParser (F Blocks)
-figure = try $ do
- figAttrs <- blockAttributes
- src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
- guard . not . isNothing . blockAttrCaption $ figAttrs
- guard (isImageFilename src)
- let figName = fromMaybe mempty $ blockAttrName figAttrs
- let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
- let figKeyVals = blockAttrKeyValues figAttrs
- let attr = (mempty, mempty, figKeyVals)
- return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
- where
- withFigPrefix cs =
- if "fig:" `isPrefixOf` cs
- then cs
- else "fig:" ++ cs
-
---
--- Comments, Options and Metadata
---
-specialLine :: OrgParser (F Blocks)
-specialLine = fmap return . try $ metaLine <|> commentLine
-
-metaLine :: OrgParser Blocks
-metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
-
--- The order, in which blocks are tried, makes sure that we're not looking at
--- the beginning of a block, so we don't need to check for it
-metaLineStart :: OrgParser ()
-metaLineStart = try $ skipSpaces <* string "#+"
-
-commentLine :: OrgParser Blocks
-commentLine = commentLineStart *> anyLine *> pure mempty
-
-commentLineStart :: OrgParser ()
-commentLineStart = try $ skipSpaces <* string "# "
-
-declarationLine :: OrgParser ()
-declarationLine = try $ do
- key <- metaKey
- inlinesF <- metaInlines
- updateState $ \st ->
- let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
- in st { orgStateMeta' = orgStateMeta' st <> meta' }
- return ()
-
-metaInlines :: OrgParser (F MetaValue)
-metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
-
-metaKey :: OrgParser String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
- <* char ':'
- <* skipSpaces
-
-optionLine :: OrgParser ()
-optionLine = try $ do
- key <- metaKey
- case key of
- "link" -> parseLinkFormat >>= uncurry addLinkFormat
- "options" -> () <$ sepBy spaces exportSetting
- _ -> mzero
-
-parseLinkFormat :: OrgParser ((String, String -> String))
-parseLinkFormat = try $ do
- linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
- linkSubst <- parseFormat
- return (linkType, linkSubst)
-
--- | An ad-hoc, single-argument-only implementation of a printf-style format
--- parser.
-parseFormat :: OrgParser (String -> String)
-parseFormat = try $ do
- replacePlain <|> replaceUrl <|> justAppend
- where
- -- inefficient, but who cares
- replacePlain = try $ (\x -> concat . flip intersperse x)
- <$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
- <$> sequence [tillSpecifier 'h', rest]
- justAppend = try $ (++) <$> rest
-
- rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
- tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
-
---
--- Headers
---
-
--- | Headers
-header :: OrgParser (F Blocks)
-header = try $ do
- level <- headerStart
- title <- manyTill inline (lookAhead $ optional headerTags <* P.newline)
- tags <- option [] headerTags
- newline
- propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
- inlines <- runF (tagTitle title tags) <$> getState
- attr <- registerHeader propAttr inlines
- return $ pure (B.headerWith attr level inlines)
- where
- tagTitle :: [F Inlines] -> [String] -> F Inlines
- tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags
-
- tagToInlineF :: String -> F Inlines
- tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
-
- headerTags :: OrgParser [String]
- headerTags = try $
- let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
- in skipSpaces
- *> char ':'
- *> many1 tag
- <* skipSpaces
-
-headerStart :: OrgParser Int
-headerStart = try $
- (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
-
-
--- Don't use (or need) the reader wrapper here, we want hline to be
--- @show@able. Otherwise we can't use it with @notFollowedBy'@.
-
--- | Horizontal Line (five -- dashes or more)
-hline :: OrgParser Blocks
-hline = try $ do
- skipSpaces
- string "-----"
- many (char '-')
- skipSpaces
- newline
- return B.horizontalRule
-
---
--- Tables
---
-
-data OrgTableRow = OrgContentRow (F [Blocks])
- | OrgAlignRow [Alignment]
- | OrgHlineRow
-
--- OrgTable is strongly related to the pandoc table ADT. Using the same
--- (i.e. pandoc-global) ADT would mean that the reader would break if the
--- global structure was to be changed, which would be bad. The final table
--- should be generated using a builder function. Column widths aren't
--- implemented yet, so they are not tracked here.
-data OrgTable = OrgTable
- { orgTableAlignments :: [Alignment]
- , orgTableHeader :: [Blocks]
- , orgTableRows :: [[Blocks]]
- }
-
-table :: OrgParser (F Blocks)
-table = try $ do
- blockAttrs <- blockAttributes
- lookAhead tableStart
- do
- rows <- tableRows
- let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
- return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
-
-orgToPandocTable :: OrgTable
- -> Inlines
- -> Blocks
-orgToPandocTable (OrgTable aligns heads lns) caption =
- B.table caption (zip aligns $ repeat 0) heads lns
-
-tableStart :: OrgParser Char
-tableStart = try $ skipSpaces *> char '|'
-
-tableRows :: OrgParser [OrgTableRow]
-tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
-
-tableContentRow :: OrgParser OrgTableRow
-tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
-
-tableContentCell :: OrgParser (F Blocks)
-tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
-
-tableAlignRow :: OrgParser OrgTableRow
-tableAlignRow = try $ do
- tableStart
- cells <- many1Till tableAlignCell newline
- -- Empty rows are regular (i.e. content) rows, not alignment rows.
- guard $ any (/= AlignDefault) cells
- return $ OrgAlignRow cells
-
-tableAlignCell :: OrgParser Alignment
-tableAlignCell =
- choice [ try $ emptyCell *> return AlignDefault
- , try $ skipSpaces
- *> char '<'
- *> tableAlignFromChar
- <* many digit
- <* char '>'
- <* emptyCell
- ] <?> "alignment info"
- where emptyCell = try $ skipSpaces *> endOfCell
-
-tableAlignFromChar :: OrgParser Alignment
-tableAlignFromChar = try $
- choice [ char 'l' *> return AlignLeft
- , char 'c' *> return AlignCenter
- , char 'r' *> return AlignRight
- ]
-
-tableHline :: OrgParser OrgTableRow
-tableHline = try $
- OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
-
-endOfCell :: OrgParser Char
-endOfCell = try $ char '|' <|> lookAhead newline
-
-rowsToTable :: [OrgTableRow]
- -> F OrgTable
-rowsToTable = foldM rowToContent emptyTable
- where emptyTable = OrgTable mempty mempty mempty
-
-normalizeTable :: OrgTable -> OrgTable
-normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows
- where
- refRow = if heads /= mempty
- then heads
- else if rows == mempty then mempty else head rows
- cols = length refRow
- fillColumns base padding = take cols $ base ++ repeat padding
- aligns' = fillColumns aligns AlignDefault
-
--- One or more horizontal rules after the first content line mark the previous
--- line as a header. All other horizontal lines are discarded.
-rowToContent :: OrgTable
- -> OrgTableRow
- -> F OrgTable
-rowToContent orgTable row =
- case row of
- OrgHlineRow -> return singleRowPromotedToHeader
- OrgAlignRow as -> return . setAligns $ as
- OrgContentRow cs -> appendToBody cs
- where
- singleRowPromotedToHeader :: OrgTable
- singleRowPromotedToHeader = case orgTable of
- OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- orgTable{ orgTableHeader = b , orgTableRows = [] }
- _ -> orgTable
-
- setAligns :: [Alignment] -> OrgTable
- setAligns aligns = orgTable{ orgTableAlignments = aligns }
-
- appendToBody :: F [Blocks] -> F OrgTable
- appendToBody frow = do
- newRow <- frow
- let oldRows = orgTableRows orgTable
- -- NOTE: This is an inefficient O(n) operation. This should be changed
- -- if performance ever becomes a problem.
- return orgTable{ orgTableRows = oldRows ++ [newRow] }
-
-
---
--- LaTeX fragments
---
-latexFragment :: OrgParser (F Blocks)
-latexFragment = try $ do
- envName <- latexEnvStart
- content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
- return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
- where
- c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
- , c
- , "\\end{", e, "}\n"
- ]
-
-latexEnvStart :: OrgParser String
-latexEnvStart = try $ do
- skipSpaces *> string "\\begin{"
- *> latexEnvName
- <* string "}"
- <* blankline
-
-latexEnd :: String -> OrgParser ()
-latexEnd envName = try $
- () <$ skipSpaces
- <* string ("\\end{" ++ envName ++ "}")
- <* blankline
-
--- | Parses a LaTeX environment name.
-latexEnvName :: OrgParser String
-latexEnvName = try $ do
- mappend <$> many1 alphaNum
- <*> option "" (string "*")
-
-
---
--- Footnote defintions
---
-noteBlock :: OrgParser (F Blocks)
-noteBlock = try $ do
- ref <- noteMarker <* skipSpaces
- content <- mconcat <$> blocksTillHeaderOrNote
- addToNotesTable (ref, content)
- return mempty
- where
- blocksTillHeaderOrNote =
- many1Till block (eof <|> () <$ lookAhead noteMarker
- <|> () <$ lookAhead headerStart)
-
--- Paragraphs or Plain text
-paraOrPlain :: OrgParser (F Blocks)
-paraOrPlain = try $ do
- ils <- parseInlines
- nl <- option False (newline *> return True)
- -- Read block as paragraph, except if we are in a list context and the block
- -- is directly followed by a list item, in which case the block is read as
- -- plain text.
- try (guard nl
- *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
- *> return (B.para <$> ils))
- <|> (return (B.plain <$> ils))
-
-inlinesTillNewline :: OrgParser (F Inlines)
-inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
-
-
---
--- list blocks
---
-
-list :: OrgParser (F Blocks)
-list = choice [ definitionList, bulletList, orderedList ] <?> "list"
-
-definitionList :: OrgParser (F Blocks)
-definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.definitionList . fmap compactify'DL . sequence
- <$> many1 (definitionListItem $ bulletListStart' (Just n))
-
-bulletList :: OrgParser (F Blocks)
-bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.bulletList . fmap compactify' . sequence
- <$> many1 (listItem (bulletListStart' $ Just n))
-
-orderedList :: OrgParser (F Blocks)
-orderedList = fmap B.orderedList . fmap compactify' . sequence
- <$> many1 (listItem orderedListStart)
-
-genericListStart :: OrgParser String
- -> OrgParser Int
-genericListStart listMarker = try $
- (+) <$> (length <$> many spaceChar)
- <*> (length <$> listMarker <* many1 spaceChar)
-
--- parses bullet list marker. maybe we know the indent level
-bulletListStart :: OrgParser Int
-bulletListStart = bulletListStart' Nothing
-
-bulletListStart' :: Maybe Int -> OrgParser Int
--- returns length of bulletList prefix, inclusive of marker
-bulletListStart' Nothing = do ind <- length <$> many spaceChar
- when (ind == 0) $ notFollowedBy (char '*')
- oneOf bullets
- many1 spaceChar
- return (ind + 1)
- -- Unindented lists are legal, but they can't use '*' bullets
- -- We return n to maintain compatibility with the generic listItem
-bulletListStart' (Just n) = do count (n-1) spaceChar
- when (n == 1) $ notFollowedBy (char '*')
- oneOf bullets
- many1 spaceChar
- return n
-
-bullets :: String
-bullets = "*+-"
-
-orderedListStart :: OrgParser Int
-orderedListStart = genericListStart orderedListMarker
- -- Ordered list markers allowed in org-mode
- where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
-
-definitionListItem :: OrgParser Int
- -> OrgParser (F (Inlines, [Blocks]))
-definitionListItem parseMarkerGetLength = try $ do
- markerLength <- parseMarkerGetLength
- term <- manyTill (noneOf "\n\r") (try definitionMarker)
- line1 <- anyLineNewline
- blank <- option "" ("\n" <$ blankline)
- cont <- concat <$> many (listContinuation markerLength)
- term' <- parseFromString parseInlines term
- contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
- return $ (,) <$> term' <*> fmap (:[]) contents'
- where
- definitionMarker =
- spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
-
-
--- parse raw text for one list item, excluding start marker and continuations
-listItem :: OrgParser Int
- -> OrgParser (F Blocks)
-listItem start = try . withContext ListItemState $ do
- markerLength <- try start
- firstLine <- anyLineNewline
- blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString parseBlocks $ firstLine ++ blank ++ rest
-
--- continuation of a list item - indented and separated by blankline or endline.
--- Note: nested lists are parsed as continuations.
-listContinuation :: Int
- -> OrgParser String
-listContinuation markerLength = try $
- notFollowedBy' blankline
- *> (mappend <$> (concat <$> many1 listLine)
- <*> many blankline)
- where listLine = try $ indentWith markerLength *> anyLineNewline
-
-anyLineNewline :: OrgParser String
-anyLineNewline = (++ "\n") <$> anyLine
-
-
---
--- inline
---
-
-inline :: OrgParser (F Inlines)
-inline =
- choice [ whitespace
- , linebreak
- , cite
- , footnote
- , linkOrImage
- , anchor
- , inlineCodeBlock
- , str
- , endline
- , emph
- , strong
- , strikeout
- , underline
- , code
- , math
- , displayMath
- , verbatim
- , subscript
- , superscript
- , inlineLaTeX
- , smart
- , symbol
- ] <* (guard =<< newlinesCountWithinLimits)
- <?> "inline"
-
-parseInlines :: OrgParser (F Inlines)
-parseInlines = trimInlinesF . mconcat <$> many1 inline
-
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
-
-
-whitespace :: OrgParser (F Inlines)
-whitespace = pure B.space <$ skipMany1 spaceChar
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
- <?> "whitespace"
-
-linebreak :: OrgParser (F Inlines)
-linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
-
-str :: OrgParser (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
- <* updateLastStrPos
-
--- | An endline character that can be treated as a space, not a structural
--- break. This should reflect the values of the Emacs variable
--- @org-element-pagaraph-separate@.
-endline :: OrgParser (F Inlines)
-endline = try $ do
- newline
- notFollowedBy blankline
- notFollowedBy' exampleLine
- notFollowedBy' hline
- notFollowedBy' noteMarker
- notFollowedBy' tableStart
- notFollowedBy' drawerStart
- notFollowedBy' headerStart
- notFollowedBy' metaLineStart
- notFollowedBy' latexEnvStart
- notFollowedBy' commentLineStart
- notFollowedBy' bulletListStart
- notFollowedBy' orderedListStart
- decEmphasisNewlinesCount
- guard =<< newlinesCountWithinLimits
- updateLastPreCharPos
- return . return $ B.softbreak
-
-cite :: OrgParser (F Inlines)
-cite = try $ do
- guardEnabled Ext_citations
- (cs, raw) <- withRaw normalCite
- return $ (flip B.cite (B.text raw)) <$> cs
-
-normalCite :: OrgParser (F [Citation])
-normalCite = try $ char '['
- *> skipSpaces
- *> citeList
- <* skipSpaces
- <* char ']'
-
-citeList :: OrgParser (F [Citation])
-citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: OrgParser (F Citation)
-citation = try $ do
- pref <- prefix
- (suppress_author, key) <- citeKey
- suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
- where
- prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
- suffix = try $ do
- hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
- skipSpaces
- rest <- trimInlinesF . mconcat <$>
- many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
-
-footnote :: OrgParser (F Inlines)
-footnote = try $ inlineNote <|> referencedNote
-
-inlineNote :: OrgParser (F Inlines)
-inlineNote = try $ do
- string "[fn:"
- ref <- many alphaNum
- char ':'
- note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- when (not $ null ref) $
- addToNotesTable ("fn:" ++ ref, note)
- return $ B.note <$> note
-
-referencedNote :: OrgParser (F Inlines)
-referencedNote = try $ do
- ref <- noteMarker
- return $ do
- notes <- asksF orgStateNotes'
- case lookup ref notes of
- Nothing -> return $ B.str $ "[" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- let contents' = runF contents st{ orgStateNotes' = [] }
- return $ B.note contents'
-
-noteMarker :: OrgParser String
-noteMarker = try $ do
- char '['
- choice [ many1Till digit (char ']')
- , (++) <$> string "fn:"
- <*> many1Till (noneOf "\n\r\t ") (char ']')
- ]
-
-linkOrImage :: OrgParser (F Inlines)
-linkOrImage = explicitOrImageLink
- <|> selflinkOrImage
- <|> angleLink
- <|> plainLink
- <?> "link or image"
-
-explicitOrImageLink :: OrgParser (F Inlines)
-explicitOrImageLink = try $ do
- char '['
- srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
- title <- enclosedRaw (char '[') (char ']')
- title' <- parseFromString (mconcat <$> many inline) title
- char ']'
- return $ do
- src <- srcF
- if isImageFilename title
- then pure $ B.link src "" $ B.image title mempty mempty
- else linkToInlinesF src =<< title'
-
-selflinkOrImage :: OrgParser (F Inlines)
-selflinkOrImage = try $ do
- src <- char '[' *> linkTarget <* char ']'
- return $ linkToInlinesF src (B.str src)
-
-plainLink :: OrgParser (F Inlines)
-plainLink = try $ do
- (orig, src) <- uri
- returnF $ B.link src "" (B.str orig)
-
-angleLink :: OrgParser (F Inlines)
-angleLink = try $ do
- char '<'
- link <- plainLink
- char '>'
- return link
-
-selfTarget :: OrgParser String
-selfTarget = try $ char '[' *> linkTarget <* char ']'
-
-linkTarget :: OrgParser String
-linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
-
-possiblyEmptyLinkTarget :: OrgParser String
-possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-
-applyCustomLinkFormat :: String -> OrgParser (F String)
-applyCustomLinkFormat link = do
- let (linkType, rest) = break (== ':') link
- return $ do
- formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
-
--- | Take a link and return a function which produces new inlines when given
--- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF linkStr =
- case linkStr of
- "" -> pure . B.link mempty "" -- wiki link (empty by convention)
- ('#':_) -> pure . B.link linkStr "" -- document-local fraction
- _ -> case cleanLinkString linkStr of
- (Just cleanedLink) -> if isImageFilename cleanedLink
- then const . pure $ B.image cleanedLink "" ""
- else pure . B.link cleanedLink ""
- Nothing -> internalLink linkStr -- other internal link
-
--- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
- _ | isUrl s -> Just s -- URL
- _ -> Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
-
-isImageFilename :: String -> Bool
-isImageFilename filename =
- any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
- (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
- ':' `notElem` filename)
- where
- imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
- protocols = [ "file", "http", "https" ]
-
-internalLink :: String -> Inlines -> F Inlines
-internalLink link title = do
- anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
- if anchorB
- then return $ B.link ('#':link) "" title
- else return $ B.emph title
-
--- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
--- @anchor-id@ set as id. Legal anchors in org-mode are defined through
--- @org-target-regexp@, which is fairly liberal. Since no link is created if
--- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
--- an anchor.
-
-anchor :: OrgParser (F Inlines)
-anchor = try $ do
- anchorId <- parseAnchor
- recordAnchorId anchorId
- returnF $ B.spanWith (solidify anchorId, [], []) mempty
- where
- parseAnchor = string "<<"
- *> many1 (noneOf "\t\n\r<>\"' ")
- <* string ">>"
- <* skipSpaces
-
--- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
--- the org function @org-export-solidify-link-text@.
-
-solidify :: String -> String
-solidify = map replaceSpecialChar
- where replaceSpecialChar c
- | isAlphaNum c = c
- | c `elem` ("_.-:" :: String) = c
- | otherwise = '-'
-
--- | Parses an inline code block and marks it as an babel block.
-inlineCodeBlock :: OrgParser (F Inlines)
-inlineCodeBlock = try $ do
- string "src_"
- lang <- many1 orgArgWordChar
- opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
- inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
- let attrClasses = [translateLang lang, rundocBlockClass]
- let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
- returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
-
-enclosedByPair :: Char -- ^ opening char
- -> Char -- ^ closing char
- -> OrgParser a -- ^ parser
- -> OrgParser [a]
-enclosedByPair s e p = char s *> many1Till p (char e)
-
-emph :: OrgParser (F Inlines)
-emph = fmap B.emph <$> emphasisBetween '/'
-
-strong :: OrgParser (F Inlines)
-strong = fmap B.strong <$> emphasisBetween '*'
-
-strikeout :: OrgParser (F Inlines)
-strikeout = fmap B.strikeout <$> emphasisBetween '+'
-
--- There is no underline, so we use strong instead.
-underline :: OrgParser (F Inlines)
-underline = fmap B.strong <$> emphasisBetween '_'
-
-verbatim :: OrgParser (F Inlines)
-verbatim = return . B.code <$> verbatimBetween '='
-
-code :: OrgParser (F Inlines)
-code = return . B.code <$> verbatimBetween '~'
-
-subscript :: OrgParser (F Inlines)
-subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
-
-superscript :: OrgParser (F Inlines)
-superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
-
-math :: OrgParser (F Inlines)
-math = return . B.math <$> choice [ math1CharBetween '$'
- , mathStringBetween '$'
- , rawMathBetween "\\(" "\\)"
- ]
-
-displayMath :: OrgParser (F Inlines)
-displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
- , rawMathBetween "$$" "$$"
- ]
-
-updatePositions :: Char
- -> OrgParser (Char)
-updatePositions c = do
- when (c `elem` emphasisPreChars) updateLastPreCharPos
- when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
- return c
-
-symbol :: OrgParser (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
-
-emphasisBetween :: Char
- -> OrgParser (F Inlines)
-emphasisBetween c = try $ do
- startEmphasisNewlinesCounting emphasisAllowedNewlines
- res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
- isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
- when isTopLevelEmphasis
- resetEmphasisNewlines
- return res
-
-verbatimBetween :: Char
- -> OrgParser String
-verbatimBetween c = try $
- emphasisStart c *>
- many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
-
--- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: Char
- -> OrgParser String
-mathStringBetween c = try $ do
- mathStart c
- body <- many1TillNOrLessNewlines mathAllowedNewlines
- (noneOf (c:"\n\r"))
- (lookAhead $ mathEnd c)
- final <- mathEnd c
- return $ body ++ [final]
-
--- | Parse a single character between @c@ using math rules
-math1CharBetween :: Char
- -> OrgParser String
-math1CharBetween c = try $ do
- char c
- res <- noneOf $ c:mathForbiddenBorderChars
- char c
- eof <|> () <$ lookAhead (oneOf mathPostChars)
- return [res]
-
-rawMathBetween :: String
- -> String
- -> OrgParser String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
-
--- | Parses the start (opening character) of emphasis
-emphasisStart :: Char -> OrgParser Char
-emphasisStart c = try $ do
- guard =<< afterEmphasisPreChar
- guard =<< notAfterString
- char c
- lookAhead (noneOf emphasisForbiddenBorderChars)
- pushToInlineCharStack c
- return c
-
--- | Parses the closing character of emphasis
-emphasisEnd :: Char -> OrgParser Char
-emphasisEnd c = try $ do
- guard =<< notAfterForbiddenBorderChar
- char c
- eof <|> () <$ lookAhead acceptablePostChars
- updateLastStrPos
- popInlineCharStack
- return c
- where acceptablePostChars =
- surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
-
-mathStart :: Char -> OrgParser Char
-mathStart c = try $
- char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
-
-mathEnd :: Char -> OrgParser Char
-mathEnd c = try $ do
- res <- noneOf (c:mathForbiddenBorderChars)
- char c
- eof <|> () <$ lookAhead (oneOf mathPostChars)
- return res
-
-
-enclosedInlines :: OrgParser a
- -> OrgParser b
- -> OrgParser (F Inlines)
-enclosedInlines start end = try $
- trimInlinesF . mconcat <$> enclosed start end inline
-
-enclosedRaw :: OrgParser a
- -> OrgParser b
- -> OrgParser String
-enclosedRaw start end = try $
- start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
- spanningTwoLines = try $
- anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
-
--- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
--- newlines.
-many1TillNOrLessNewlines :: Int
- -> OrgParser Char
- -> OrgParser a
- -> OrgParser String
-many1TillNOrLessNewlines n p end = try $
- nMoreLines (Just n) mempty >>= oneOrMore
- where
- nMoreLines Nothing cs = return cs
- nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
- nMoreLines k cs = try $ (final k cs <|> rest k cs)
- >>= uncurry nMoreLines
- final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
- rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
- finalLine = try $ manyTill p end
- minus1 k = k - 1
- oneOrMore cs = guard (not $ null cs) *> return cs
-
--- Org allows customization of the way it reads emphasis. We use the defaults
--- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
--- for details).
-
--- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
-emphasisPreChars :: [Char]
-emphasisPreChars = "\t \"'({"
-
--- | Chars allowed at after emphasis
-emphasisPostChars :: [Char]
-emphasisPostChars = "\t\n !\"'),-.:;?\\}"
-
--- | Chars not allowed at the (inner) border of emphasis
-emphasisForbiddenBorderChars :: [Char]
-emphasisForbiddenBorderChars = "\t\n\r \"',"
-
--- | The maximum number of newlines within
-emphasisAllowedNewlines :: Int
-emphasisAllowedNewlines = 1
-
--- LaTeX-style math: see `org-latex-regexps` for details
-
--- | Chars allowed after an inline ($...$) math statement
-mathPostChars :: [Char]
-mathPostChars = "\t\n \"'),-.:;?"
-
--- | Chars not allowed at the (inner) border of math
-mathForbiddenBorderChars :: [Char]
-mathForbiddenBorderChars = "\t\n\r ,;.$"
-
--- | Maximum number of newlines in an inline math statement
-mathAllowedNewlines :: Int
-mathAllowedNewlines = 2
-
--- | Whether we are right behind a char allowed before emphasis
-afterEmphasisPreChar :: OrgParser Bool
-afterEmphasisPreChar = do
- pos <- getPosition
- lastPrePos <- orgStateLastPreCharPos <$> getState
- return . fromMaybe True $ (== pos) <$> lastPrePos
-
--- | Whether the parser is right after a forbidden border char
-notAfterForbiddenBorderChar :: OrgParser Bool
-notAfterForbiddenBorderChar = do
- pos <- getPosition
- lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
- return $ lastFBCPos /= Just pos
-
--- | Read a sub- or superscript expression
-subOrSuperExpr :: OrgParser (F Inlines)
-subOrSuperExpr = try $
- choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
- , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
- , simpleSubOrSuperString
- ] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
-
-simpleSubOrSuperString :: OrgParser String
-simpleSubOrSuperString = try $ do
- state <- getState
- guard . exportSubSuperscripts . orgStateExportSettings $ state
- choice [ string "*"
- , mappend <$> option [] ((:[]) <$> oneOf "+-")
- <*> many1 alphaNum
- ]
-
-inlineLaTeX :: OrgParser (F Inlines)
-inlineLaTeX = try $ do
- cmd <- inlineLaTeXCommand
- maybe mzero returnF $
- parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
- where
- parseAsMath :: String -> Maybe Inlines
- parseAsMath cs = B.fromList <$> texMathToPandoc cs
-
- parseAsInlineLaTeX :: String -> Maybe Inlines
- parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
-
- parseAsMathMLSym :: String -> Maybe Inlines
- parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
- -- drop initial backslash and any trailing "{}"
- where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
-
- state :: ParserState
- state = def{ stateOptions = def{ readerParseRaw = True }}
-
- texMathToPandoc :: String -> Maybe [Inline]
- texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
-
-maybeRight :: Either a b -> Maybe b
-maybeRight = either (const Nothing) Just
-
-inlineLaTeXCommand :: OrgParser String
-inlineLaTeXCommand = try $ do
- rest <- getInput
- case runParser rawLaTeXInline def "source" rest of
- Right (RawInline _ cs) -> do
- -- drop any trailing whitespace, those are not be part of the command as
- -- far as org mode is concerned.
- let cmdNoSpc = dropWhileEnd isSpace cs
- let len = length cmdNoSpc
- count len anyChar
- return cmdNoSpc
- _ -> mzero
-
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
-smart :: OrgParser (F Inlines)
-smart = do
- getOption readerSmart >>= guard
- doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
- where
- orgDash = dash <* updatePositions '-'
- orgEllipses = ellipses <* updatePositions '.'
- orgApostrophe =
- (char '\'' <|> char '\8217') <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
- *> return (B.str "\x2019")
-
-singleQuoted :: OrgParser (F Inlines)
-singleQuoted = try $ do
- singleQuoteStart
- updatePositions '\''
- withQuoteContext InSingleQuote $
- fmap B.singleQuoted . trimInlinesF . mconcat <$>
- many1Till inline (singleQuoteEnd <* updatePositions '\'')
-
--- doubleQuoted will handle regular double-quoted sections, as well
--- as dialogues with an open double-quote without a close double-quote
--- in the same paragraph.
-doubleQuoted :: OrgParser (F Inlines)
-doubleQuoted = try $ do
- doubleQuoteStart
- updatePositions '"'
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
new file mode 100644
index 000000000..e4dc31342
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -0,0 +1,112 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode inline elements.
+-}
+module Text.Pandoc.Readers.Org.BlockStarts
+ ( exampleLineStart
+ , hline
+ , noteMarker
+ , tableStart
+ , drawerStart
+ , headerStart
+ , metaLineStart
+ , latexEnvStart
+ , commentLineStart
+ , bulletListStart
+ , orderedListStart
+ ) where
+
+import Text.Pandoc.Readers.Org.Parsing
+
+-- | Horizontal Line (five -- dashes or more)
+hline :: OrgParser ()
+hline = try $ do
+ skipSpaces
+ string "-----"
+ many (char '-')
+ skipSpaces
+ newline
+ return ()
+
+-- | Read the start of a header line, return the header level
+headerStart :: OrgParser Int
+headerStart = try $
+ (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
+
+tableStart :: OrgParser Char
+tableStart = try $ skipSpaces *> char '|'
+
+latexEnvStart :: OrgParser String
+latexEnvStart = try $ do
+ skipSpaces *> string "\\begin{"
+ *> latexEnvName
+ <* string "}"
+ <* blankline
+ where
+ latexEnvName :: OrgParser String
+ latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
+
+
+-- | Parses bullet list marker.
+bulletListStart :: OrgParser ()
+bulletListStart = try $
+ choice
+ [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1
+ , () <$ skipSpaces1 <* char '*' <* skipSpaces1
+ ]
+
+genericListStart :: OrgParser String
+ -> OrgParser Int
+genericListStart listMarker = try $
+ (+) <$> (length <$> many spaceChar)
+ <*> (length <$> listMarker <* many1 spaceChar)
+
+orderedListStart :: OrgParser Int
+orderedListStart = genericListStart orderedListMarker
+ -- Ordered list markers allowed in org-mode
+ where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+
+drawerStart :: OrgParser String
+drawerStart = try $
+ skipSpaces *> drawerName <* skipSpaces <* newline
+ where drawerName = char ':' *> manyTill nonspaceChar (char ':')
+
+metaLineStart :: OrgParser ()
+metaLineStart = try $ skipSpaces <* string "#+"
+
+commentLineStart :: OrgParser ()
+commentLineStart = try $ skipSpaces <* string "# "
+
+exampleLineStart :: OrgParser ()
+exampleLineStart = () <$ try (skipSpaces *> string ": ")
+
+noteMarker :: OrgParser String
+noteMarker = try $ do
+ char '['
+ choice [ many1Till digit (char ']')
+ , (++) <$> string "fn:"
+ <*> many1Till (noneOf "\n\r\t ") (char ']')
+ ]
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
new file mode 100644
index 000000000..b374acfe2
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -0,0 +1,891 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode block elements.
+-}
+module Text.Pandoc.Readers.Org.Blocks
+ ( blockList
+ , meta
+ ) where
+
+import Text.Pandoc.Readers.Org.BlockStarts
+import Text.Pandoc.Readers.Org.Inlines
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines, Blocks )
+import Text.Pandoc.Definition
+import Text.Pandoc.Compat.Monoid ((<>))
+import Text.Pandoc.Options
+import Text.Pandoc.Shared ( compactify', compactify'DL )
+
+import Control.Arrow ( first )
+import Control.Monad ( foldM, guard, mzero )
+import Data.Char ( toLower, toUpper)
+import Data.List ( foldl', intersperse, isPrefixOf )
+import qualified Data.Map as M
+import Data.Maybe ( fromMaybe, isNothing )
+import Network.HTTP ( urlEncode )
+
+
+--
+-- parsing blocks
+--
+
+-- | Get a list of blocks.
+blockList :: OrgParser [Block]
+blockList = do
+ blocks' <- blocks
+ st <- getState
+ return . B.toList $ runF blocks' st
+
+-- | Get the meta information safed in the state.
+meta :: OrgParser Meta
+meta = do
+ st <- getState
+ return $ runF (orgStateMeta' st) st
+
+blocks :: OrgParser (F Blocks)
+blocks = mconcat <$> manyTill block eof
+
+block :: OrgParser (F Blocks)
+block = choice [ mempty <$ blanklines
+ , table
+ , orgBlock
+ , figure
+ , example
+ , genericDrawer
+ , specialLine
+ , header
+ , horizontalRule
+ , list
+ , latexFragment
+ , noteBlock
+ , paraOrPlain
+ ] <?> "block"
+
+
+--
+-- Block Attributes
+--
+
+-- | Attributes that may be added to figures (like a name or caption).
+data BlockAttributes = BlockAttributes
+ { blockAttrName :: Maybe String
+ , blockAttrCaption :: Maybe (F Inlines)
+ , blockAttrKeyValues :: [(String, String)]
+ }
+
+stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String)
+stringyMetaAttribute attrCheck = try $ do
+ metaLineStart
+ attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
+ guard $ attrCheck attrName
+ skipSpaces
+ attrValue <- anyLine
+ return (attrName, attrValue)
+
+blockAttributes :: OrgParser BlockAttributes
+blockAttributes = try $ do
+ kv <- many (stringyMetaAttribute attrCheck)
+ let caption = foldl' (appendValues "CAPTION") Nothing kv
+ let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
+ let name = lookup "NAME" kv
+ caption' <- maybe (return Nothing)
+ (fmap Just . parseFromString parseInlines)
+ caption
+ kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
+ return $ BlockAttributes
+ { blockAttrName = name
+ , blockAttrCaption = caption'
+ , blockAttrKeyValues = kvAttrs'
+ }
+ where
+ attrCheck :: String -> Bool
+ attrCheck attr =
+ case attr of
+ "NAME" -> True
+ "CAPTION" -> True
+ "ATTR_HTML" -> True
+ _ -> False
+
+ appendValues :: String -> Maybe String -> (String, String) -> Maybe String
+ appendValues attrName accValue (key, value) =
+ if key /= attrName
+ then accValue
+ else case accValue of
+ Just acc -> Just $ acc ++ ' ':value
+ Nothing -> Just value
+
+keyValues :: OrgParser [(String, String)]
+keyValues = try $
+ manyTill ((,) <$> key <*> value) newline
+ where
+ key :: OrgParser String
+ key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
+
+ value :: OrgParser String
+ value = skipSpaces *> manyTill anyChar endOfValue
+
+ endOfValue :: OrgParser ()
+ endOfValue =
+ lookAhead $ (() <$ try (many1 spaceChar <* key))
+ <|> () <$ newline
+
+
+--
+-- Org Blocks (#+BEGIN_... / #+END_...)
+--
+
+type BlockProperties = (Int, String) -- (Indentation, Block-Type)
+
+updateIndent :: BlockProperties -> Int -> BlockProperties
+updateIndent (_, blkType) indent = (indent, blkType)
+
+orgBlock :: OrgParser (F Blocks)
+orgBlock = try $ do
+ blockAttrs <- blockAttributes
+ blockProp@(_, blkType) <- blockHeaderStart
+ ($ blockProp) $
+ case blkType of
+ "comment" -> withRaw' (const mempty)
+ "html" -> withRaw' (return . (B.rawBlock blkType))
+ "latex" -> withRaw' (return . (B.rawBlock blkType))
+ "ascii" -> withRaw' (return . (B.rawBlock blkType))
+ "example" -> withRaw' (return . exampleCode)
+ "quote" -> withParsed (fmap B.blockQuote)
+ "verse" -> verseBlock
+ "src" -> codeBlock blockAttrs
+ _ -> withParsed (fmap $ divWithClass blkType)
+
+blockHeaderStart :: OrgParser (Int, String)
+blockHeaderStart = try $ (,) <$> indentation <*> blockType
+ where
+ blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
+
+indentation :: OrgParser Int
+indentation = try $ do
+ tabStop <- getOption readerTabStop
+ s <- many spaceChar
+ return $ spaceLength tabStop s
+
+spaceLength :: Int -> String -> Int
+spaceLength tabStop s = (sum . map charLen) s
+ where
+ charLen ' ' = 1
+ charLen '\t' = tabStop
+ charLen _ = 0
+
+withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
+
+withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
+
+ignHeaders :: OrgParser ()
+ignHeaders = (() <$ newline) <|> (() <$ anyLine)
+
+divWithClass :: String -> Blocks -> Blocks
+divWithClass cls = B.divWith ("", [cls], [])
+
+verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock blkProp = try $ do
+ ignHeaders
+ content <- rawBlockContent blkProp
+ fmap B.para . mconcat . intersperse (pure B.linebreak)
+ <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content)
+
+exportsCode :: [(String, String)] -> Bool
+exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
+ || ("rundoc-exports", "results") `elem` attrs)
+
+exportsResults :: [(String, String)] -> Bool
+exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
+ || ("rundoc-exports", "both") `elem` attrs
+
+followingResultsBlock :: OrgParser (Maybe (F Blocks))
+followingResultsBlock =
+ optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
+ *> blankline
+ *> block)
+
+codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks)
+codeBlock blockAttrs blkProp = do
+ skipSpaces
+ (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+ leadingIndent <- lookAhead indentation
+ content <- rawBlockContent (updateIndent blkProp leadingIndent)
+ resultsContent <- followingResultsBlock
+ let id' = fromMaybe mempty $ blockAttrName blockAttrs
+ let includeCode = exportsCode kv
+ let includeResults = exportsResults kv
+ let codeBlck = B.codeBlockWith ( id', classes, kv ) content
+ let labelledBlck = maybe (pure codeBlck)
+ (labelDiv codeBlck)
+ (blockAttrCaption blockAttrs)
+ let resultBlck = fromMaybe mempty resultsContent
+ return $ (if includeCode then labelledBlck else mempty)
+ <> (if includeResults then resultBlck else mempty)
+ where
+ labelDiv blk value =
+ B.divWith nullAttr <$> (mappend <$> labelledBlock value
+ <*> pure blk)
+ labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+
+rawBlockContent :: BlockProperties -> OrgParser String
+rawBlockContent (indent, blockType) = try $
+ unlines . map commaEscaped <$> manyTill indentedLine blockEnder
+ where
+ indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
+ blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType)
+
+parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent blkProps = try $ do
+ raw <- rawBlockContent blkProps
+ parseFromString blocks (raw ++ "\n")
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Int -> OrgParser String
+indentWith num = do
+ tabStop <- getOption readerTabStop
+ if num < tabStop
+ then count num (char ' ')
+ else choice [ try (count num (char ' '))
+ , try (char '\t' >> count (num - tabStop) (char ' ')) ]
+
+type SwitchOption = (Char, Maybe String)
+
+-- | Parse code block arguments
+-- TODO: We currently don't handle switches.
+codeHeaderArgs :: OrgParser ([String], [(String, String)])
+codeHeaderArgs = try $ do
+ language <- skipSpaces *> orgArgWord
+ _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
+ parameters <- manyTill blockOption newline
+ let pandocLang = translateLang language
+ return $
+ if hasRundocParameters parameters
+ then ( [ pandocLang, rundocBlockClass ]
+ , map toRundocAttrib (("language", language) : parameters)
+ )
+ else ([ pandocLang ], parameters)
+ where
+ hasRundocParameters = not . null
+ toRundocAttrib = first ("rundoc-" ++)
+
+
+switch :: OrgParser SwitchOption
+switch = try $ simpleSwitch <|> lineNumbersSwitch
+ where
+ simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
+ lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
+ (string "-l \"" *> many1Till nonspaceChar (char '"'))
+
+translateLang :: String -> String
+translateLang "C" = "c"
+translateLang "C++" = "cpp"
+translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
+translateLang "js" = "javascript"
+translateLang "lisp" = "commonlisp"
+translateLang "R" = "r"
+translateLang "sh" = "bash"
+translateLang "sqlite" = "sql"
+translateLang cs = cs
+
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+blockOption :: OrgParser (String, String)
+blockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgParamValue
+ return (argKey, paramValue)
+
+orgParamValue :: OrgParser String
+orgParamValue = try $
+ skipSpaces
+ *> notFollowedBy (char ':' )
+ *> many1 (noneOf "\t\n\r ")
+ <* skipSpaces
+
+commaEscaped :: String -> String
+commaEscaped (',':cs@('*':_)) = cs
+commaEscaped (',':cs@('#':'+':_)) = cs
+commaEscaped cs = cs
+
+example :: OrgParser (F Blocks)
+example = try $ do
+ return . return . exampleCode =<< unlines <$> many1 exampleLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])
+
+exampleLine :: OrgParser String
+exampleLine = try $ exampleLineStart *> anyLine
+
+horizontalRule :: OrgParser (F Blocks)
+horizontalRule = return B.horizontalRule <$ try hline
+
+
+--
+-- Drawers
+--
+
+-- | A generic drawer which has no special meaning for org-mode.
+-- Whether or not this drawer is included in the output depends on the drawers
+-- export setting.
+genericDrawer :: OrgParser (F Blocks)
+genericDrawer = try $ do
+ name <- map toUpper <$> drawerStart
+ content <- manyTill drawerLine (try drawerEnd)
+ state <- getState
+ -- Include drawer if it is explicitly included in or not explicitly excluded
+ -- from the list of drawers that should be exported. PROPERTIES drawers are
+ -- never exported.
+ case (exportDrawers . orgStateExportSettings $ state) of
+ _ | name == "PROPERTIES" -> return mempty
+ Left names | name `elem` names -> return mempty
+ Right names | name `notElem` names -> return mempty
+ _ -> drawerDiv name <$> parseLines content
+ where
+ parseLines :: [String] -> OrgParser (F Blocks)
+ parseLines = parseFromString blocks . (++ "\n") . unlines
+
+ drawerDiv :: String -> F Blocks -> F Blocks
+ drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
+
+drawerLine :: OrgParser String
+drawerLine = anyLine
+
+drawerEnd :: OrgParser String
+drawerEnd = try $
+ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+
+-- | Read a :PROPERTIES: drawer and return the key/value pairs contained
+-- within.
+propertiesDrawer :: OrgParser [(String, String)]
+propertiesDrawer = try $ do
+ drawerType <- drawerStart
+ guard $ map toUpper drawerType == "PROPERTIES"
+ manyTill property (try drawerEnd)
+ where
+ property :: OrgParser (String, String)
+ property = try $ (,) <$> key <*> value
+
+ key :: OrgParser String
+ key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+
+ value :: OrgParser String
+ value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+
+keyValuesToAttr :: [(String, String)] -> Attr
+keyValuesToAttr kvs =
+ let
+ lowerKvs = map (\(k, v) -> (map toLower k, v)) kvs
+ id' = fromMaybe mempty . lookup "custom_id" $ lowerKvs
+ cls = fromMaybe mempty . lookup "class" $ lowerKvs
+ kvs' = filter (flip notElem ["custom_id", "class"] . fst) lowerKvs
+ in
+ (id', words cls, kvs')
+
+
+--
+-- Figures
+--
+
+-- | Figures (Image on a line by itself, preceded by name and/or caption)
+figure :: OrgParser (F Blocks)
+figure = try $ do
+ figAttrs <- blockAttributes
+ src <- skipSpaces *> selfTarget <* skipSpaces <* newline
+ guard . not . isNothing . blockAttrCaption $ figAttrs
+ guard (isImageFilename src)
+ let figName = fromMaybe mempty $ blockAttrName figAttrs
+ let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
+ let figKeyVals = blockAttrKeyValues figAttrs
+ let attr = (mempty, mempty, figKeyVals)
+ return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
+ where
+ withFigPrefix :: String -> String
+ withFigPrefix cs =
+ if "fig:" `isPrefixOf` cs
+ then cs
+ else "fig:" ++ cs
+
+ selfTarget :: OrgParser String
+ selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+
+--
+-- Comments, Options and Metadata
+--
+
+addLinkFormat :: String
+ -> (String -> String)
+ -> OrgParser ()
+addLinkFormat key formatter = updateState $ \s ->
+ let fs = orgStateLinkFormatters s
+ in s{ orgStateLinkFormatters = M.insert key formatter fs }
+
+specialLine :: OrgParser (F Blocks)
+specialLine = fmap return . try $ metaLine <|> commentLine
+
+-- The order, in which blocks are tried, makes sure that we're not looking at
+-- the beginning of a block, so we don't need to check for it
+metaLine :: OrgParser Blocks
+metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
+
+commentLine :: OrgParser Blocks
+commentLine = commentLineStart *> anyLine *> pure mempty
+
+declarationLine :: OrgParser ()
+declarationLine = try $ do
+ key <- metaKey
+ inlinesF <- metaInlines
+ updateState $ \st ->
+ let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
+ in st { orgStateMeta' = orgStateMeta' st <> meta' }
+ return ()
+
+metaInlines :: OrgParser (F MetaValue)
+metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
+
+metaKey :: OrgParser String
+metaKey = map toLower <$> many1 (noneOf ": \n\r")
+ <* char ':'
+ <* skipSpaces
+
+optionLine :: OrgParser ()
+optionLine = try $ do
+ key <- metaKey
+ case key of
+ "link" -> parseLinkFormat >>= uncurry addLinkFormat
+ "options" -> () <$ sepBy spaces exportSetting
+ _ -> mzero
+
+--
+-- Export Settings
+--
+
+-- | Read and process org-mode specific export options.
+exportSetting :: OrgParser ()
+exportSetting = choice
+ [ booleanSetting "^" setExportSubSuperscripts
+ , ignoredSetting "'"
+ , ignoredSetting "*"
+ , ignoredSetting "-"
+ , ignoredSetting ":"
+ , ignoredSetting "<"
+ , ignoredSetting "\\n"
+ , ignoredSetting "arch"
+ , ignoredSetting "author"
+ , ignoredSetting "c"
+ , ignoredSetting "creator"
+ , complementableListSetting "d" setExportDrawers
+ , ignoredSetting "date"
+ , ignoredSetting "e"
+ , ignoredSetting "email"
+ , ignoredSetting "f"
+ , ignoredSetting "H"
+ , ignoredSetting "inline"
+ , ignoredSetting "num"
+ , ignoredSetting "p"
+ , ignoredSetting "pri"
+ , ignoredSetting "prop"
+ , ignoredSetting "stat"
+ , ignoredSetting "tags"
+ , ignoredSetting "tasks"
+ , ignoredSetting "tex"
+ , ignoredSetting "timestamp"
+ , ignoredSetting "title"
+ , ignoredSetting "toc"
+ , ignoredSetting "todo"
+ , ignoredSetting "|"
+ ] <?> "export setting"
+
+booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser ()
+booleanSetting settingIdentifier setter = try $ do
+ string settingIdentifier
+ char ':'
+ value <- elispBoolean
+ updateState $ modifyExportSettings setter value
+
+-- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are
+-- interpreted as true.
+elispBoolean :: OrgParser Bool
+elispBoolean = try $ do
+ value <- many1 nonspaceChar
+ return $ case map toLower value of
+ "nil" -> False
+ "{}" -> False
+ "()" -> False
+ _ -> True
+
+-- | A list or a complement list (i.e. a list starting with `not`).
+complementableListSetting :: String
+ -> ExportSettingSetter (Either [String] [String])
+ -> OrgParser ()
+complementableListSetting settingIdentifier setter = try $ do
+ _ <- string settingIdentifier <* char ':'
+ value <- choice [ Left <$> complementStringList
+ , Right <$> stringList
+ , (\b -> if b then Left [] else Right []) <$> elispBoolean
+ ]
+ updateState $ modifyExportSettings setter value
+ where
+ -- Read a plain list of strings.
+ stringList :: OrgParser [String]
+ stringList = try $
+ char '('
+ *> sepBy elispString spaces
+ <* char ')'
+
+ -- Read an emacs lisp list specifying a complement set.
+ complementStringList :: OrgParser [String]
+ complementStringList = try $
+ string "(not "
+ *> sepBy elispString spaces
+ <* char ')'
+
+ elispString :: OrgParser String
+ elispString = try $
+ char '"'
+ *> manyTill alphaNum (char '"')
+
+ignoredSetting :: String -> OrgParser ()
+ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
+
+
+parseLinkFormat :: OrgParser ((String, String -> String))
+parseLinkFormat = try $ do
+ linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+ linkSubst <- parseFormat
+ return (linkType, linkSubst)
+
+-- | An ad-hoc, single-argument-only implementation of a printf-style format
+-- parser.
+parseFormat :: OrgParser (String -> String)
+parseFormat = try $ do
+ replacePlain <|> replaceUrl <|> justAppend
+ where
+ -- inefficient, but who cares
+ replacePlain = try $ (\x -> concat . flip intersperse x)
+ <$> sequence [tillSpecifier 's', rest]
+ replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
+ <$> sequence [tillSpecifier 'h', rest]
+ justAppend = try $ (++) <$> rest
+
+ rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
+ tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+
+--
+-- Headers
+--
+
+-- | Headers
+header :: OrgParser (F Blocks)
+header = try $ do
+ level <- headerStart
+ title <- manyTill inline (lookAhead $ optional headerTags <* newline)
+ tags <- option [] headerTags
+ newline
+ propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
+ inlines <- runF (tagTitle title tags) <$> getState
+ attr <- registerHeader propAttr inlines
+ return $ pure (B.headerWith attr level inlines)
+ where
+ tagTitle :: [F Inlines] -> [String] -> F Inlines
+ tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags
+
+ tagToInlineF :: String -> F Inlines
+ tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
+
+ headerTags :: OrgParser [String]
+ headerTags = try $
+ let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+ in skipSpaces
+ *> char ':'
+ *> many1 tag
+ <* skipSpaces
+
+
+--
+-- Tables
+--
+
+data OrgTableRow = OrgContentRow (F [Blocks])
+ | OrgAlignRow [Alignment]
+ | OrgHlineRow
+
+-- OrgTable is strongly related to the pandoc table ADT. Using the same
+-- (i.e. pandoc-global) ADT would mean that the reader would break if the
+-- global structure was to be changed, which would be bad. The final table
+-- should be generated using a builder function. Column widths aren't
+-- implemented yet, so they are not tracked here.
+data OrgTable = OrgTable
+ { orgTableAlignments :: [Alignment]
+ , orgTableHeader :: [Blocks]
+ , orgTableRows :: [[Blocks]]
+ }
+
+table :: OrgParser (F Blocks)
+table = try $ do
+ blockAttrs <- blockAttributes
+ lookAhead tableStart
+ do
+ rows <- tableRows
+ let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
+ return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
+
+orgToPandocTable :: OrgTable
+ -> Inlines
+ -> Blocks
+orgToPandocTable (OrgTable aligns heads lns) caption =
+ B.table caption (zip aligns $ repeat 0) heads lns
+
+tableRows :: OrgParser [OrgTableRow]
+tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
+
+tableContentRow :: OrgParser OrgTableRow
+tableContentRow = try $
+ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
+
+tableContentCell :: OrgParser (F Blocks)
+tableContentCell = try $
+ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
+
+tableAlignRow :: OrgParser OrgTableRow
+tableAlignRow = try $ do
+ tableStart
+ cells <- many1Till tableAlignCell newline
+ -- Empty rows are regular (i.e. content) rows, not alignment rows.
+ guard $ any (/= AlignDefault) cells
+ return $ OrgAlignRow cells
+
+tableAlignCell :: OrgParser Alignment
+tableAlignCell =
+ choice [ try $ emptyCell *> return AlignDefault
+ , try $ skipSpaces
+ *> char '<'
+ *> tableAlignFromChar
+ <* many digit
+ <* char '>'
+ <* emptyCell
+ ] <?> "alignment info"
+ where emptyCell = try $ skipSpaces *> endOfCell
+
+tableAlignFromChar :: OrgParser Alignment
+tableAlignFromChar = try $
+ choice [ char 'l' *> return AlignLeft
+ , char 'c' *> return AlignCenter
+ , char 'r' *> return AlignRight
+ ]
+
+tableHline :: OrgParser OrgTableRow
+tableHline = try $
+ OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+
+endOfCell :: OrgParser Char
+endOfCell = try $ char '|' <|> lookAhead newline
+
+rowsToTable :: [OrgTableRow]
+ -> F OrgTable
+rowsToTable = foldM rowToContent emptyTable
+ where emptyTable = OrgTable mempty mempty mempty
+
+normalizeTable :: OrgTable -> OrgTable
+normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows
+ where
+ refRow = if heads /= mempty
+ then heads
+ else if rows == mempty then mempty else head rows
+ cols = length refRow
+ fillColumns base padding = take cols $ base ++ repeat padding
+ aligns' = fillColumns aligns AlignDefault
+
+-- One or more horizontal rules after the first content line mark the previous
+-- line as a header. All other horizontal lines are discarded.
+rowToContent :: OrgTable
+ -> OrgTableRow
+ -> F OrgTable
+rowToContent orgTable row =
+ case row of
+ OrgHlineRow -> return singleRowPromotedToHeader
+ OrgAlignRow as -> return . setAligns $ as
+ OrgContentRow cs -> appendToBody cs
+ where
+ singleRowPromotedToHeader :: OrgTable
+ singleRowPromotedToHeader = case orgTable of
+ OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+ orgTable{ orgTableHeader = b , orgTableRows = [] }
+ _ -> orgTable
+
+ setAligns :: [Alignment] -> OrgTable
+ setAligns aligns = orgTable{ orgTableAlignments = aligns }
+
+ appendToBody :: F [Blocks] -> F OrgTable
+ appendToBody frow = do
+ newRow <- frow
+ let oldRows = orgTableRows orgTable
+ -- NOTE: This is an inefficient O(n) operation. This should be changed
+ -- if performance ever becomes a problem.
+ return orgTable{ orgTableRows = oldRows ++ [newRow] }
+
+
+--
+-- LaTeX fragments
+--
+latexFragment :: OrgParser (F Blocks)
+latexFragment = try $ do
+ envName <- latexEnvStart
+ content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
+ return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
+ where
+ c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
+ , c
+ , "\\end{", e, "}\n"
+ ]
+
+latexEnd :: String -> OrgParser ()
+latexEnd envName = try $
+ () <$ skipSpaces
+ <* string ("\\end{" ++ envName ++ "}")
+ <* blankline
+
+
+--
+-- Footnote defintions
+--
+noteBlock :: OrgParser (F Blocks)
+noteBlock = try $ do
+ ref <- noteMarker <* skipSpaces
+ content <- mconcat <$> blocksTillHeaderOrNote
+ addToNotesTable (ref, content)
+ return mempty
+ where
+ blocksTillHeaderOrNote =
+ many1Till block (eof <|> () <$ lookAhead noteMarker
+ <|> () <$ lookAhead headerStart)
+
+-- Paragraphs or Plain text
+paraOrPlain :: OrgParser (F Blocks)
+paraOrPlain = try $ do
+ ils <- parseInlines
+ nl <- option False (newline *> return True)
+ -- Read block as paragraph, except if we are in a list context and the block
+ -- is directly followed by a list item, in which case the block is read as
+ -- plain text.
+ try (guard nl
+ *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
+ *> return (B.para <$> ils))
+ <|> (return (B.plain <$> ils))
+
+inlinesTillNewline :: OrgParser (F Inlines)
+inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
+
+
+--
+-- list blocks
+--
+
+list :: OrgParser (F Blocks)
+list = choice [ definitionList, bulletList, orderedList ] <?> "list"
+
+definitionList :: OrgParser (F Blocks)
+definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.definitionList . fmap compactify'DL . sequence
+ <$> many1 (definitionListItem $ bulletListStart' (Just n))
+
+bulletList :: OrgParser (F Blocks)
+bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.bulletList . fmap compactify' . sequence
+ <$> many1 (listItem (bulletListStart' $ Just n))
+
+orderedList :: OrgParser (F Blocks)
+orderedList = fmap B.orderedList . fmap compactify' . sequence
+ <$> many1 (listItem orderedListStart)
+
+bulletListStart' :: Maybe Int -> OrgParser Int
+-- returns length of bulletList prefix, inclusive of marker
+bulletListStart' Nothing = do ind <- length <$> many spaceChar
+ oneOf (bullets $ ind == 0)
+ skipSpaces1
+ return (ind + 1)
+bulletListStart' (Just n) = do count (n-1) spaceChar
+ oneOf (bullets $ n == 1)
+ many1 spaceChar
+ return n
+
+-- Unindented lists are legal, but they can't use '*' bullets.
+-- We return n to maintain compatibility with the generic listItem.
+bullets :: Bool -> String
+bullets unindented = if unindented then "+-" else "*+-"
+
+definitionListItem :: OrgParser Int
+ -> OrgParser (F (Inlines, [Blocks]))
+definitionListItem parseMarkerGetLength = try $ do
+ markerLength <- parseMarkerGetLength
+ term <- manyTill (noneOf "\n\r") (try definitionMarker)
+ line1 <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ cont <- concat <$> many (listContinuation markerLength)
+ term' <- parseFromString parseInlines term
+ contents' <- parseFromString blocks $ line1 ++ blank ++ cont
+ return $ (,) <$> term' <*> fmap (:[]) contents'
+ where
+ definitionMarker =
+ spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
+
+
+-- parse raw text for one list item, excluding start marker and continuations
+listItem :: OrgParser Int
+ -> OrgParser (F Blocks)
+listItem start = try . withContext ListItemState $ do
+ markerLength <- try start
+ firstLine <- anyLineNewline
+ blank <- option "" ("\n" <$ blankline)
+ rest <- concat <$> many (listContinuation markerLength)
+ parseFromString blocks $ firstLine ++ blank ++ rest
+
+-- continuation of a list item - indented and separated by blankline or endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int
+ -> OrgParser String
+listContinuation markerLength = try $
+ notFollowedBy' blankline
+ *> (mappend <$> (concat <$> many1 listLine)
+ <*> many blankline)
+ where listLine = try $ indentWith markerLength *> anyLineNewline
+
+-- | Parse any line, include the final newline in the output.
+anyLineNewline :: OrgParser String
+anyLineNewline = (++ "\n") <$> anyLine
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
new file mode 100644
index 000000000..0c3840979
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -0,0 +1,715 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Parsers for Org-mode inline elements.
+-}
+module Text.Pandoc.Readers.Org.Inlines
+ ( inline
+ , addToNotesTable
+ , parseInlines
+ , isImageFilename
+ , linkTarget
+ ) where
+
+import Text.Pandoc.Readers.Org.BlockStarts
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Builder ( Inlines )
+import Text.Pandoc.Definition
+import Text.Pandoc.Compat.Monoid ( (<>) )
+import Text.Pandoc.Options
+import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
+import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
+import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
+
+import Control.Arrow ( first )
+import Control.Monad ( guard, mplus, mzero, when )
+import Data.Char ( isAlphaNum, isSpace )
+import Data.List ( isPrefixOf, isSuffixOf )
+import Data.Maybe ( fromMaybe )
+import qualified Data.Map as M
+
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first ("rundoc-" ++)
+
+translateLang :: String -> String
+translateLang "C" = "c"
+translateLang "C++" = "cpp"
+translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
+translateLang "js" = "javascript"
+translateLang "lisp" = "commonlisp"
+translateLang "R" = "r"
+translateLang "sh" = "bash"
+translateLang "sqlite" = "sql"
+translateLang cs = cs
+
+--
+-- Functions acting on the parser state
+--
+recordAnchorId :: String -> OrgParser ()
+recordAnchorId i = updateState $ \s ->
+ s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+
+pushToInlineCharStack :: Char -> OrgParser ()
+pushToInlineCharStack c = updateState $ \s ->
+ s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
+
+popInlineCharStack :: OrgParser ()
+popInlineCharStack = updateState $ \s ->
+ s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
+
+surroundingEmphasisChar :: OrgParser [Char]
+surroundingEmphasisChar =
+ take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+
+startEmphasisNewlinesCounting :: Int -> OrgParser ()
+startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
+ s{ orgStateEmphasisNewlines = Just maxNewlines }
+
+decEmphasisNewlinesCount :: OrgParser ()
+decEmphasisNewlinesCount = updateState $ \s ->
+ s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+
+newlinesCountWithinLimits :: OrgParser Bool
+newlinesCountWithinLimits = do
+ st <- getState
+ return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
+
+resetEmphasisNewlines :: OrgParser ()
+resetEmphasisNewlines = updateState $ \s ->
+ s{ orgStateEmphasisNewlines = Nothing }
+
+addToNotesTable :: OrgNoteRecord -> OrgParser ()
+addToNotesTable note = do
+ oldnotes <- orgStateNotes' <$> getState
+ updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
+
+-- | Parse a single Org-mode inline element
+inline :: OrgParser (F Inlines)
+inline =
+ choice [ whitespace
+ , linebreak
+ , cite
+ , footnote
+ , linkOrImage
+ , anchor
+ , inlineCodeBlock
+ , str
+ , endline
+ , emph
+ , strong
+ , strikeout
+ , underline
+ , code
+ , math
+ , displayMath
+ , verbatim
+ , subscript
+ , superscript
+ , inlineLaTeX
+ , smart
+ , symbol
+ ] <* (guard =<< newlinesCountWithinLimits)
+ <?> "inline"
+
+parseInlines :: OrgParser (F Inlines)
+parseInlines = trimInlinesF . mconcat <$> many1 inline
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
+
+
+whitespace :: OrgParser (F Inlines)
+whitespace = pure B.space <$ skipMany1 spaceChar
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ <?> "whitespace"
+
+linebreak :: OrgParser (F Inlines)
+linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
+
+str :: OrgParser (F Inlines)
+str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+ <* updateLastStrPos
+
+-- | An endline character that can be treated as a space, not a structural
+-- break. This should reflect the values of the Emacs variable
+-- @org-element-pagaraph-separate@.
+endline :: OrgParser (F Inlines)
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ notFollowedBy' exampleLineStart
+ notFollowedBy' hline
+ notFollowedBy' noteMarker
+ notFollowedBy' tableStart
+ notFollowedBy' drawerStart
+ notFollowedBy' headerStart
+ notFollowedBy' metaLineStart
+ notFollowedBy' latexEnvStart
+ notFollowedBy' commentLineStart
+ notFollowedBy' bulletListStart
+ notFollowedBy' orderedListStart
+ decEmphasisNewlinesCount
+ guard =<< newlinesCountWithinLimits
+ updateLastPreCharPos
+ return . return $ B.softbreak
+
+cite :: OrgParser (F Inlines)
+cite = try $ do
+ guardEnabled Ext_citations
+ (cs, raw) <- withRaw normalCite
+ return $ (flip B.cite (B.text raw)) <$> cs
+
+normalCite :: OrgParser (F [Citation])
+normalCite = try $ char '['
+ *> skipSpaces
+ *> citeList
+ <* skipSpaces
+ <* char ']'
+
+citeList :: OrgParser (F [Citation])
+citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: OrgParser (F Citation)
+citation = try $ do
+ pref <- prefix
+ (suppress_author, key) <- citeKey
+ suff <- suffix
+ return $ do
+ x <- pref
+ y <- suff
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+ where
+ prefix = trimInlinesF . mconcat <$>
+ manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+ suffix = try $ do
+ hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+ skipSpaces
+ rest <- trimInlinesF . mconcat <$>
+ many (notFollowedBy (oneOf ";]") *> inline)
+ return $ if hasSpace
+ then (B.space <>) <$> rest
+ else rest
+
+footnote :: OrgParser (F Inlines)
+footnote = try $ inlineNote <|> referencedNote
+
+inlineNote :: OrgParser (F Inlines)
+inlineNote = try $ do
+ string "[fn:"
+ ref <- many alphaNum
+ char ':'
+ note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
+ when (not $ null ref) $
+ addToNotesTable ("fn:" ++ ref, note)
+ return $ B.note <$> note
+
+referencedNote :: OrgParser (F Inlines)
+referencedNote = try $ do
+ ref <- noteMarker
+ return $ do
+ notes <- asksF orgStateNotes'
+ case lookup ref notes of
+ Nothing -> return $ B.str $ "[" ++ ref ++ "]"
+ Just contents -> do
+ st <- askF
+ let contents' = runF contents st{ orgStateNotes' = [] }
+ return $ B.note contents'
+
+linkOrImage :: OrgParser (F Inlines)
+linkOrImage = explicitOrImageLink
+ <|> selflinkOrImage
+ <|> angleLink
+ <|> plainLink
+ <?> "link or image"
+
+explicitOrImageLink :: OrgParser (F Inlines)
+explicitOrImageLink = try $ do
+ char '['
+ srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
+ title <- enclosedRaw (char '[') (char ']')
+ title' <- parseFromString (mconcat <$> many inline) title
+ char ']'
+ return $ do
+ src <- srcF
+ if isImageFilename title
+ then pure $ B.link src "" $ B.image title mempty mempty
+ else linkToInlinesF src =<< title'
+
+selflinkOrImage :: OrgParser (F Inlines)
+selflinkOrImage = try $ do
+ src <- char '[' *> linkTarget <* char ']'
+ return $ linkToInlinesF src (B.str src)
+
+plainLink :: OrgParser (F Inlines)
+plainLink = try $ do
+ (orig, src) <- uri
+ returnF $ B.link src "" (B.str orig)
+
+angleLink :: OrgParser (F Inlines)
+angleLink = try $ do
+ char '<'
+ link <- plainLink
+ char '>'
+ return link
+
+linkTarget :: OrgParser String
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
+
+possiblyEmptyLinkTarget :: OrgParser String
+possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
+
+applyCustomLinkFormat :: String -> OrgParser (F String)
+applyCustomLinkFormat link = do
+ let (linkType, rest) = break (== ':') link
+ return $ do
+ formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
+ return $ maybe link ($ drop 1 rest) formatter
+
+-- | Take a link and return a function which produces new inlines when given
+-- description inlines.
+linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF linkStr =
+ case linkStr of
+ "" -> pure . B.link mempty "" -- wiki link (empty by convention)
+ ('#':_) -> pure . B.link linkStr "" -- document-local fraction
+ _ -> case cleanLinkString linkStr of
+ (Just cleanedLink) -> if isImageFilename cleanedLink
+ then const . pure $ B.image cleanedLink "" ""
+ else pure . B.link cleanedLink ""
+ Nothing -> internalLink linkStr -- other internal link
+
+-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
+ case s of
+ '/':_ -> Just $ "file://" ++ s -- absolute path
+ '.':'/':_ -> Just s -- relative path
+ '.':'.':'/':_ -> Just s -- relative path
+ -- Relative path or URL (file schema)
+ 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+ _ | isUrl s -> Just s -- URL
+ _ -> Nothing
+ where
+ isUrl :: String -> Bool
+ isUrl cs =
+ let (scheme, path) = break (== ':') cs
+ in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+ && not (null path)
+
+isImageFilename :: String -> Bool
+isImageFilename filename =
+ any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
+ (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+ ':' `notElem` filename)
+ where
+ imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+ protocols = [ "file", "http", "https" ]
+
+internalLink :: String -> Inlines -> F Inlines
+internalLink link title = do
+ anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
+ if anchorB
+ then return $ B.link ('#':link) "" title
+ else return $ B.emph title
+
+-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
+-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
+-- @org-target-regexp@, which is fairly liberal. Since no link is created if
+-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
+-- an anchor.
+
+anchor :: OrgParser (F Inlines)
+anchor = try $ do
+ anchorId <- parseAnchor
+ recordAnchorId anchorId
+ returnF $ B.spanWith (solidify anchorId, [], []) mempty
+ where
+ parseAnchor = string "<<"
+ *> many1 (noneOf "\t\n\r<>\"' ")
+ <* string ">>"
+ <* skipSpaces
+
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
+-- the org function @org-export-solidify-link-text@.
+
+solidify :: String -> String
+solidify = map replaceSpecialChar
+ where replaceSpecialChar c
+ | isAlphaNum c = c
+ | c `elem` ("_.-:" :: String) = c
+ | otherwise = '-'
+
+-- | Parses an inline code block and marks it as an babel block.
+inlineCodeBlock :: OrgParser (F Inlines)
+inlineCodeBlock = try $ do
+ string "src_"
+ lang <- many1 orgArgWordChar
+ opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
+ inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
+ let attrClasses = [translateLang lang, rundocBlockClass]
+ let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
+ returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
+ where
+ inlineBlockOption :: OrgParser (String, String)
+ inlineBlockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgInlineParamValue
+ return (argKey, paramValue)
+
+ orgInlineParamValue :: OrgParser String
+ orgInlineParamValue = try $
+ skipSpaces
+ *> notFollowedBy (char ':')
+ *> many1 (noneOf "\t\n\r ]")
+ <* skipSpaces
+
+
+
+enclosedByPair :: Char -- ^ opening char
+ -> Char -- ^ closing char
+ -> OrgParser a -- ^ parser
+ -> OrgParser [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
+
+emph :: OrgParser (F Inlines)
+emph = fmap B.emph <$> emphasisBetween '/'
+
+strong :: OrgParser (F Inlines)
+strong = fmap B.strong <$> emphasisBetween '*'
+
+strikeout :: OrgParser (F Inlines)
+strikeout = fmap B.strikeout <$> emphasisBetween '+'
+
+-- There is no underline, so we use strong instead.
+underline :: OrgParser (F Inlines)
+underline = fmap B.strong <$> emphasisBetween '_'
+
+verbatim :: OrgParser (F Inlines)
+verbatim = return . B.code <$> verbatimBetween '='
+
+code :: OrgParser (F Inlines)
+code = return . B.code <$> verbatimBetween '~'
+
+subscript :: OrgParser (F Inlines)
+subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
+
+superscript :: OrgParser (F Inlines)
+superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
+
+math :: OrgParser (F Inlines)
+math = return . B.math <$> choice [ math1CharBetween '$'
+ , mathStringBetween '$'
+ , rawMathBetween "\\(" "\\)"
+ ]
+
+displayMath :: OrgParser (F Inlines)
+displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
+ , rawMathBetween "$$" "$$"
+ ]
+
+updatePositions :: Char
+ -> OrgParser (Char)
+updatePositions c = do
+ when (c `elem` emphasisPreChars) updateLastPreCharPos
+ when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+ return c
+
+symbol :: OrgParser (F Inlines)
+symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+
+emphasisBetween :: Char
+ -> OrgParser (F Inlines)
+emphasisBetween c = try $ do
+ startEmphasisNewlinesCounting emphasisAllowedNewlines
+ res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
+ isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
+ when isTopLevelEmphasis
+ resetEmphasisNewlines
+ return res
+
+verbatimBetween :: Char
+ -> OrgParser String
+verbatimBetween c = try $
+ emphasisStart c *>
+ many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
+
+-- | Parses a raw string delimited by @c@ using Org's math rules
+mathStringBetween :: Char
+ -> OrgParser String
+mathStringBetween c = try $ do
+ mathStart c
+ body <- many1TillNOrLessNewlines mathAllowedNewlines
+ (noneOf (c:"\n\r"))
+ (lookAhead $ mathEnd c)
+ final <- mathEnd c
+ return $ body ++ [final]
+
+-- | Parse a single character between @c@ using math rules
+math1CharBetween :: Char
+ -> OrgParser String
+math1CharBetween c = try $ do
+ char c
+ res <- noneOf $ c:mathForbiddenBorderChars
+ char c
+ eof <|> () <$ lookAhead (oneOf mathPostChars)
+ return [res]
+
+rawMathBetween :: String
+ -> String
+ -> OrgParser String
+rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+
+-- | Parses the start (opening character) of emphasis
+emphasisStart :: Char -> OrgParser Char
+emphasisStart c = try $ do
+ guard =<< afterEmphasisPreChar
+ guard =<< notAfterString
+ char c
+ lookAhead (noneOf emphasisForbiddenBorderChars)
+ pushToInlineCharStack c
+ return c
+
+-- | Parses the closing character of emphasis
+emphasisEnd :: Char -> OrgParser Char
+emphasisEnd c = try $ do
+ guard =<< notAfterForbiddenBorderChar
+ char c
+ eof <|> () <$ lookAhead acceptablePostChars
+ updateLastStrPos
+ popInlineCharStack
+ return c
+ where acceptablePostChars =
+ surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
+
+mathStart :: Char -> OrgParser Char
+mathStart c = try $
+ char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
+
+mathEnd :: Char -> OrgParser Char
+mathEnd c = try $ do
+ res <- noneOf (c:mathForbiddenBorderChars)
+ char c
+ eof <|> () <$ lookAhead (oneOf mathPostChars)
+ return res
+
+
+enclosedInlines :: OrgParser a
+ -> OrgParser b
+ -> OrgParser (F Inlines)
+enclosedInlines start end = try $
+ trimInlinesF . mconcat <$> enclosed start end inline
+
+enclosedRaw :: OrgParser a
+ -> OrgParser b
+ -> OrgParser String
+enclosedRaw start end = try $
+ start *> (onSingleLine <|> spanningTwoLines)
+ where onSingleLine = try $ many1Till (noneOf "\n\r") end
+ spanningTwoLines = try $
+ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
+
+-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
+-- newlines.
+many1TillNOrLessNewlines :: Int
+ -> OrgParser Char
+ -> OrgParser a
+ -> OrgParser String
+many1TillNOrLessNewlines n p end = try $
+ nMoreLines (Just n) mempty >>= oneOrMore
+ where
+ nMoreLines Nothing cs = return cs
+ nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
+ nMoreLines k cs = try $ (final k cs <|> rest k cs)
+ >>= uncurry nMoreLines
+ final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
+ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
+ finalLine = try $ manyTill p end
+ minus1 k = k - 1
+ oneOrMore cs = guard (not $ null cs) *> return cs
+
+-- Org allows customization of the way it reads emphasis. We use the defaults
+-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
+-- for details).
+
+-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
+emphasisPreChars :: [Char]
+emphasisPreChars = "\t \"'({"
+
+-- | Chars allowed at after emphasis
+emphasisPostChars :: [Char]
+emphasisPostChars = "\t\n !\"'),-.:;?\\}"
+
+-- | Chars not allowed at the (inner) border of emphasis
+emphasisForbiddenBorderChars :: [Char]
+emphasisForbiddenBorderChars = "\t\n\r \"',"
+
+-- | The maximum number of newlines within
+emphasisAllowedNewlines :: Int
+emphasisAllowedNewlines = 1
+
+-- LaTeX-style math: see `org-latex-regexps` for details
+
+-- | Chars allowed after an inline ($...$) math statement
+mathPostChars :: [Char]
+mathPostChars = "\t\n \"'),-.:;?"
+
+-- | Chars not allowed at the (inner) border of math
+mathForbiddenBorderChars :: [Char]
+mathForbiddenBorderChars = "\t\n\r ,;.$"
+
+-- | Maximum number of newlines in an inline math statement
+mathAllowedNewlines :: Int
+mathAllowedNewlines = 2
+
+-- | Whether we are right behind a char allowed before emphasis
+afterEmphasisPreChar :: OrgParser Bool
+afterEmphasisPreChar = do
+ pos <- getPosition
+ lastPrePos <- orgStateLastPreCharPos <$> getState
+ return . fromMaybe True $ (== pos) <$> lastPrePos
+
+-- | Whether the parser is right after a forbidden border char
+notAfterForbiddenBorderChar :: OrgParser Bool
+notAfterForbiddenBorderChar = do
+ pos <- getPosition
+ lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
+ return $ lastFBCPos /= Just pos
+
+-- | Read a sub- or superscript expression
+subOrSuperExpr :: OrgParser (F Inlines)
+subOrSuperExpr = try $
+ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
+ , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
+ , simpleSubOrSuperString
+ ] >>= parseFromString (mconcat <$> many inline)
+ where enclosing (left, right) s = left : s ++ [right]
+
+simpleSubOrSuperString :: OrgParser String
+simpleSubOrSuperString = try $ do
+ state <- getState
+ guard . exportSubSuperscripts . orgStateExportSettings $ state
+ choice [ string "*"
+ , mappend <$> option [] ((:[]) <$> oneOf "+-")
+ <*> many1 alphaNum
+ ]
+
+inlineLaTeX :: OrgParser (F Inlines)
+inlineLaTeX = try $ do
+ cmd <- inlineLaTeXCommand
+ maybe mzero returnF $
+ parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd
+ where
+ parseAsMath :: String -> Maybe Inlines
+ parseAsMath cs = B.fromList <$> texMathToPandoc cs
+
+ parseAsInlineLaTeX :: String -> Maybe Inlines
+ parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs
+
+ parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
+ -- drop initial backslash and any trailing "{}"
+ where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
+
+ state :: ParserState
+ state = def{ stateOptions = def{ readerParseRaw = True }}
+
+ texMathToPandoc :: String -> Maybe [Inline]
+ texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
+
+maybeRight :: Either a b -> Maybe b
+maybeRight = either (const Nothing) Just
+
+inlineLaTeXCommand :: OrgParser String
+inlineLaTeXCommand = try $ do
+ rest <- getInput
+ case runParser rawLaTeXInline def "source" rest of
+ Right (RawInline _ cs) -> do
+ -- drop any trailing whitespace, those are not be part of the command as
+ -- far as org mode is concerned.
+ let cmdNoSpc = dropWhileEnd isSpace cs
+ let len = length cmdNoSpc
+ count len anyChar
+ return cmdNoSpc
+ _ -> mzero
+
+-- Taken from Data.OldList.
+dropWhileEnd :: (a -> Bool) -> [a] -> [a]
+dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
+
+smart :: OrgParser (F Inlines)
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
+ where
+ orgDash = dash <* updatePositions '-'
+ orgEllipses = ellipses <* updatePositions '.'
+ orgApostrophe =
+ (char '\'' <|> char '\8217') <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ *> return (B.str "\x2019")
+
+singleQuoted :: OrgParser (F Inlines)
+singleQuoted = try $ do
+ singleQuoteStart
+ updatePositions '\''
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline (singleQuoteEnd <* updatePositions '\'')
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: OrgParser (F Inlines)
+doubleQuoted = try $ do
+ doubleQuoteStart
+ updatePositions '"'
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
+ (fmap B.doubleQuoted . trimInlinesF $ contents))
+ <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 6a902cd46..e648a883e 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Define the Org-mode parser state.
-}
module Text.Pandoc.Readers.Org.ParserState
- ( OrgParserState(..)
- , OrgParserLocal(..)
+ ( OrgParserState (..)
+ , OrgParserLocal (..)
, OrgNoteRecord
+ , HasReaderOptions (..)
+ , HasQuoteContext (..)
, F(..)
, askF
, asksF
@@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse
modifyExportSettings setter val state =
state { orgStateExportSettings = setter val . orgStateExportSettings $ state }
+
--
-- Parser state reader
--
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
new file mode 100644
index 000000000..9a1420645
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -0,0 +1,201 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Org-mode parsing utilities.
+
+Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
+functions are adapted to Org-mode specific functionality.
+-}
+module Text.Pandoc.Readers.Org.Parsing
+ ( OrgParser
+ , anyLine
+ , blanklines
+ , newline
+ , parseFromString
+ , skipSpaces1
+ , inList
+ , withContext
+ , updateLastForbiddenCharPos
+ , updateLastPreCharPos
+ , orgArgKey
+ , orgArgWord
+ , orgArgWordChar
+ -- * Re-exports from Text.Pandoc.Parser
+ , ParserContext (..)
+ , many1Till
+ , notFollowedBy'
+ , spaceChar
+ , nonspaceChar
+ , skipSpaces
+ , blankline
+ , enclosed
+ , stringAnyCase
+ , charsInBalanced
+ , uri
+ , withRaw
+ , readWithM
+ , guardEnabled
+ , updateLastStrPos
+ , notAfterString
+ , ParserState (..)
+ , registerHeader
+ , QuoteContext (..)
+ , singleQuoteStart
+ , singleQuoteEnd
+ , doubleQuoteStart
+ , doubleQuoteEnd
+ , dash
+ , ellipses
+ , citeKey
+ -- * Re-exports from Text.Pandoc.Parsec
+ , runParser
+ , getInput
+ , char
+ , letter
+ , digit
+ , alphaNum
+ , skipMany1
+ , spaces
+ , anyChar
+ , string
+ , count
+ , eof
+ , noneOf
+ , oneOf
+ , lookAhead
+ , notFollowedBy
+ , many
+ , many1
+ , manyTill
+ , (<|>)
+ , (<?>)
+ , choice
+ , try
+ , sepBy
+ , sepBy1
+ , option
+ , optional
+ , optionMaybe
+ , getState
+ , updateState
+ , SourcePos
+ , getPosition
+ ) where
+
+import Text.Pandoc.Readers.Org.ParserState
+
+import qualified Text.Pandoc.Parsing as P
+import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
+ , parseFromString )
+
+import Control.Monad ( guard )
+import Control.Monad.Reader ( Reader )
+
+-- | The parser used to read org files.
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+
+--
+-- Adaptions and specializations of parsing utilities
+--
+
+-- | Parse any line of text
+anyLine :: OrgParser String
+anyLine =
+ P.anyLine
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+ oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+ updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+ result <- P.parseFromString parser str'
+ updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+ return result
+
+-- | Skip one or more tab or space characters.
+skipSpaces1 :: OrgParser ()
+skipSpaces1 = skipMany1 spaceChar
+
+-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
+newline :: OrgParser Char
+newline =
+ P.newline
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
+blanklines :: OrgParser [Char]
+blanklines =
+ P.blanklines
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- | Succeeds when we're in list context.
+inList :: OrgParser ()
+inList = do
+ ctx <- orgStateParserContext <$> getState
+ guard (ctx == ListItemState)
+
+-- | Parse in different context
+withContext :: ParserContext -- ^ New parser context
+ -> OrgParser a -- ^ Parser to run in that context
+ -> OrgParser a
+withContext context parser = do
+ oldContext <- orgStateParserContext <$> getState
+ updateState $ \s -> s{ orgStateParserContext = context }
+ result <- parser
+ updateState $ \s -> s{ orgStateParserContext = oldContext }
+ return result
+
+--
+-- Parser state update functions
+--
+
+-- | Set the current position as the last position at which a forbidden char
+-- was found (i.e. a character which is not allowed at the inner border of
+-- markup).
+updateLastForbiddenCharPos :: OrgParser ()
+updateLastForbiddenCharPos = getPosition >>= \p ->
+ updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
+
+-- | Set the current parser position as the position at which a character was
+-- seen which allows inline markup to follow.
+updateLastPreCharPos :: OrgParser ()
+updateLastPreCharPos = getPosition >>= \p ->
+ updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
+
+orgArgKey :: OrgParser String
+orgArgKey = try $
+ skipSpaces *> char ':'
+ *> many1 orgArgWordChar
+
+orgArgWord :: OrgParser String
+orgArgWord = many1 orgArgWordChar
+
+orgArgWordChar :: OrgParser Char
+orgArgWordChar = alphaNum <|> oneOf "-_"