aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs137
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs979
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs172
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs880
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs218
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs259
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs217
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs97
8 files changed, 0 insertions, 2959 deletions
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
deleted file mode 100644
index 5588c4552..000000000
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ /dev/null
@@ -1,137 +0,0 @@
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode inline elements.
--}
-module Text.Pandoc.Readers.Org.BlockStarts
- ( exampleLineStart
- , hline
- , noteMarker
- , tableStart
- , drawerStart
- , headerStart
- , metaLineStart
- , latexEnvStart
- , commentLineStart
- , bulletListStart
- , orderedListStart
- , endOfBlock
- ) where
-
-import Control.Monad ( void )
-import Text.Pandoc.Readers.Org.Parsing
-
--- | Horizontal Line (five -- dashes or more)
-hline :: Monad m => OrgParser m ()
-hline = try $ do
- skipSpaces
- string "-----"
- many (char '-')
- skipSpaces
- newline
- return ()
-
--- | Read the start of a header line, return the header level
-headerStart :: Monad m => OrgParser m Int
-headerStart = try $
- (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
-
-tableStart :: Monad m => OrgParser m Char
-tableStart = try $ skipSpaces *> char '|'
-
-latexEnvStart :: Monad m => OrgParser m String
-latexEnvStart = try $ do
- skipSpaces *> string "\\begin{"
- *> latexEnvName
- <* string "}"
- <* blankline
- where
- latexEnvName :: Monad m => OrgParser m String
- latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
-
-
--- | Parses bullet list marker.
-bulletListStart :: Monad m => OrgParser m ()
-bulletListStart = try $
- choice
- [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1
- , () <$ skipSpaces1 <* char '*' <* skipSpaces1
- ]
-
-genericListStart :: Monad m
- => OrgParser m String
- -> OrgParser m Int
-genericListStart listMarker = try $
- (+) <$> (length <$> many spaceChar)
- <*> (length <$> listMarker <* many1 spaceChar)
-
-orderedListStart :: Monad m => OrgParser m Int
-orderedListStart = genericListStart orderedListMarker
- -- Ordered list markers allowed in org-mode
- where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
-
-drawerStart :: Monad m => OrgParser m String
-drawerStart = try $
- skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
-
-metaLineStart :: Monad m => OrgParser m ()
-metaLineStart = try $ skipSpaces <* string "#+"
-
-commentLineStart :: Monad m => OrgParser m ()
-commentLineStart = try $ skipSpaces <* string "# "
-
-exampleLineStart :: Monad m => OrgParser m ()
-exampleLineStart = () <$ try (skipSpaces *> string ": ")
-
-noteMarker :: Monad m => OrgParser m String
-noteMarker = try $ do
- char '['
- choice [ many1Till digit (char ']')
- , (++) <$> string "fn:"
- <*> many1Till (noneOf "\n\r\t ") (char ']')
- ]
-
--- | Succeeds if the parser is at the end of a block.
-endOfBlock :: Monad m => OrgParser m ()
-endOfBlock = lookAhead . try $ do
- void blankline <|> anyBlockStart
- where
- -- Succeeds if there is a new block starting at this position.
- anyBlockStart :: Monad m => OrgParser m ()
- anyBlockStart = try . choice $
- [ exampleLineStart
- , hline
- , metaLineStart
- , commentLineStart
- , void noteMarker
- , void tableStart
- , void drawerStart
- , void headerStart
- , void latexEnvStart
- , void bulletListStart
- , void orderedListStart
- ]
-
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
deleted file mode 100644
index 78ac8d0d1..000000000
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ /dev/null
@@ -1,979 +0,0 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE ViewPatterns #-}
-{-
-Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2017 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode block elements.
--}
-module Text.Pandoc.Readers.Org.Blocks
- ( blockList
- , meta
- ) where
-
-import Text.Pandoc.Readers.Org.BlockStarts
-import Text.Pandoc.Readers.Org.Inlines
-import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine )
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString, isImageFilename, rundocBlockClass
- , toRundocAttrib, translateLang )
-
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks )
-import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Shared ( compactify, compactifyDL, safeRead )
-
-import Control.Monad ( foldM, guard, mzero, void )
-import Data.Char ( isSpace, toLower, toUpper)
-import Data.Default ( Default )
-import Data.List ( foldl', isPrefixOf )
-import Data.Maybe ( fromMaybe, isNothing )
-import Data.Monoid ((<>))
-
---
--- Org headers
---
-newtype Tag = Tag { fromTag :: String }
- deriving (Show, Eq)
-
--- | Create a tag containing the given string.
-toTag :: String -> Tag
-toTag = Tag
-
--- | The key (also called name or type) of a property.
-newtype PropertyKey = PropertyKey { fromKey :: String }
- deriving (Show, Eq, Ord)
-
--- | Create a property key containing the given string. Org mode keys are
--- case insensitive and are hence converted to lower case.
-toPropertyKey :: String -> PropertyKey
-toPropertyKey = PropertyKey . map toLower
-
--- | The value assigned to a property.
-newtype PropertyValue = PropertyValue { fromValue :: String }
-
--- | Create a property value containing the given string.
-toPropertyValue :: String -> PropertyValue
-toPropertyValue = PropertyValue
-
--- | Check whether the property value is non-nil (i.e. truish).
-isNonNil :: PropertyValue -> Bool
-isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
-
--- | Key/value pairs from a PROPERTIES drawer
-type Properties = [(PropertyKey, PropertyValue)]
-
--- | Org mode headline (i.e. a document subtree).
-data Headline = Headline
- { headlineLevel :: Int
- , headlineTodoMarker :: Maybe TodoMarker
- , headlineText :: Inlines
- , headlineTags :: [Tag]
- , headlineProperties :: Properties
- , headlineContents :: Blocks
- , headlineChildren :: [Headline]
- }
-
---
--- Parsing headlines and subtrees
---
-
--- | Read an Org mode headline and its contents (i.e. a document subtree).
--- @lvl@ gives the minimum acceptable level of the tree.
-headline :: PandocMonad m => Int -> OrgParser m (F Headline)
-headline lvl = try $ do
- level <- headerStart
- guard (lvl <= level)
- todoKw <- optionMaybe todoKeyword
- title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
- tags <- option [] headerTags
- newline
- properties <- option mempty propertiesDrawer
- contents <- blocks
- children <- many (headline (level + 1))
- return $ do
- title' <- title
- contents' <- contents
- children' <- sequence children
- return $ Headline
- { headlineLevel = level
- , headlineTodoMarker = todoKw
- , headlineText = title'
- , headlineTags = tags
- , headlineProperties = properties
- , headlineContents = contents'
- , headlineChildren = children'
- }
- where
- endOfTitle :: Monad m => OrgParser m ()
- endOfTitle = void . lookAhead $ optional headerTags *> newline
-
- headerTags :: Monad m => OrgParser m [Tag]
- headerTags = try $
- let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
- in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-
--- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
-headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
-headlineToBlocks hdln@(Headline {..}) = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
- case () of
- _ | any isNoExportTag headlineTags -> return mempty
- _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln
- _ | isCommentTitle headlineText -> return mempty
- _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln
- _ | otherwise -> headlineToHeaderWithContents hdln
-
-isNoExportTag :: Tag -> Bool
-isNoExportTag = (== toTag "noexport")
-
-isArchiveTag :: Tag -> Bool
-isArchiveTag = (== toTag "ARCHIVE")
-
--- | Check if the title starts with COMMENT.
--- FIXME: This accesses builder internals not intended for use in situations
--- like these. Replace once keyword parsing is supported.
-isCommentTitle :: Inlines -> Bool
-isCommentTitle (B.toList -> (Str "COMMENT":_)) = True
-isCommentTitle _ = False
-
-archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks
-archivedHeadlineToBlocks hdln = do
- archivedTreesOption <- getExportSetting exportArchivedTrees
- case archivedTreesOption of
- ArchivedTreesNoExport -> return mempty
- ArchivedTreesExport -> headlineToHeaderWithContents hdln
- ArchivedTreesHeadlineOnly -> headlineToHeader hdln
-
-headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithList hdln@(Headline {..}) = do
- maxHeadlineLevels <- getExportSetting exportHeadlineLevels
- header <- headlineToHeader hdln
- listElements <- sequence (map headlineToBlocks headlineChildren)
- let listBlock = if null listElements
- then mempty
- else B.orderedList listElements
- let headerText = if maxHeadlineLevels == headlineLevel
- then header
- else flattenHeader header
- return $ headerText <> headlineContents <> listBlock
- where
- flattenHeader :: Blocks -> Blocks
- flattenHeader blks =
- case B.toList blks of
- (Header _ _ inlns:_) -> B.para (B.fromList inlns)
- _ -> mempty
-
-headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeaderWithContents hdln@(Headline {..}) = do
- header <- headlineToHeader hdln
- childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren)
- return $ header <> headlineContents <> childrenBlocks
-
-headlineToHeader :: Monad m => Headline -> OrgParser m Blocks
-headlineToHeader (Headline {..}) = do
- exportTodoKeyword <- getExportSetting exportWithTodoKeywords
- let todoText = if exportTodoKeyword
- then case headlineTodoMarker of
- Just kw -> todoKeywordToInlines kw <> B.space
- Nothing -> mempty
- else mempty
- let text = tagTitle (todoText <> headlineText) headlineTags
- let propAttr = propertiesToAttr headlineProperties
- attr <- registerHeader propAttr headlineText
- return $ B.headerWith attr headlineLevel text
-
-todoKeyword :: Monad m => OrgParser m TodoMarker
-todoKeyword = try $ do
- taskStates <- activeTodoMarkers <$> getState
- let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar)
- choice (map kwParser taskStates)
-
-todoKeywordToInlines :: TodoMarker -> Inlines
-todoKeywordToInlines tdm =
- let todoText = todoMarkerName tdm
- todoState = map toLower . show $ todoMarkerState tdm
- classes = [todoState, todoText]
- in B.spanWith (mempty, classes, mempty) (B.str todoText)
-
-propertiesToAttr :: Properties -> Attr
-propertiesToAttr properties =
- let
- toStringPair prop = (fromKey (fst prop), fromValue (snd prop))
- customIdKey = toPropertyKey "custom_id"
- classKey = toPropertyKey "class"
- unnumberedKey = toPropertyKey "unnumbered"
- specialProperties = [customIdKey, classKey, unnumberedKey]
- id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties
- cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
- $ properties
- isUnnumbered =
- fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties
- in
- (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs')
-
-tagTitle :: Inlines -> [Tag] -> Inlines
-tagTitle title tags = title <> (mconcat $ map tagToInline tags)
-
-tagToInline :: Tag -> Inlines
-tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty
-
-
---
--- parsing blocks
---
-
--- | Get a list of blocks.
-blockList :: PandocMonad m => OrgParser m [Block]
-blockList = do
- initialBlocks <- blocks
- headlines <- sequence <$> manyTill (headline 1) eof
- st <- getState
- headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st
- return . B.toList $ (runF initialBlocks st) <> headlineBlocks
-
--- | Get the meta information safed in the state.
-meta :: Monad m => OrgParser m Meta
-meta = do
- meta' <- metaExport
- runF meta' <$> getState
-
-blocks :: PandocMonad m => OrgParser m (F Blocks)
-blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
-
-block :: PandocMonad m => OrgParser m (F Blocks)
-block = choice [ mempty <$ blanklines
- , table
- , orgBlock
- , figure
- , example
- , genericDrawer
- , specialLine
- , horizontalRule
- , list
- , latexFragment
- , noteBlock
- , paraOrPlain
- ] <?> "block"
-
-
---
--- Block Attributes
---
-
--- | Attributes that may be added to figures (like a name or caption).
-data BlockAttributes = BlockAttributes
- { blockAttrName :: Maybe String
- , blockAttrLabel :: Maybe String
- , blockAttrCaption :: Maybe (F Inlines)
- , blockAttrKeyValues :: [(String, String)]
- }
-
--- | Convert BlockAttributes into pandoc Attr
-attrFromBlockAttributes :: BlockAttributes -> Attr
-attrFromBlockAttributes (BlockAttributes{..}) =
- let
- ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
- classes = case lookup "class" blockAttrKeyValues of
- Nothing -> []
- Just clsStr -> words clsStr
- kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
- in (ident, classes, kv)
-
-stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String)
-stringyMetaAttribute attrCheck = try $ do
- metaLineStart
- attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
- guard $ attrCheck attrName
- skipSpaces
- attrValue <- anyLine
- return (attrName, attrValue)
-
-blockAttributes :: PandocMonad m => OrgParser m BlockAttributes
-blockAttributes = try $ do
- kv <- many (stringyMetaAttribute attrCheck)
- let caption = foldl' (appendValues "CAPTION") Nothing kv
- let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
- let name = lookup "NAME" kv
- let label = lookup "LABEL" kv
- caption' <- case caption of
- Nothing -> return Nothing
- Just s -> Just <$> parseFromString inlines (s ++ "\n")
- kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
- return $ BlockAttributes
- { blockAttrName = name
- , blockAttrLabel = label
- , blockAttrCaption = caption'
- , blockAttrKeyValues = kvAttrs'
- }
- where
- attrCheck :: String -> Bool
- attrCheck attr =
- case attr of
- "NAME" -> True
- "LABEL" -> True
- "CAPTION" -> True
- "ATTR_HTML" -> True
- _ -> False
-
- appendValues :: String -> Maybe String -> (String, String) -> Maybe String
- appendValues attrName accValue (key, value) =
- if key /= attrName
- then accValue
- else case accValue of
- Just acc -> Just $ acc ++ ' ':value
- Nothing -> Just value
-
-keyValues :: Monad m => OrgParser m [(String, String)]
-keyValues = try $
- manyTill ((,) <$> key <*> value) newline
- where
- key :: Monad m => OrgParser m String
- key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
-
- value :: Monad m => OrgParser m String
- value = skipSpaces *> manyTill anyChar endOfValue
-
- endOfValue :: Monad m => OrgParser m ()
- endOfValue =
- lookAhead $ (() <$ try (many1 spaceChar <* key))
- <|> () <$ newline
-
-
---
--- Org Blocks (#+BEGIN_... / #+END_...)
---
-
--- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
-orgBlock :: PandocMonad m => OrgParser m (F Blocks)
-orgBlock = try $ do
- blockAttrs <- blockAttributes
- blkType <- blockHeaderStart
- ($ blkType) $
- case (map toLower blkType) of
- "export" -> exportBlock
- "comment" -> rawBlockLines (const mempty)
- "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
- "latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
- "ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
- "example" -> rawBlockLines (return . exampleCode)
- "quote" -> parseBlockLines (fmap B.blockQuote)
- "verse" -> verseBlock
- "src" -> codeBlock blockAttrs
- _ -> parseBlockLines $
- let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
- in fmap $ B.divWith (ident, classes ++ [blkType], kv)
- where
- blockHeaderStart :: Monad m => OrgParser m String
- blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
-
- lowercase :: String -> String
- lowercase = map toLower
-
-rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
-rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType))
-
-parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
-parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent))
- where
- parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
- parsedBlockContent = try $ do
- raw <- rawBlockContent blockType
- parseFromString blocks (raw ++ "\n")
-
--- | Read the raw string content of a block
-rawBlockContent :: Monad m => String -> OrgParser m String
-rawBlockContent blockType = try $ do
- blkLines <- manyTill rawLine blockEnder
- tabLen <- getOption readerTabStop
- return
- . unlines
- . stripIndent
- . map (tabsToSpaces tabLen . commaEscaped)
- $ blkLines
- where
- rawLine :: Monad m => OrgParser m String
- rawLine = try $ ("" <$ blankline) <|> anyLine
-
- blockEnder :: Monad m => OrgParser m ()
- blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
-
- stripIndent :: [String] -> [String]
- stripIndent strs = map (drop (shortestIndent strs)) strs
-
- shortestIndent :: [String] -> Int
- shortestIndent = foldr min maxBound
- . map (length . takeWhile isSpace)
- . filter (not . null)
-
- tabsToSpaces :: Int -> String -> String
- tabsToSpaces _ [] = []
- tabsToSpaces tabLen cs'@(c:cs) =
- case c of
- ' ' -> ' ':tabsToSpaces tabLen cs
- '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs
- _ -> cs'
-
- commaEscaped :: String -> String
- commaEscaped (',':cs@('*':_)) = cs
- commaEscaped (',':cs@('#':'+':_)) = cs
- commaEscaped (' ':cs) = ' ':commaEscaped cs
- commaEscaped ('\t':cs) = '\t':commaEscaped cs
- commaEscaped cs = cs
-
--- | Read but ignore all remaining block headers.
-ignHeaders :: Monad m => OrgParser m ()
-ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-
--- | Read a block containing code intended for export in specific backends
--- only.
-exportBlock :: Monad m => String -> OrgParser m (F Blocks)
-exportBlock blockType = try $ do
- exportType <- skipSpaces *> orgArgWord <* ignHeaders
- contents <- rawBlockContent blockType
- returnF (B.rawBlock (map toLower exportType) contents)
-
-verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
-verseBlock blockType = try $ do
- ignHeaders
- content <- rawBlockContent blockType
- fmap B.lineBlock . sequence
- <$> mapM parseVerseLine (lines content)
- where
- -- replace initial spaces with nonbreaking spaces to preserve
- -- indentation, parse the rest as normal inline
- parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
- parseVerseLine cs = do
- let (initialSpaces, indentedLine) = span isSpace cs
- let nbspIndent = if null initialSpaces
- then mempty
- else B.str $ map (const '\160') initialSpaces
- line <- parseFromString inlines (indentedLine ++ "\n")
- return (trimInlinesF $ pure nbspIndent <> line)
-
--- | Read a code block and the associated results block if present. Which of
--- boths blocks is included in the output is determined using the "exports"
--- argument in the block header.
-codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
-codeBlock blockAttrs blockType = do
- skipSpaces
- (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
- content <- rawBlockContent blockType
- resultsContent <- trailingResultsBlock
- let id' = fromMaybe mempty $ blockAttrName blockAttrs
- let includeCode = exportsCode kv
- let includeResults = exportsResults kv
- let codeBlck = B.codeBlockWith ( id', classes, kv ) content
- let labelledBlck = maybe (pure codeBlck)
- (labelDiv codeBlck)
- (blockAttrCaption blockAttrs)
- let resultBlck = fromMaybe mempty resultsContent
- return $
- (if includeCode then labelledBlck else mempty) <>
- (if includeResults then resultBlck else mempty)
- where
- labelDiv :: Blocks -> F Inlines -> F Blocks
- labelDiv blk value =
- B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
-
- labelledBlock :: F Inlines -> F Blocks
- labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-
-exportsCode :: [(String, String)] -> Bool
-exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
- || ("rundoc-exports", "results") `elem` attrs)
-
-exportsResults :: [(String, String)] -> Bool
-exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
- || ("rundoc-exports", "both") `elem` attrs
-
-trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks))
-trailingResultsBlock = optionMaybe . try $ do
- blanklines
- stringAnyCase "#+RESULTS:"
- blankline
- block
-
--- | Parse code block arguments
--- TODO: We currently don't handle switches.
-codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
-codeHeaderArgs = try $ do
- language <- skipSpaces *> orgArgWord
- _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
- parameters <- manyTill blockOption newline
- let pandocLang = translateLang language
- return $
- if hasRundocParameters parameters
- then ( [ pandocLang, rundocBlockClass ]
- , map toRundocAttrib (("language", language) : parameters)
- )
- else ([ pandocLang ], parameters)
- where
- hasRundocParameters = not . null
-
-switch :: Monad m => OrgParser m (Char, Maybe String)
-switch = try $ simpleSwitch <|> lineNumbersSwitch
- where
- simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
- lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
- (string "-l \"" *> many1Till nonspaceChar (char '"'))
-
-blockOption :: Monad m => OrgParser m (String, String)
-blockOption = try $ do
- argKey <- orgArgKey
- paramValue <- option "yes" orgParamValue
- return (argKey, paramValue)
-
-orgParamValue :: Monad m => OrgParser m String
-orgParamValue = try $
- skipSpaces
- *> notFollowedBy (char ':' )
- *> many1 nonspaceChar
- <* skipSpaces
-
-horizontalRule :: Monad m => OrgParser m (F Blocks)
-horizontalRule = return B.horizontalRule <$ try hline
-
-
---
--- Drawers
---
-
--- | A generic drawer which has no special meaning for org-mode.
--- Whether or not this drawer is included in the output depends on the drawers
--- export setting.
-genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
-genericDrawer = try $ do
- name <- map toUpper <$> drawerStart
- content <- manyTill drawerLine (try drawerEnd)
- state <- getState
- -- Include drawer if it is explicitly included in or not explicitly excluded
- -- from the list of drawers that should be exported. PROPERTIES drawers are
- -- never exported.
- case (exportDrawers . orgStateExportSettings $ state) of
- _ | name == "PROPERTIES" -> return mempty
- Left names | name `elem` names -> return mempty
- Right names | name `notElem` names -> return mempty
- _ -> drawerDiv name <$> parseLines content
- where
- parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
- parseLines = parseFromString blocks . (++ "\n") . unlines
-
- drawerDiv :: String -> F Blocks -> F Blocks
- drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-
-drawerLine :: Monad m => OrgParser m String
-drawerLine = anyLine
-
-drawerEnd :: Monad m => OrgParser m String
-drawerEnd = try $
- skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
-
--- | Read a :PROPERTIES: drawer and return the key/value pairs contained
--- within.
-propertiesDrawer :: Monad m => OrgParser m Properties
-propertiesDrawer = try $ do
- drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
- manyTill property (try drawerEnd)
- where
- property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
- property = try $ (,) <$> key <*> value
-
- key :: Monad m => OrgParser m PropertyKey
- key = fmap toPropertyKey . try $
- skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
-
- value :: Monad m => OrgParser m PropertyValue
- value = fmap toPropertyValue . try $
- skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
-
-
---
--- Figures
---
-
--- | Figures or an image paragraph (i.e. an image on a line by itself). Only
--- images with a caption attribute are interpreted as figures.
-figure :: PandocMonad m => OrgParser m (F Blocks)
-figure = try $ do
- figAttrs <- blockAttributes
- src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
- case cleanLinkString src of
- Nothing -> mzero
- Just imgSrc -> do
- guard (isImageFilename imgSrc)
- let isFigure = not . isNothing $ blockAttrCaption figAttrs
- return $ imageBlock isFigure figAttrs imgSrc
- where
- selfTarget :: PandocMonad m => OrgParser m String
- selfTarget = try $ char '[' *> linkTarget <* char ']'
-
- imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
- imageBlock isFigure figAttrs imgSrc =
- let
- figName = fromMaybe mempty $ blockAttrName figAttrs
- figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
- figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
- figKeyVals = blockAttrKeyValues figAttrs
- attr = (figLabel, mempty, figKeyVals)
- figTitle = (if isFigure then withFigPrefix else id) figName
- in
- B.para . B.imageWith attr imgSrc figTitle <$> figCaption
-
- withFigPrefix :: String -> String
- withFigPrefix cs =
- if "fig:" `isPrefixOf` cs
- then cs
- else "fig:" ++ cs
-
--- | Succeeds if looking at the end of the current paragraph
-endOfParagraph :: Monad m => OrgParser m ()
-endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
-
-
---
--- Examples
---
-
--- | Example code marked up by a leading colon.
-example :: Monad m => OrgParser m (F Blocks)
-example = try $ do
- return . return . exampleCode =<< unlines <$> many1 exampleLine
- where
- exampleLine :: Monad m => OrgParser m String
- exampleLine = try $ exampleLineStart *> anyLine
-
-exampleCode :: String -> Blocks
-exampleCode = B.codeBlockWith ("", ["example"], [])
-
-
---
--- Comments, Options and Metadata
---
-
-specialLine :: PandocMonad m => OrgParser m (F Blocks)
-specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine
-
-rawExportLine :: PandocMonad m => OrgParser m Blocks
-rawExportLine = try $ do
- metaLineStart
- key <- metaKey
- if key `elem` ["latex", "html", "texinfo", "beamer"]
- then B.rawBlock key <$> anyLine
- else mzero
-
-commentLine :: Monad m => OrgParser m Blocks
-commentLine = commentLineStart *> anyLine *> pure mempty
-
-
---
--- Tables
---
-data ColumnProperty = ColumnProperty
- { columnAlignment :: Maybe Alignment
- , columnRelWidth :: Maybe Int
- } deriving (Show, Eq)
-
-instance Default ColumnProperty where
- def = ColumnProperty Nothing Nothing
-
-data OrgTableRow = OrgContentRow (F [Blocks])
- | OrgAlignRow [ColumnProperty]
- | OrgHlineRow
-
--- OrgTable is strongly related to the pandoc table ADT. Using the same
--- (i.e. pandoc-global) ADT would mean that the reader would break if the
--- global structure was to be changed, which would be bad. The final table
--- should be generated using a builder function.
-data OrgTable = OrgTable
- { orgTableColumnProperties :: [ColumnProperty]
- , orgTableHeader :: [Blocks]
- , orgTableRows :: [[Blocks]]
- }
-
-table :: PandocMonad m => OrgParser m (F Blocks)
-table = try $ do
- blockAttrs <- blockAttributes
- lookAhead tableStart
- do
- rows <- tableRows
- let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
- return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
-
-orgToPandocTable :: OrgTable
- -> Inlines
- -> Blocks
-orgToPandocTable (OrgTable colProps heads lns) caption =
- let totalWidth = if any (not . isNothing) (map columnRelWidth colProps)
- then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps
- else Nothing
- in B.table caption (map (convertColProp totalWidth) colProps) heads lns
- where
- convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double)
- convertColProp totalWidth colProp =
- let
- align' = fromMaybe AlignDefault $ columnAlignment colProp
- width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t))
- <$> (columnRelWidth colProp)
- <*> totalWidth
- in (align', width')
-
-tableRows :: PandocMonad m => OrgParser m [OrgTableRow]
-tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
-
-tableContentRow :: PandocMonad m => OrgParser m OrgTableRow
-tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
-
-tableContentCell :: PandocMonad m => OrgParser m (F Blocks)
-tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
-
-tableAlignRow :: Monad m => OrgParser m OrgTableRow
-tableAlignRow = try $ do
- tableStart
- colProps <- many1Till columnPropertyCell newline
- -- Empty rows are regular (i.e. content) rows, not alignment rows.
- guard $ any (/= def) colProps
- return $ OrgAlignRow colProps
-
-columnPropertyCell :: Monad m => OrgParser m ColumnProperty
-columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
- where
- emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell)
- propCell = try $ ColumnProperty
- <$> (skipSpaces
- *> char '<'
- *> optionMaybe tableAlignFromChar)
- <*> (optionMaybe (many1 digit >>= safeRead)
- <* char '>'
- <* emptyCell)
-
-tableAlignFromChar :: Monad m => OrgParser m Alignment
-tableAlignFromChar = try $
- choice [ char 'l' *> return AlignLeft
- , char 'c' *> return AlignCenter
- , char 'r' *> return AlignRight
- ]
-
-tableHline :: Monad m => OrgParser m OrgTableRow
-tableHline = try $
- OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
-
-endOfCell :: Monad m => OrgParser m Char
-endOfCell = try $ char '|' <|> lookAhead newline
-
-rowsToTable :: [OrgTableRow]
- -> F OrgTable
-rowsToTable = foldM rowToContent emptyTable
- where emptyTable = OrgTable mempty mempty mempty
-
-normalizeTable :: OrgTable -> OrgTable
-normalizeTable (OrgTable colProps heads rows) =
- OrgTable colProps' heads rows
- where
- refRow = if heads /= mempty
- then heads
- else case rows of
- (r:_) -> r
- _ -> mempty
- cols = length refRow
- fillColumns base padding = take cols $ base ++ repeat padding
- colProps' = fillColumns colProps def
-
--- One or more horizontal rules after the first content line mark the previous
--- line as a header. All other horizontal lines are discarded.
-rowToContent :: OrgTable
- -> OrgTableRow
- -> F OrgTable
-rowToContent orgTable row =
- case row of
- OrgHlineRow -> return singleRowPromotedToHeader
- OrgAlignRow props -> return . setProperties $ props
- OrgContentRow cs -> appendToBody cs
- where
- singleRowPromotedToHeader :: OrgTable
- singleRowPromotedToHeader = case orgTable of
- OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- orgTable{ orgTableHeader = b , orgTableRows = [] }
- _ -> orgTable
-
- setProperties :: [ColumnProperty] -> OrgTable
- setProperties ps = orgTable{ orgTableColumnProperties = ps }
-
- appendToBody :: F [Blocks] -> F OrgTable
- appendToBody frow = do
- newRow <- frow
- let oldRows = orgTableRows orgTable
- -- NOTE: This is an inefficient O(n) operation. This should be changed
- -- if performance ever becomes a problem.
- return orgTable{ orgTableRows = oldRows ++ [newRow] }
-
-
---
--- LaTeX fragments
---
-latexFragment :: Monad m => OrgParser m (F Blocks)
-latexFragment = try $ do
- envName <- latexEnvStart
- content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
- return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
- where
- c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
- , c
- , "\\end{", e, "}\n"
- ]
-
-latexEnd :: Monad m => String -> OrgParser m ()
-latexEnd envName = try $
- () <$ skipSpaces
- <* string ("\\end{" ++ envName ++ "}")
- <* blankline
-
-
---
--- Footnote defintions
---
-noteBlock :: PandocMonad m => OrgParser m (F Blocks)
-noteBlock = try $ do
- ref <- noteMarker <* skipSpaces
- content <- mconcat <$> blocksTillHeaderOrNote
- addToNotesTable (ref, content)
- return mempty
- where
- blocksTillHeaderOrNote =
- many1Till block (eof <|> () <$ lookAhead noteMarker
- <|> () <$ lookAhead headerStart)
-
--- Paragraphs or Plain text
-paraOrPlain :: PandocMonad m => OrgParser m (F Blocks)
-paraOrPlain = try $ do
- -- Make sure we are not looking at a headline
- notFollowedBy' (char '*' *> (oneOf " *"))
- ils <- inlines
- nl <- option False (newline *> return True)
- -- Read block as paragraph, except if we are in a list context and the block
- -- is directly followed by a list item, in which case the block is read as
- -- plain text.
- try (guard nl
- *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart))
- *> return (B.para <$> ils))
- <|> (return (B.plain <$> ils))
-
-
---
--- list blocks
---
-
-list :: PandocMonad m => OrgParser m (F Blocks)
-list = choice [ definitionList, bulletList, orderedList ] <?> "list"
-
-definitionList :: PandocMonad m => OrgParser m (F Blocks)
-definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.definitionList . fmap compactifyDL . sequence
- <$> many1 (definitionListItem $ bulletListStart' (Just n))
-
-bulletList :: PandocMonad m => OrgParser m (F Blocks)
-bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
- fmap B.bulletList . fmap compactify . sequence
- <$> many1 (listItem (bulletListStart' $ Just n))
-
-orderedList :: PandocMonad m => OrgParser m (F Blocks)
-orderedList = fmap B.orderedList . fmap compactify . sequence
- <$> many1 (listItem orderedListStart)
-
-bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int
--- returns length of bulletList prefix, inclusive of marker
-bulletListStart' Nothing = do ind <- length <$> many spaceChar
- oneOf (bullets $ ind == 0)
- skipSpaces1
- return (ind + 1)
-bulletListStart' (Just n) = do count (n-1) spaceChar
- oneOf (bullets $ n == 1)
- many1 spaceChar
- return n
-
--- Unindented lists are legal, but they can't use '*' bullets.
--- We return n to maintain compatibility with the generic listItem.
-bullets :: Bool -> String
-bullets unindented = if unindented then "+-" else "*+-"
-
-definitionListItem :: PandocMonad m
- => OrgParser m Int
- -> OrgParser m (F (Inlines, [Blocks]))
-definitionListItem parseMarkerGetLength = try $ do
- markerLength <- parseMarkerGetLength
- term <- manyTill (noneOf "\n\r") (try definitionMarker)
- line1 <- anyLineNewline
- blank <- option "" ("\n" <$ blankline)
- cont <- concat <$> many (listContinuation markerLength)
- term' <- parseFromString inlines term
- contents' <- parseFromString blocks $ line1 ++ blank ++ cont
- return $ (,) <$> term' <*> fmap (:[]) contents'
- where
- definitionMarker =
- spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
-
-
--- parse raw text for one list item, excluding start marker and continuations
-listItem :: PandocMonad m
- => OrgParser m Int
- -> OrgParser m (F Blocks)
-listItem start = try . withContext ListItemState $ do
- markerLength <- try start
- firstLine <- anyLineNewline
- blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString blocks $ firstLine ++ blank ++ rest
-
--- continuation of a list item - indented and separated by blankline or endline.
--- Note: nested lists are parsed as continuations.
-listContinuation :: Monad m => Int
- -> OrgParser m String
-listContinuation markerLength = try $
- notFollowedBy' blankline
- *> (mappend <$> (concat <$> many1 listLine)
- <*> many blankline)
- where
- listLine = try $ indentWith markerLength *> anyLineNewline
-
- -- indent by specified number of spaces (or equiv. tabs)
- indentWith :: Monad m => Int -> OrgParser m String
- indentWith num = do
- tabStop <- getOption readerTabStop
- if num < tabStop
- then count num (char ' ')
- else choice [ try (count num (char ' '))
- , try (char '\t' >> count (num - tabStop) (char ' ')) ]
-
--- | Parse any line, include the final newline in the output.
-anyLineNewline :: Monad m => OrgParser m String
-anyLineNewline = (++ "\n") <$> anyLine
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
deleted file mode 100644
index 391877c03..000000000
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ /dev/null
@@ -1,172 +0,0 @@
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode export options.
--}
-module Text.Pandoc.Readers.Org.ExportSettings
- ( exportSettings
- ) where
-
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-
-import Control.Monad ( mzero, void )
-import Data.Char ( toLower )
-import Data.Maybe ( listToMaybe )
-
--- | Read and handle space separated org-mode export settings.
-exportSettings :: Monad m => OrgParser m ()
-exportSettings = void $ sepBy spaces exportSetting
-
--- | Setter function for export settings.
-type ExportSettingSetter a = a -> ExportSettings -> ExportSettings
-
--- | Read and process a single org-mode export option.
-exportSetting :: Monad m => OrgParser m ()
-exportSetting = choice
- [ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val })
- , booleanSetting "'" (\val es -> es { exportSmartQuotes = val })
- , booleanSetting "*" (\val es -> es { exportEmphasizedText = val })
- , booleanSetting "-" (\val es -> es { exportSpecialStrings = val })
- , ignoredSetting ":"
- , ignoredSetting "<"
- , ignoredSetting "\\n"
- , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val })
- , booleanSetting "author" (\val es -> es { exportWithAuthor = val })
- , ignoredSetting "c"
- -- org-mode allows the special value `comment` for creator, which we'll
- -- interpret as true as it doesn't make sense in the context of Pandoc.
- , booleanSetting "creator" (\val es -> es { exportWithCreator = val })
- , complementableListSetting "d" (\val es -> es { exportDrawers = val })
- , ignoredSetting "date"
- , ignoredSetting "e"
- , booleanSetting "email" (\val es -> es { exportWithEmail = val })
- , ignoredSetting "f"
- , integerSetting "H" (\val es -> es { exportHeadlineLevels = val })
- , ignoredSetting "inline"
- , ignoredSetting "num"
- , ignoredSetting "p"
- , ignoredSetting "pri"
- , ignoredSetting "prop"
- , ignoredSetting "stat"
- , ignoredSetting "tags"
- , ignoredSetting "tasks"
- , ignoredSetting "tex"
- , ignoredSetting "timestamp"
- , ignoredSetting "title"
- , ignoredSetting "toc"
- , booleanSetting "todo" (\val es -> es { exportWithTodoKeywords = val })
- , ignoredSetting "|"
- ] <?> "export setting"
-
-genericExportSetting :: Monad m
- => OrgParser m a
- -> String
- -> ExportSettingSetter a
- -> OrgParser m ()
-genericExportSetting optionParser settingIdentifier setter = try $ do
- _ <- string settingIdentifier *> char ':'
- value <- optionParser
- updateState $ modifyExportSettings value
- where
- modifyExportSettings val st =
- st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-
--- | A boolean option, either nil (False) or non-nil (True).
-booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
-booleanSetting = genericExportSetting elispBoolean
-
--- | An integer-valued option.
-integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
-integerSetting = genericExportSetting parseInt
- where
- parseInt = try $
- many1 digit >>= maybe mzero (return . fst) . listToMaybe . reads
-
--- | Either the string "headline" or an elisp boolean and treated as an
--- @ArchivedTreesOption@.
-archivedTreeSetting :: Monad m
- => String
- -> ExportSettingSetter ArchivedTreesOption
- -> OrgParser m ()
-archivedTreeSetting =
- genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean
- where
- archivedTreesHeadlineSetting = try $ do
- _ <- string "headline"
- lookAhead (newline <|> spaceChar)
- return ArchivedTreesHeadlineOnly
-
- archivedTreesBoolean = try $ do
- exportBool <- elispBoolean
- return $
- if exportBool
- then ArchivedTreesExport
- else ArchivedTreesNoExport
-
--- | A list or a complement list (i.e. a list starting with `not`).
-complementableListSetting :: Monad m
- => String
- -> ExportSettingSetter (Either [String] [String])
- -> OrgParser m ()
-complementableListSetting = genericExportSetting $ choice
- [ Left <$> complementStringList
- , Right <$> stringList
- , (\b -> if b then Left [] else Right []) <$> elispBoolean
- ]
- where
- -- Read a plain list of strings.
- stringList :: Monad m => OrgParser m [String]
- stringList = try $
- char '('
- *> sepBy elispString spaces
- <* char ')'
-
- -- Read an emacs lisp list specifying a complement set.
- complementStringList :: Monad m => OrgParser m [String]
- complementStringList = try $
- string "(not "
- *> sepBy elispString spaces
- <* char ')'
-
- elispString :: Monad m => OrgParser m String
- elispString = try $
- char '"'
- *> manyTill alphaNum (char '"')
-
--- | Read but ignore the export setting.
-ignoredSetting :: Monad m => String -> OrgParser m ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
-
--- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are
--- interpreted as true.
-elispBoolean :: Monad m => OrgParser m Bool
-elispBoolean = try $ do
- value <- many1 nonspaceChar
- return $ case map toLower value of
- "nil" -> False
- "{}" -> False
- "()" -> False
- _ -> True
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
deleted file mode 100644
index f3671641a..000000000
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ /dev/null
@@ -1,880 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode inline elements.
--}
-module Text.Pandoc.Readers.Org.Inlines
- ( inline
- , inlines
- , addToNotesTable
- , linkTarget
- ) where
-
-import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker )
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString, isImageFilename, rundocBlockClass
- , toRundocAttrib, translateLang )
-
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines )
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline )
-import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) )
-import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Text.Pandoc.Class (PandocMonad)
-
-import Prelude hiding (sequence)
-import Control.Monad ( guard, mplus, mzero, when, void )
-import Control.Monad.Trans ( lift )
-import Data.Char ( isAlphaNum, isSpace )
-import Data.List ( intersperse )
-import Data.Maybe ( fromMaybe )
-import qualified Data.Map as M
-import Data.Monoid ( (<>) )
-import Data.Traversable (sequence)
-
---
--- Functions acting on the parser state
---
-recordAnchorId :: PandocMonad m => String -> OrgParser m ()
-recordAnchorId i = updateState $ \s ->
- s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
-
-pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m ()
-pushToInlineCharStack c = updateState $ \s ->
- s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
-
-popInlineCharStack :: PandocMonad m => OrgParser m ()
-popInlineCharStack = updateState $ \s ->
- s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
-
-surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char]
-surroundingEmphasisChar =
- take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
-
-startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m ()
-startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
- s{ orgStateEmphasisNewlines = Just maxNewlines }
-
-decEmphasisNewlinesCount :: PandocMonad m => OrgParser m ()
-decEmphasisNewlinesCount = updateState $ \s ->
- s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
-
-newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool
-newlinesCountWithinLimits = do
- st <- getState
- return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
-
-resetEmphasisNewlines :: PandocMonad m => OrgParser m ()
-resetEmphasisNewlines = updateState $ \s ->
- s{ orgStateEmphasisNewlines = Nothing }
-
-addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m ()
-addToNotesTable note = do
- oldnotes <- orgStateNotes' <$> getState
- updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
-
--- | Parse a single Org-mode inline element
-inline :: PandocMonad m => OrgParser m (F Inlines)
-inline =
- choice [ whitespace
- , linebreak
- , cite
- , footnote
- , linkOrImage
- , anchor
- , inlineCodeBlock
- , str
- , endline
- , emphasizedText
- , code
- , math
- , displayMath
- , verbatim
- , subscript
- , superscript
- , inlineLaTeX
- , exportSnippet
- , smart
- , symbol
- ] <* (guard =<< newlinesCountWithinLimits)
- <?> "inline"
-
--- | Read the rest of the input as inlines.
-inlines :: PandocMonad m => OrgParser m (F Inlines)
-inlines = trimInlinesF . mconcat <$> many1 inline
-
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~"
-
-
-whitespace :: PandocMonad m => OrgParser m (F Inlines)
-whitespace = pure B.space <$ skipMany1 spaceChar
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
- <?> "whitespace"
-
-linebreak :: PandocMonad m => OrgParser m (F Inlines)
-linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
-
-str :: PandocMonad m => OrgParser m (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
- <* updateLastStrPos
-
--- | An endline character that can be treated as a space, not a structural
--- break. This should reflect the values of the Emacs variable
--- @org-element-pagaraph-separate@.
-endline :: PandocMonad m => OrgParser m (F Inlines)
-endline = try $ do
- newline
- notFollowedBy' endOfBlock
- decEmphasisNewlinesCount
- guard =<< newlinesCountWithinLimits
- updateLastPreCharPos
- return . return $ B.softbreak
-
-
---
--- Citations
---
-
--- The state of citations is a bit confusing due to the lack of an official
--- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the
--- first to be implemented here and is almost identical to Markdown's citation
--- syntax. The org-ref package is in wide use to handle citations, but the
--- syntax is a bit limiting and not quite as simple to write. The
--- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc
--- sytax and Org-oriented enhancements contributed by Richard Lawrence and
--- others. It's dubbed Berkeley syntax due the place of activity of its main
--- contributors. All this should be consolidated once an official Org-mode
--- citation syntax has emerged.
-
-cite :: PandocMonad m => OrgParser m (F Inlines)
-cite = try $ berkeleyCite <|> do
- guardEnabled Ext_citations
- (cs, raw) <- withRaw $ choice
- [ pandocOrgCite
- , orgRefCite
- , berkeleyTextualCite
- ]
- return $ (flip B.cite (B.text raw)) <$> cs
-
--- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
-pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
-pandocOrgCite = try $
- char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
-
-orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
-orgRefCite = try $ choice
- [ normalOrgRefCite
- , fmap (:[]) <$> linkLikeOrgRefCite
- ]
-
-normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation])
-normalOrgRefCite = try $ do
- mode <- orgRefCiteMode
- firstCitation <- orgRefCiteList mode
- moreCitations <- many (try $ char ',' *> orgRefCiteList mode)
- return . sequence $ firstCitation : moreCitations
- where
- -- | A list of org-ref style citation keys, parsed as citation of the given
- -- citation mode.
- orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation)
- orgRefCiteList citeMode = try $ do
- key <- orgRefCiteKey
- returnF $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = citeMode
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
--- develop and adjusted to Org-mode style by John MacFarlane and Richard
--- Lawrence, respectively, both philosophers at UC Berkeley.
-berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
-berkeleyCite = try $ do
- bcl <- berkeleyCitationList
- return $ do
- parens <- berkeleyCiteParens <$> bcl
- prefix <- berkeleyCiteCommonPrefix <$> bcl
- suffix <- berkeleyCiteCommonSuffix <$> bcl
- citationList <- berkeleyCiteCitations <$> bcl
- return $
- if parens
- then toCite
- . maybe id (\p -> alterFirst (prependPrefix p)) prefix
- . maybe id (\s -> alterLast (appendSuffix s)) suffix
- $ citationList
- else maybe mempty (<> " ") prefix
- <> (toListOfCites $ map toInTextMode citationList)
- <> maybe mempty (", " <>) suffix
- where
- toCite :: [Citation] -> Inlines
- toCite cs = B.cite cs mempty
-
- toListOfCites :: [Citation] -> Inlines
- toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty)
-
- toInTextMode :: Citation -> Citation
- toInTextMode c = c { citationMode = AuthorInText }
-
- alterFirst, alterLast :: (a -> a) -> [a] -> [a]
- alterFirst _ [] = []
- alterFirst f (c:cs) = (f c):cs
- alterLast f = reverse . alterFirst f . reverse
-
- prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
- prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c }
- appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
-
-data BerkeleyCitationList = BerkeleyCitationList
- { berkeleyCiteParens :: Bool
- , berkeleyCiteCommonPrefix :: Maybe Inlines
- , berkeleyCiteCommonSuffix :: Maybe Inlines
- , berkeleyCiteCitations :: [Citation]
- }
-berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
-berkeleyCitationList = try $ do
- char '['
- parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
- char ':'
- skipSpaces
- commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
- citations <- citeList
- commonSuffix <- optionMaybe (try $ citationListPart)
- char ']'
- return (BerkeleyCitationList parens
- <$> sequence commonPrefix
- <*> sequence commonSuffix
- <*> citations)
- where
- citationListPart :: PandocMonad m => OrgParser m (F Inlines)
- citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
- notFollowedBy' citeKey
- notFollowedBy (oneOf ";]")
- inline
-
-berkeleyBareTag :: PandocMonad m => OrgParser m ()
-berkeleyBareTag = try $ void berkeleyBareTag'
-
-berkeleyParensTag :: PandocMonad m => OrgParser m ()
-berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag'
-
-berkeleyBareTag' :: PandocMonad m => OrgParser m ()
-berkeleyBareTag' = try $ void (string "cite")
-
-berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-berkeleyTextualCite = try $ do
- (suppressAuthor, key) <- citeKey
- returnF . return $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- The following is what a Berkeley-style bracketed textual citation parser
--- would look like. However, as these citations are a subset of Pandoc's Org
--- citation style, this isn't used.
--- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
--- berkeleyBracketedTextualCite = try . (fmap head) $
--- enclosedByPair '[' ']' berkeleyTextualCite
-
--- | Read a link-like org-ref style citation. The citation includes pre and
--- post text. However, multiple citations are not possible due to limitations
--- in the syntax.
-linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation)
-linkLikeOrgRefCite = try $ do
- _ <- string "[["
- mode <- orgRefCiteMode
- key <- orgRefCiteKey
- _ <- string "]["
- pre <- trimInlinesF . mconcat <$> manyTill inline (try $ string "::")
- spc <- option False (True <$ spaceChar)
- suf <- trimInlinesF . mconcat <$> manyTill inline (try $ string "]]")
- return $ do
- pre' <- pre
- suf' <- suf
- return Citation
- { citationId = key
- , citationPrefix = B.toList pre'
- , citationSuffix = B.toList (if spc then B.space <> suf' else suf')
- , citationMode = mode
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- | Read a citation key. The characters allowed in citation keys are taken
--- from the `org-ref-cite-re` variable in `org-ref.el`.
-orgRefCiteKey :: PandocMonad m => OrgParser m String
-orgRefCiteKey = try . many1 . satisfy $ \c ->
- isAlphaNum c || c `elem` ("-_:\\./"::String)
-
--- | Supported citation types. Only a small subset of org-ref types is
--- supported for now. TODO: rewrite this, use LaTeX reader as template.
-orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode
-orgRefCiteMode =
- choice $ map (\(s, mode) -> mode <$ try (string s <* char ':'))
- [ ("cite", AuthorInText)
- , ("citep", NormalCitation)
- , ("citep*", NormalCitation)
- , ("citet", AuthorInText)
- , ("citet*", AuthorInText)
- , ("citeyear", SuppressAuthor)
- ]
-
-citeList :: PandocMonad m => OrgParser m (F [Citation])
-citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: PandocMonad m => OrgParser m (F Citation)
-citation = try $ do
- pref <- prefix
- (suppress_author, key) <- citeKey
- suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return $ Citation{ citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
- where
- prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
- suffix = try $ do
- hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
- skipSpaces
- rest <- trimInlinesF . mconcat <$>
- many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
-
-footnote :: PandocMonad m => OrgParser m (F Inlines)
-footnote = try $ inlineNote <|> referencedNote
-
-inlineNote :: PandocMonad m => OrgParser m (F Inlines)
-inlineNote = try $ do
- string "[fn:"
- ref <- many alphaNum
- char ':'
- note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- when (not $ null ref) $
- addToNotesTable ("fn:" ++ ref, note)
- return $ B.note <$> note
-
-referencedNote :: PandocMonad m => OrgParser m (F Inlines)
-referencedNote = try $ do
- ref <- noteMarker
- return $ do
- notes <- asksF orgStateNotes'
- case lookup ref notes of
- Nothing -> return $ B.str $ "[" ++ ref ++ "]"
- Just contents -> do
- st <- askF
- let contents' = runF contents st{ orgStateNotes' = [] }
- return $ B.note contents'
-
-linkOrImage :: PandocMonad m => OrgParser m (F Inlines)
-linkOrImage = explicitOrImageLink
- <|> selflinkOrImage
- <|> angleLink
- <|> plainLink
- <?> "link or image"
-
-explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines)
-explicitOrImageLink = try $ do
- char '['
- srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
- title <- enclosedRaw (char '[') (char ']')
- title' <- parseFromString (mconcat <$> many inline) title
- char ']'
- return $ do
- src <- srcF
- case cleanLinkString title of
- Just imgSrc | isImageFilename imgSrc ->
- pure $ B.link src "" $ B.image imgSrc mempty mempty
- _ ->
- linkToInlinesF src =<< title'
-
-selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
-selflinkOrImage = try $ do
- src <- char '[' *> linkTarget <* char ']'
- return $ linkToInlinesF src (B.str src)
-
-plainLink :: PandocMonad m => OrgParser m (F Inlines)
-plainLink = try $ do
- (orig, src) <- uri
- returnF $ B.link src "" (B.str orig)
-
-angleLink :: PandocMonad m => OrgParser m (F Inlines)
-angleLink = try $ do
- char '<'
- link <- plainLink
- char '>'
- return link
-
-linkTarget :: PandocMonad m => OrgParser m String
-linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
-
-possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
-possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-
-applyCustomLinkFormat :: String -> OrgParser m (F String)
-applyCustomLinkFormat link = do
- let (linkType, rest) = break (== ':') link
- return $ do
- formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
-
--- | Take a link and return a function which produces new inlines when given
--- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF linkStr =
- case linkStr of
- "" -> pure . B.link mempty "" -- wiki link (empty by convention)
- ('#':_) -> pure . B.link linkStr "" -- document-local fraction
- _ -> case cleanLinkString linkStr of
- (Just cleanedLink) -> if isImageFilename cleanedLink
- then const . pure $ B.image cleanedLink "" ""
- else pure . B.link cleanedLink ""
- Nothing -> internalLink linkStr -- other internal link
-
-internalLink :: String -> Inlines -> F Inlines
-internalLink link title = do
- anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
- if anchorB
- then return $ B.link ('#':link) "" title
- else return $ B.emph title
-
--- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
--- @anchor-id@ set as id. Legal anchors in org-mode are defined through
--- @org-target-regexp@, which is fairly liberal. Since no link is created if
--- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
--- an anchor.
-
-anchor :: PandocMonad m => OrgParser m (F Inlines)
-anchor = try $ do
- anchorId <- parseAnchor
- recordAnchorId anchorId
- returnF $ B.spanWith (solidify anchorId, [], []) mempty
- where
- parseAnchor = string "<<"
- *> many1 (noneOf "\t\n\r<>\"' ")
- <* string ">>"
- <* skipSpaces
-
--- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
--- the org function @org-export-solidify-link-text@.
-
-solidify :: String -> String
-solidify = map replaceSpecialChar
- where replaceSpecialChar c
- | isAlphaNum c = c
- | c `elem` ("_.-:" :: String) = c
- | otherwise = '-'
-
--- | Parses an inline code block and marks it as an babel block.
-inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
-inlineCodeBlock = try $ do
- string "src_"
- lang <- many1 orgArgWordChar
- opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
- inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r")
- let attrClasses = [translateLang lang, rundocBlockClass]
- let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
- returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where
- inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
- inlineBlockOption = try $ do
- argKey <- orgArgKey
- paramValue <- option "yes" orgInlineParamValue
- return (argKey, paramValue)
-
- orgInlineParamValue :: PandocMonad m => OrgParser m String
- orgInlineParamValue = try $
- skipSpaces
- *> notFollowedBy (char ':')
- *> many1 (noneOf "\t\n\r ]")
- <* skipSpaces
-
-
-emphasizedText :: PandocMonad m => OrgParser m (F Inlines)
-emphasizedText = do
- state <- getState
- guard . exportEmphasizedText . orgStateExportSettings $ state
- try $ choice
- [ emph
- , strong
- , strikeout
- , underline
- ]
-
-enclosedByPair :: PandocMonad m
- => Char -- ^ opening char
- -> Char -- ^ closing char
- -> OrgParser m a -- ^ parser
- -> OrgParser m [a]
-enclosedByPair s e p = char s *> many1Till p (char e)
-
-emph :: PandocMonad m => OrgParser m (F Inlines)
-emph = fmap B.emph <$> emphasisBetween '/'
-
-strong :: PandocMonad m => OrgParser m (F Inlines)
-strong = fmap B.strong <$> emphasisBetween '*'
-
-strikeout :: PandocMonad m => OrgParser m (F Inlines)
-strikeout = fmap B.strikeout <$> emphasisBetween '+'
-
--- There is no underline, so we use strong instead.
-underline :: PandocMonad m => OrgParser m (F Inlines)
-underline = fmap B.strong <$> emphasisBetween '_'
-
-verbatim :: PandocMonad m => OrgParser m (F Inlines)
-verbatim = return . B.code <$> verbatimBetween '='
-
-code :: PandocMonad m => OrgParser m (F Inlines)
-code = return . B.code <$> verbatimBetween '~'
-
-subscript :: PandocMonad m => OrgParser m (F Inlines)
-subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
-
-superscript :: PandocMonad m => OrgParser m (F Inlines)
-superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
-
-math :: PandocMonad m => OrgParser m (F Inlines)
-math = return . B.math <$> choice [ math1CharBetween '$'
- , mathStringBetween '$'
- , rawMathBetween "\\(" "\\)"
- ]
-
-displayMath :: PandocMonad m => OrgParser m (F Inlines)
-displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
- , rawMathBetween "$$" "$$"
- ]
-
-updatePositions :: PandocMonad m
- => Char
- -> OrgParser m Char
-updatePositions c = do
- when (c `elem` emphasisPreChars) updateLastPreCharPos
- when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
- return c
-
-symbol :: PandocMonad m => OrgParser m (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
-
-emphasisBetween :: PandocMonad m
- => Char
- -> OrgParser m (F Inlines)
-emphasisBetween c = try $ do
- startEmphasisNewlinesCounting emphasisAllowedNewlines
- res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
- isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
- when isTopLevelEmphasis
- resetEmphasisNewlines
- return res
-
-verbatimBetween :: PandocMonad m
- => Char
- -> OrgParser m String
-verbatimBetween c = try $
- emphasisStart c *>
- many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
- where
- verbatimChar = noneOf "\n\r" >>= updatePositions
-
--- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: PandocMonad m
- => Char
- -> OrgParser m String
-mathStringBetween c = try $ do
- mathStart c
- body <- many1TillNOrLessNewlines mathAllowedNewlines
- (noneOf (c:"\n\r"))
- (lookAhead $ mathEnd c)
- final <- mathEnd c
- return $ body ++ [final]
-
--- | Parse a single character between @c@ using math rules
-math1CharBetween :: PandocMonad m
- => Char
- -> OrgParser m String
-math1CharBetween c = try $ do
- char c
- res <- noneOf $ c:mathForbiddenBorderChars
- char c
- eof <|> () <$ lookAhead (oneOf mathPostChars)
- return [res]
-
-rawMathBetween :: PandocMonad m
- => String
- -> String
- -> OrgParser m String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
-
--- | Parses the start (opening character) of emphasis
-emphasisStart :: PandocMonad m => Char -> OrgParser m Char
-emphasisStart c = try $ do
- guard =<< afterEmphasisPreChar
- guard =<< notAfterString
- char c
- lookAhead (noneOf emphasisForbiddenBorderChars)
- pushToInlineCharStack c
- -- nested inlines are allowed, so mark this position as one which might be
- -- followed by another inline.
- updateLastPreCharPos
- return c
-
--- | Parses the closing character of emphasis
-emphasisEnd :: PandocMonad m => Char -> OrgParser m Char
-emphasisEnd c = try $ do
- guard =<< notAfterForbiddenBorderChar
- char c
- eof <|> () <$ lookAhead acceptablePostChars
- updateLastStrPos
- popInlineCharStack
- return c
- where acceptablePostChars =
- surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
-
-mathStart :: PandocMonad m => Char -> OrgParser m Char
-mathStart c = try $
- char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
-
-mathEnd :: PandocMonad m => Char -> OrgParser m Char
-mathEnd c = try $ do
- res <- noneOf (c:mathForbiddenBorderChars)
- char c
- eof <|> () <$ lookAhead (oneOf mathPostChars)
- return res
-
-
-enclosedInlines :: PandocMonad m => OrgParser m a
- -> OrgParser m b
- -> OrgParser m (F Inlines)
-enclosedInlines start end = try $
- trimInlinesF . mconcat <$> enclosed start end inline
-
-enclosedRaw :: PandocMonad m => OrgParser m a
- -> OrgParser m b
- -> OrgParser m String
-enclosedRaw start end = try $
- start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
- spanningTwoLines = try $
- anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
-
--- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
--- newlines.
-many1TillNOrLessNewlines :: PandocMonad m => Int
- -> OrgParser m Char
- -> OrgParser m a
- -> OrgParser m String
-many1TillNOrLessNewlines n p end = try $
- nMoreLines (Just n) mempty >>= oneOrMore
- where
- nMoreLines Nothing cs = return cs
- nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
- nMoreLines k cs = try $ (final k cs <|> rest k cs)
- >>= uncurry nMoreLines
- final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
- rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
- finalLine = try $ manyTill p end
- minus1 k = k - 1
- oneOrMore cs = guard (not $ null cs) *> return cs
-
--- Org allows customization of the way it reads emphasis. We use the defaults
--- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
--- for details).
-
--- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
-emphasisPreChars :: [Char]
-emphasisPreChars = "\t \"'({"
-
--- | Chars allowed at after emphasis
-emphasisPostChars :: [Char]
-emphasisPostChars = "\t\n !\"'),-.:;?\\}"
-
--- | Chars not allowed at the (inner) border of emphasis
-emphasisForbiddenBorderChars :: [Char]
-emphasisForbiddenBorderChars = "\t\n\r \"',"
-
--- | The maximum number of newlines within
-emphasisAllowedNewlines :: Int
-emphasisAllowedNewlines = 1
-
--- LaTeX-style math: see `org-latex-regexps` for details
-
--- | Chars allowed after an inline ($...$) math statement
-mathPostChars :: [Char]
-mathPostChars = "\t\n \"'),-.:;?"
-
--- | Chars not allowed at the (inner) border of math
-mathForbiddenBorderChars :: [Char]
-mathForbiddenBorderChars = "\t\n\r ,;.$"
-
--- | Maximum number of newlines in an inline math statement
-mathAllowedNewlines :: Int
-mathAllowedNewlines = 2
-
--- | Whether we are right behind a char allowed before emphasis
-afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool
-afterEmphasisPreChar = do
- pos <- getPosition
- lastPrePos <- orgStateLastPreCharPos <$> getState
- return . fromMaybe True $ (== pos) <$> lastPrePos
-
--- | Whether the parser is right after a forbidden border char
-notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool
-notAfterForbiddenBorderChar = do
- pos <- getPosition
- lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
- return $ lastFBCPos /= Just pos
-
--- | Read a sub- or superscript expression
-subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
-subOrSuperExpr = try $
- choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r")
- , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
- , simpleSubOrSuperString
- ] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
-
-simpleSubOrSuperString :: PandocMonad m => OrgParser m String
-simpleSubOrSuperString = try $ do
- state <- getState
- guard . exportSubSuperscripts . orgStateExportSettings $ state
- choice [ string "*"
- , mappend <$> option [] ((:[]) <$> oneOf "+-")
- <*> many1 alphaNum
- ]
-
-inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
-inlineLaTeX = try $ do
- cmd <- inlineLaTeXCommand
- ils <- (lift . lift) $ parseAsInlineLaTeX cmd
- maybe mzero returnF $
- parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils
- where
- parseAsMath :: String -> Maybe Inlines
- parseAsMath cs = B.fromList <$> texMathToPandoc cs
-
- parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
- parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
-
- parseAsMathMLSym :: String -> Maybe Inlines
- parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
- -- drop initial backslash and any trailing "{}"
- where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
-
- state :: ParserState
- state = def{ stateOptions = def{ readerExtensions =
- enableExtension Ext_raw_tex (readerExtensions def) } }
-
- texMathToPandoc :: String -> Maybe [Inline]
- texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
-
-maybeRight :: Either a b -> Maybe b
-maybeRight = either (const Nothing) Just
-
-inlineLaTeXCommand :: PandocMonad m => OrgParser m String
-inlineLaTeXCommand = try $ do
- rest <- getInput
- parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest
- case parsed of
- Right (RawInline _ cs) -> do
- -- drop any trailing whitespace, those are not be part of the command as
- -- far as org mode is concerned.
- let cmdNoSpc = dropWhileEnd isSpace cs
- let len = length cmdNoSpc
- count len anyChar
- return cmdNoSpc
- _ -> mzero
-
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
-exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
-exportSnippet = try $ do
- string "@@"
- format <- many1Till (alphaNum <|> char '-') (char ':')
- snippet <- manyTill anyChar (try $ string "@@")
- returnF $ B.rawInline format snippet
-
-smart :: PandocMonad m => OrgParser m (F Inlines)
-smart = do
- guardEnabled Ext_smart
- doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
- where
- orgDash = do
- guard =<< getExportSetting exportSpecialStrings
- dash <* updatePositions '-'
- orgEllipses = do
- guard =<< getExportSetting exportSpecialStrings
- ellipses <* updatePositions '.'
- orgApostrophe =
- (char '\'' <|> char '\8217') <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
- *> return (B.str "\x2019")
-
-singleQuoted :: PandocMonad m => OrgParser m (F Inlines)
-singleQuoted = try $ do
- guard =<< getExportSetting exportSmartQuotes
- singleQuoteStart
- updatePositions '\''
- withQuoteContext InSingleQuote $
- fmap B.singleQuoted . trimInlinesF . mconcat <$>
- many1Till inline (singleQuoteEnd <* updatePositions '\'')
-
--- doubleQuoted will handle regular double-quoted sections, as well
--- as dialogues with an open double-quote without a close double-quote
--- in the same paragraph.
-doubleQuoted :: PandocMonad m => OrgParser m (F Inlines)
-doubleQuoted = try $ do
- guard =<< getExportSetting exportSmartQuotes
- doubleQuoteStart
- updatePositions '"'
- contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
- (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
- (fmap B.doubleQuoted . trimInlinesF $ contents))
- <|> (return $ return (B.str "\8220") <> contents)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
deleted file mode 100644
index 2f4e21248..000000000
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ /dev/null
@@ -1,218 +0,0 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
-{-
-Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Meta
- Copyright : Copyright (C) 2014-2017 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Parsers for Org-mode meta declarations.
--}
-module Text.Pandoc.Readers.Org.Meta
- ( metaExport
- , metaKey
- , metaLine
- ) where
-
-import Text.Pandoc.Readers.Org.BlockStarts
-import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings )
-import Text.Pandoc.Readers.Org.Inlines
-import Text.Pandoc.Readers.Org.ParserState
-import Text.Pandoc.Readers.Org.Parsing
-
-import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Blocks, Inlines )
-import Text.Pandoc.Class ( PandocMonad )
-import Text.Pandoc.Definition
-
-import Control.Monad ( mzero, void )
-import Data.Char ( toLower )
-import Data.List ( intersperse )
-import qualified Data.Map as M
-import Data.Monoid ( (<>) )
-import Network.HTTP ( urlEncode )
-
--- | Returns the current meta, respecting export options.
-metaExport :: Monad m => OrgParser m (F Meta)
-metaExport = do
- st <- getState
- let settings = orgStateExportSettings st
- return $ (if exportWithAuthor settings then id else removeMeta "author")
- . (if exportWithCreator settings then id else removeMeta "creator")
- . (if exportWithEmail settings then id else removeMeta "email")
- <$> orgStateMeta st
-
-removeMeta :: String -> Meta -> Meta
-removeMeta key meta' =
- let metaMap = unMeta meta'
- in Meta $ M.delete key metaMap
-
--- | Parse and handle a single line containing meta information
--- The order, in which blocks are tried, makes sure that we're not looking at
--- the beginning of a block, so we don't need to check for it
-metaLine :: PandocMonad m => OrgParser m Blocks
-metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
-
-declarationLine :: PandocMonad m => OrgParser m ()
-declarationLine = try $ do
- key <- map toLower <$> metaKey
- (key', value) <- metaValue key
- updateState $ \st ->
- let meta' = B.setMeta key' <$> value <*> pure nullMeta
- in st { orgStateMeta = meta' <> orgStateMeta st }
-
-metaKey :: Monad m => OrgParser m String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
- <* char ':'
- <* skipSpaces
-
-metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue))
-metaValue key =
- let inclKey = "header-includes"
- in case key of
- "author" -> (key,) <$> metaInlinesCommaSeparated
- "title" -> (key,) <$> metaInlines
- "date" -> (key,) <$> metaInlines
- "header-includes" -> (key,) <$> accumulatingList key metaInlines
- "latex_header" -> (inclKey,) <$>
- accumulatingList inclKey (metaExportSnippet "latex")
- "latex_class" -> ("documentclass",) <$> metaString
- -- Org-mode expects class options to contain the surrounding brackets,
- -- pandoc does not.
- "latex_class_options" -> ("classoption",) <$>
- metaModifiedString (filter (`notElem` "[]"))
- "html_head" -> (inclKey,) <$>
- accumulatingList inclKey (metaExportSnippet "html")
- _ -> (key,) <$> metaString
-
-metaInlines :: PandocMonad m => OrgParser m (F MetaValue)
-metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
-
-metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
-metaInlinesCommaSeparated = do
- authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',')
- newline
- authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs
- let toMetaInlines = MetaInlines . B.toList
- return $ MetaList . map toMetaInlines <$> sequence authors
-
-metaString :: Monad m => OrgParser m (F MetaValue)
-metaString = metaModifiedString id
-
-metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
-metaModifiedString f = return . MetaString . f <$> anyLine
-
--- | Read an format specific meta definition
-metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
-metaExportSnippet format =
- return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-
--- | Accumulate the result of the @parser@ in a list under @key@.
-accumulatingList :: Monad m => String
- -> OrgParser m (F MetaValue)
- -> OrgParser m (F MetaValue)
-accumulatingList key p = do
- value <- p
- meta' <- orgStateMeta <$> getState
- return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value
- where curList m = case lookupMeta key m of
- Just (MetaList ms) -> ms
- Just x -> [x]
- _ -> []
-
---
--- export options
---
-optionLine :: Monad m => OrgParser m ()
-optionLine = try $ do
- key <- metaKey
- case key of
- "link" -> parseLinkFormat >>= uncurry addLinkFormat
- "options" -> exportSettings
- "todo" -> todoSequence >>= updateState . registerTodoSequence
- "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
- "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
- _ -> mzero
-
-addLinkFormat :: Monad m => String
- -> (String -> String)
- -> OrgParser m ()
-addLinkFormat key formatter = updateState $ \s ->
- let fs = orgStateLinkFormatters s
- in s{ orgStateLinkFormatters = M.insert key formatter fs }
-
-parseLinkFormat :: Monad m => OrgParser m ((String, String -> String))
-parseLinkFormat = try $ do
- linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
- linkSubst <- parseFormat
- return (linkType, linkSubst)
-
--- | An ad-hoc, single-argument-only implementation of a printf-style format
--- parser.
-parseFormat :: Monad m => OrgParser m (String -> String)
-parseFormat = try $ do
- replacePlain <|> replaceUrl <|> justAppend
- where
- -- inefficient, but who cares
- replacePlain = try $ (\x -> concat . flip intersperse x)
- <$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
- <$> sequence [tillSpecifier 'h', rest]
- justAppend = try $ (++) <$> rest
-
- rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
- tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
-
-inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines)
-inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline
-
---
--- ToDo Sequences and Keywords
---
-todoSequence :: Monad m => OrgParser m TodoSequence
-todoSequence = try $ do
- todoKws <- todoKeywords
- doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
- newline
- -- There must be at least one DONE keyword. The last TODO keyword is taken if
- -- necessary.
- case doneKws of
- Just done -> return $ keywordsToSequence todoKws done
- Nothing -> case reverse todoKws of
- [] -> mzero -- no keywords present
- (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
-
- where
- todoKeywords :: Monad m => OrgParser m [String]
- todoKeywords = try $
- let keyword = many1 nonspaceChar <* skipSpaces
- endOfKeywords = todoDoneSep <|> void newline
- in manyTill keyword (lookAhead endOfKeywords)
-
- todoDoneSep :: Monad m => OrgParser m ()
- todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
-
- keywordsToSequence :: [String] -> [String] -> TodoSequence
- keywordsToSequence todo done =
- let todoMarkers = map (TodoMarker Todo) todo
- doneMarkers = map (TodoMarker Done) done
- in todoMarkers ++ doneMarkers
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
deleted file mode 100644
index 181dd1d5c..000000000
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ /dev/null
@@ -1,259 +0,0 @@
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Define the Org-mode parser state.
--}
-module Text.Pandoc.Readers.Org.ParserState
- ( OrgParserState (..)
- , OrgParserLocal (..)
- , OrgNoteRecord
- , HasReaderOptions (..)
- , HasQuoteContext (..)
- , TodoMarker (..)
- , TodoSequence
- , TodoState (..)
- , activeTodoMarkers
- , registerTodoSequence
- , F(..)
- , askF
- , asksF
- , trimInlinesF
- , runF
- , returnF
- , ExportSettings (..)
- , ArchivedTreesOption (..)
- , optionsToParserState
- ) where
-
-import Control.Monad (liftM, liftM2)
-import Control.Monad.Reader (Reader, runReader, ReaderT, ask, asks, local)
-
-import Data.Default (Default(..))
-import qualified Data.Map as M
-import qualified Data.Set as Set
-
-import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines )
-import Text.Pandoc.Definition ( Meta(..), nullMeta )
-import Text.Pandoc.Options ( ReaderOptions(..) )
-import Text.Pandoc.Parsing ( HasHeaderMap(..)
- , HasIdentifierList(..)
- , HasLastStrPosition(..)
- , HasQuoteContext(..)
- , HasReaderOptions(..)
- , ParserContext(..)
- , QuoteContext(..)
- , SourcePos )
-
--- | An inline note / footnote containing the note key and its (inline) value.
-type OrgNoteRecord = (String, F Blocks)
--- | Table of footnotes
-type OrgNoteTable = [OrgNoteRecord]
--- | Map of functions for link transformations. The map key is refers to the
--- link-type, the corresponding function transforms the given link string.
-type OrgLinkFormatters = M.Map String (String -> String)
-
--- | The states in which a todo item can be
-data TodoState = Todo | Done
- deriving (Eq, Ord, Show)
-
--- | A ToDo keyword like @TODO@ or @DONE@.
-data TodoMarker = TodoMarker
- { todoMarkerState :: TodoState
- , todoMarkerName :: String
- }
- deriving (Show, Eq)
-
--- | Collection of todo markers in the order in which items should progress
-type TodoSequence = [TodoMarker]
-
--- | Org-mode parser state
-data OrgParserState = OrgParserState
- { orgStateAnchorIds :: [String]
- , orgStateEmphasisCharStack :: [Char]
- , orgStateEmphasisNewlines :: Maybe Int
- , orgStateExportSettings :: ExportSettings
- , orgStateHeaderMap :: M.Map Inlines String
- , orgStateIdentifiers :: Set.Set String
- , orgStateLastForbiddenCharPos :: Maybe SourcePos
- , orgStateLastPreCharPos :: Maybe SourcePos
- , orgStateLastStrPos :: Maybe SourcePos
- , orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMeta :: F Meta
- , orgStateNotes' :: OrgNoteTable
- , orgStateOptions :: ReaderOptions
- , orgStateParserContext :: ParserContext
- , orgStateTodoSequences :: [TodoSequence]
- }
-
-data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
-
-instance Default OrgParserLocal where
- def = OrgParserLocal NoQuote
-
-instance HasReaderOptions OrgParserState where
- extractReaderOptions = orgStateOptions
-
-instance HasLastStrPosition OrgParserState where
- getLastStrPos = orgStateLastStrPos
- setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
-
-instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where
- getQuoteContext = asks orgLocalQuoteContext
- withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
-
-instance HasIdentifierList OrgParserState where
- extractIdentifierList = orgStateIdentifiers
- updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
-
-instance HasHeaderMap OrgParserState where
- extractHeaderMap = orgStateHeaderMap
- updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
-
-instance Default OrgParserState where
- def = defaultOrgParserState
-
-defaultOrgParserState :: OrgParserState
-defaultOrgParserState = OrgParserState
- { orgStateAnchorIds = []
- , orgStateEmphasisCharStack = []
- , orgStateEmphasisNewlines = Nothing
- , orgStateExportSettings = def
- , orgStateHeaderMap = M.empty
- , orgStateIdentifiers = Set.empty
- , orgStateLastForbiddenCharPos = Nothing
- , orgStateLastPreCharPos = Nothing
- , orgStateLastStrPos = Nothing
- , orgStateLinkFormatters = M.empty
- , orgStateMeta = return nullMeta
- , orgStateNotes' = []
- , orgStateOptions = def
- , orgStateParserContext = NullState
- , orgStateTodoSequences = []
- }
-
-optionsToParserState :: ReaderOptions -> OrgParserState
-optionsToParserState opts =
- def { orgStateOptions = opts }
-
-registerTodoSequence :: TodoSequence -> OrgParserState -> OrgParserState
-registerTodoSequence todoSeq st =
- let curSeqs = orgStateTodoSequences st
- in st{ orgStateTodoSequences = todoSeq : curSeqs }
-
--- | Get the current todo/done sequences. If no custom todo sequences have been
--- defined, return a list containing just the default todo/done sequence.
-activeTodoSequences :: OrgParserState -> [TodoSequence]
-activeTodoSequences st =
- let curSeqs = orgStateTodoSequences st
- in if null curSeqs
- then [[ TodoMarker Todo "TODO" , TodoMarker Done "DONE" ]]
- else curSeqs
-
-activeTodoMarkers :: OrgParserState -> TodoSequence
-activeTodoMarkers = concat . activeTodoSequences
-
-
---
--- Export Settings
---
-
--- | Options for the way archived trees are handled.
-data ArchivedTreesOption =
- ArchivedTreesExport -- ^ Export the complete tree
- | ArchivedTreesNoExport -- ^ Exclude archived trees from exporting
- | ArchivedTreesHeadlineOnly -- ^ Export only the headline, discard the contents
-
--- | Export settings <http://orgmode.org/manual/Export-settings.html>
--- These settings can be changed via OPTIONS statements.
-data ExportSettings = ExportSettings
- { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
- , exportDrawers :: Either [String] [String]
- -- ^ Specify drawer names which should be exported. @Left@ names are
- -- explicitly excluded from the resulting output while @Right@ means that
- -- only the listed drawer names should be included.
- , exportEmphasizedText :: Bool -- ^ Parse emphasized text
- , exportHeadlineLevels :: Int
- -- ^ Maximum depth of headlines, deeper headlines are convert to list
- , exportSmartQuotes :: Bool -- ^ Parse quotes smartly
- , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly
- , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts
- , exportWithAuthor :: Bool -- ^ Include author in final meta-data
- , exportWithCreator :: Bool -- ^ Include creator in final meta-data
- , exportWithEmail :: Bool -- ^ Include email in final meta-data
- , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers
- }
-
-instance Default ExportSettings where
- def = defaultExportSettings
-
-defaultExportSettings :: ExportSettings
-defaultExportSettings = ExportSettings
- { exportArchivedTrees = ArchivedTreesHeadlineOnly
- , exportDrawers = Left ["LOGBOOK"]
- , exportEmphasizedText = True
- , exportHeadlineLevels = 3
- , exportSmartQuotes = True
- , exportSpecialStrings = True
- , exportSubSuperscripts = True
- , exportWithAuthor = True
- , exportWithCreator = True
- , exportWithEmail = True
- , exportWithTodoKeywords = True
- }
-
-
---
--- Parser state reader
---
-
--- | Reader monad wrapping the parser state. This is used to delay evaluation
--- until all relevant information has been parsed and made available in the
--- parser state. See also the newtype of the same name in
--- Text.Pandoc.Parsing.
-newtype F a = F { unF :: Reader OrgParserState a
- } deriving (Functor, Applicative, Monad)
-
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = fmap mconcat . sequence
-
-runF :: F a -> OrgParserState -> a
-runF = runReader . unF
-
-askF :: F OrgParserState
-askF = F ask
-
-asksF :: (OrgParserState -> a) -> F a
-asksF f = F $ asks f
-
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
-
-returnF :: Monad m => a -> m (F a)
-returnF = return . return
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
deleted file mode 100644
index 1eb8a3b00..000000000
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ /dev/null
@@ -1,217 +0,0 @@
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Org-mode parsing utilities.
-
-Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
-functions are adapted to Org-mode specific functionality.
--}
-module Text.Pandoc.Readers.Org.Parsing
- ( OrgParser
- , anyLine
- , blanklines
- , newline
- , parseFromString
- , skipSpaces1
- , inList
- , withContext
- , getExportSetting
- , updateLastForbiddenCharPos
- , updateLastPreCharPos
- , orgArgKey
- , orgArgWord
- , orgArgWordChar
- -- * Re-exports from Text.Pandoc.Parser
- , ParserContext (..)
- , many1Till
- , notFollowedBy'
- , spaceChar
- , nonspaceChar
- , skipSpaces
- , blankline
- , enclosed
- , stringAnyCase
- , charsInBalanced
- , uri
- , withRaw
- , readWithM
- , guardEnabled
- , updateLastStrPos
- , notAfterString
- , ParserState (..)
- , registerHeader
- , QuoteContext (..)
- , singleQuoteStart
- , singleQuoteEnd
- , doubleQuoteStart
- , doubleQuoteEnd
- , dash
- , ellipses
- , citeKey
- -- * Re-exports from Text.Pandoc.Parsec
- , runParser
- , runParserT
- , getInput
- , char
- , letter
- , digit
- , alphaNum
- , skipMany1
- , spaces
- , anyChar
- , satisfy
- , string
- , count
- , eof
- , noneOf
- , oneOf
- , lookAhead
- , notFollowedBy
- , many
- , many1
- , manyTill
- , (<|>)
- , (<?>)
- , choice
- , try
- , sepBy
- , sepBy1
- , sepEndBy1
- , option
- , optional
- , optionMaybe
- , getState
- , updateState
- , SourcePos
- , getPosition
- ) where
-
-import Text.Pandoc.Readers.Org.ParserState
-
-import qualified Text.Pandoc.Parsing as P
-import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
- , parseFromString )
-
-import Control.Monad ( guard )
-import Control.Monad.Reader ( ReaderT )
-
--- | The parser used to read org files.
-type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
-
---
--- Adaptions and specializations of parsing utilities
---
-
--- | Parse any line of text
-anyLine :: Monad m => OrgParser m String
-anyLine =
- P.anyLine
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
-parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
-parseFromString parser str' = do
- oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
- updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
- result <- P.parseFromString parser str'
- updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
- return result
-
--- | Skip one or more tab or space characters.
-skipSpaces1 :: Monad m => OrgParser m ()
-skipSpaces1 = skipMany1 spaceChar
-
--- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
-newline :: Monad m => OrgParser m Char
-newline =
- P.newline
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: Monad m => OrgParser m [Char]
-blanklines =
- P.blanklines
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Succeeds when we're in list context.
-inList :: Monad m => OrgParser m ()
-inList = do
- ctx <- orgStateParserContext <$> getState
- guard (ctx == ListItemState)
-
--- | Parse in different context
-withContext :: Monad m
- => ParserContext -- ^ New parser context
- -> OrgParser m a -- ^ Parser to run in that context
- -> OrgParser m a
-withContext context parser = do
- oldContext <- orgStateParserContext <$> getState
- updateState $ \s -> s{ orgStateParserContext = context }
- result <- parser
- updateState $ \s -> s{ orgStateParserContext = oldContext }
- return result
-
---
--- Parser state functions
---
-
--- | Get an export setting.
-getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a
-getExportSetting s = s . orgStateExportSettings <$> getState
-
--- | Set the current position as the last position at which a forbidden char
--- was found (i.e. a character which is not allowed at the inner border of
--- markup).
-updateLastForbiddenCharPos :: Monad m => OrgParser m ()
-updateLastForbiddenCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-
--- | Set the current parser position as the position at which a character was
--- seen which allows inline markup to follow.
-updateLastPreCharPos :: Monad m => OrgParser m ()
-updateLastPreCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
-
---
--- Org key-value parsing
---
-
--- | Read the key of a plist style key-value list.
-orgArgKey :: Monad m => OrgParser m String
-orgArgKey = try $
- skipSpaces *> char ':'
- *> many1 orgArgWordChar
-
--- | Read the value of a plist style key-value list.
-orgArgWord :: Monad m => OrgParser m String
-orgArgWord = many1 orgArgWordChar
-
--- | Chars treated as part of a word in plists.
-orgArgWordChar :: Monad m => OrgParser m Char
-orgArgWordChar = alphaNum <|> oneOf "-_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
deleted file mode 100644
index 8c87cfa25..000000000
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ /dev/null
@@ -1,97 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Readers.Org.Options
- Copyright : Copyright (C) 2014-2016 Albert Krewinkel
- License : GNU GPL, version 2 or above
-
- Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
-
-Utility functions used in other Pandoc Org modules.
--}
-module Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString
- , isImageFilename
- , rundocBlockClass
- , toRundocAttrib
- , translateLang
- ) where
-
-import Control.Arrow ( first )
-import Data.Char ( isAlphaNum )
-import Data.List ( isPrefixOf, isSuffixOf )
-
-
--- | Check whether the given string looks like the path to of URL of an image.
-isImageFilename :: String -> Bool
-isImageFilename filename =
- any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
- (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
- ':' `notElem` filename)
- where
- imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
- protocols = [ "file", "http", "https" ]
-
--- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
- _ | isUrl s -> Just s -- URL
- _ -> Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
-
--- | Prefix used for Rundoc classes and arguments.
-rundocPrefix :: String
-rundocPrefix = "rundoc-"
-
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = rundocPrefix ++ "block"
-
--- | Prefix the name of a attribute, marking it as a code execution parameter.
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first (rundocPrefix ++)
-
--- | Translate from Org-mode's programming language identifiers to those used
--- by Pandoc. This is useful to allow for proper syntax highlighting in
--- Pandoc output.
-translateLang :: String -> String
-translateLang cs =
- case cs of
- "C" -> "c"
- "C++" -> "cpp"
- "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported
- "js" -> "javascript"
- "lisp" -> "commonlisp"
- "R" -> "r"
- "sh" -> "bash"
- "sqlite" -> "sql"
- _ -> cs