diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/BlockStarts.hs | 137 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 979 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ExportSettings.hs | 172 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 880 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Meta.hs | 218 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 259 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 217 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Shared.hs | 97 |
8 files changed, 0 insertions, 2959 deletions
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs deleted file mode 100644 index 5588c4552..000000000 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ /dev/null @@ -1,137 +0,0 @@ -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode inline elements. --} -module Text.Pandoc.Readers.Org.BlockStarts - ( exampleLineStart - , hline - , noteMarker - , tableStart - , drawerStart - , headerStart - , metaLineStart - , latexEnvStart - , commentLineStart - , bulletListStart - , orderedListStart - , endOfBlock - ) where - -import Control.Monad ( void ) -import Text.Pandoc.Readers.Org.Parsing - --- | Horizontal Line (five -- dashes or more) -hline :: Monad m => OrgParser m () -hline = try $ do - skipSpaces - string "-----" - many (char '-') - skipSpaces - newline - return () - --- | Read the start of a header line, return the header level -headerStart :: Monad m => OrgParser m Int -headerStart = try $ - (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos - -tableStart :: Monad m => OrgParser m Char -tableStart = try $ skipSpaces *> char '|' - -latexEnvStart :: Monad m => OrgParser m String -latexEnvStart = try $ do - skipSpaces *> string "\\begin{" - *> latexEnvName - <* string "}" - <* blankline - where - latexEnvName :: Monad m => OrgParser m String - latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") - - --- | Parses bullet list marker. -bulletListStart :: Monad m => OrgParser m () -bulletListStart = try $ - choice - [ () <$ skipSpaces <* oneOf "+-" <* skipSpaces1 - , () <$ skipSpaces1 <* char '*' <* skipSpaces1 - ] - -genericListStart :: Monad m - => OrgParser m String - -> OrgParser m Int -genericListStart listMarker = try $ - (+) <$> (length <$> many spaceChar) - <*> (length <$> listMarker <* many1 spaceChar) - -orderedListStart :: Monad m => OrgParser m Int -orderedListStart = genericListStart orderedListMarker - -- Ordered list markers allowed in org-mode - where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") - -drawerStart :: Monad m => OrgParser m String -drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline - where drawerName = char ':' *> manyTill nonspaceChar (char ':') - -metaLineStart :: Monad m => OrgParser m () -metaLineStart = try $ skipSpaces <* string "#+" - -commentLineStart :: Monad m => OrgParser m () -commentLineStart = try $ skipSpaces <* string "# " - -exampleLineStart :: Monad m => OrgParser m () -exampleLineStart = () <$ try (skipSpaces *> string ": ") - -noteMarker :: Monad m => OrgParser m String -noteMarker = try $ do - char '[' - choice [ many1Till digit (char ']') - , (++) <$> string "fn:" - <*> many1Till (noneOf "\n\r\t ") (char ']') - ] - --- | Succeeds if the parser is at the end of a block. -endOfBlock :: Monad m => OrgParser m () -endOfBlock = lookAhead . try $ do - void blankline <|> anyBlockStart - where - -- Succeeds if there is a new block starting at this position. - anyBlockStart :: Monad m => OrgParser m () - anyBlockStart = try . choice $ - [ exampleLineStart - , hline - , metaLineStart - , commentLineStart - , void noteMarker - , void tableStart - , void drawerStart - , void headerStart - , void latexEnvStart - , void bulletListStart - , void orderedListStart - ] - diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs deleted file mode 100644 index 78ac8d0d1..000000000 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ /dev/null @@ -1,979 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} -{- -Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2017 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode block elements. --} -module Text.Pandoc.Readers.Org.Blocks - ( blockList - , meta - ) where - -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks ) -import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared ( compactify, compactifyDL, safeRead ) - -import Control.Monad ( foldM, guard, mzero, void ) -import Data.Char ( isSpace, toLower, toUpper) -import Data.Default ( Default ) -import Data.List ( foldl', isPrefixOf ) -import Data.Maybe ( fromMaybe, isNothing ) -import Data.Monoid ((<>)) - --- --- Org headers --- -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - --- | Create a tag containing the given string. -toTag :: String -> Tag -toTag = Tag - --- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } - deriving (Show, Eq, Ord) - --- | Create a property key containing the given string. Org mode keys are --- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower - --- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } - --- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue -toPropertyValue = PropertyValue - --- | Check whether the property value is non-nil (i.e. truish). -isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] - --- | Key/value pairs from a PROPERTIES drawer -type Properties = [(PropertyKey, PropertyValue)] - --- | Org mode headline (i.e. a document subtree). -data Headline = Headline - { headlineLevel :: Int - , headlineTodoMarker :: Maybe TodoMarker - , headlineText :: Inlines - , headlineTags :: [Tag] - , headlineProperties :: Properties - , headlineContents :: Blocks - , headlineChildren :: [Headline] - } - --- --- Parsing headlines and subtrees --- - --- | Read an Org mode headline and its contents (i.e. a document subtree). --- @lvl@ gives the minimum acceptable level of the tree. -headline :: PandocMonad m => Int -> OrgParser m (F Headline) -headline lvl = try $ do - level <- headerStart - guard (lvl <= level) - todoKw <- optionMaybe todoKeyword - title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle - tags <- option [] headerTags - newline - properties <- option mempty propertiesDrawer - contents <- blocks - children <- many (headline (level + 1)) - return $ do - title' <- title - contents' <- contents - children' <- sequence children - return $ Headline - { headlineLevel = level - , headlineTodoMarker = todoKw - , headlineText = title' - , headlineTags = tags - , headlineProperties = properties - , headlineContents = contents' - , headlineChildren = children' - } - where - endOfTitle :: Monad m => OrgParser m () - endOfTitle = void . lookAhead $ optional headerTags *> newline - - headerTags :: Monad m => OrgParser m [Tag] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) - --- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -headlineToBlocks hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln - -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") - -isArchiveTag :: Tag -> Bool -isArchiveTag = (== toTag "ARCHIVE") - --- | Check if the title starts with COMMENT. --- FIXME: This accesses builder internals not intended for use in situations --- like these. Replace once keyword parsing is supported. -isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False - -archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -archivedHeadlineToBlocks hdln = do - archivedTreesOption <- getExportSetting exportArchivedTrees - case archivedTreesOption of - ArchivedTreesNoExport -> return mempty - ArchivedTreesExport -> headlineToHeaderWithContents hdln - ArchivedTreesHeadlineOnly -> headlineToHeader hdln - -headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithList hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - header <- headlineToHeader hdln - listElements <- sequence (map headlineToBlocks headlineChildren) - let listBlock = if null listElements - then mempty - else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel - then header - else flattenHeader header - return $ headerText <> headlineContents <> listBlock - where - flattenHeader :: Blocks -> Blocks - flattenHeader blks = - case B.toList blks of - (Header _ _ inlns:_) -> B.para (B.fromList inlns) - _ -> mempty - -headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithContents hdln@(Headline {..}) = do - header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) - return $ header <> headlineContents <> childrenBlocks - -headlineToHeader :: Monad m => Headline -> OrgParser m Blocks -headlineToHeader (Headline {..}) = do - exportTodoKeyword <- getExportSetting exportWithTodoKeywords - let todoText = if exportTodoKeyword - then case headlineTodoMarker of - Just kw -> todoKeywordToInlines kw <> B.space - Nothing -> mempty - else mempty - let text = tagTitle (todoText <> headlineText) headlineTags - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text - -todoKeyword :: Monad m => OrgParser m TodoMarker -todoKeyword = try $ do - taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) - choice (map kwParser taskStates) - -todoKeywordToInlines :: TodoMarker -> Inlines -todoKeywordToInlines tdm = - let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm - classes = [todoState, todoText] - in B.spanWith (mempty, classes, mempty) (B.str todoText) - -propertiesToAttr :: Properties -> Attr -propertiesToAttr properties = - let - toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) - customIdKey = toPropertyKey "custom_id" - classKey = toPropertyKey "class" - unnumberedKey = toPropertyKey "unnumbered" - specialProperties = [customIdKey, classKey, unnumberedKey] - id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties - cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) - $ properties - isUnnumbered = - fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties - in - (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') - -tagTitle :: Inlines -> [Tag] -> Inlines -tagTitle title tags = title <> (mconcat $ map tagToInline tags) - -tagToInline :: Tag -> Inlines -tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty - - --- --- parsing blocks --- - --- | Get a list of blocks. -blockList :: PandocMonad m => OrgParser m [Block] -blockList = do - initialBlocks <- blocks - headlines <- sequence <$> manyTill (headline 1) eof - st <- getState - headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st - return . B.toList $ (runF initialBlocks st) <> headlineBlocks - --- | Get the meta information safed in the state. -meta :: Monad m => OrgParser m Meta -meta = do - meta' <- metaExport - runF meta' <$> getState - -blocks :: PandocMonad m => OrgParser m (F Blocks) -blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) - -block :: PandocMonad m => OrgParser m (F Blocks) -block = choice [ mempty <$ blanklines - , table - , orgBlock - , figure - , example - , genericDrawer - , specialLine - , horizontalRule - , list - , latexFragment - , noteBlock - , paraOrPlain - ] <?> "block" - - --- --- Block Attributes --- - --- | Attributes that may be added to figures (like a name or caption). -data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrLabel :: Maybe String - , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] - } - --- | Convert BlockAttributes into pandoc Attr -attrFromBlockAttributes :: BlockAttributes -> Attr -attrFromBlockAttributes (BlockAttributes{..}) = - let - ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues - classes = case lookup "class" blockAttrKeyValues of - Nothing -> [] - Just clsStr -> words clsStr - kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues - in (ident, classes, kv) - -stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String) -stringyMetaAttribute attrCheck = try $ do - metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName - skipSpaces - attrValue <- anyLine - return (attrName, attrValue) - -blockAttributes :: PandocMonad m => OrgParser m BlockAttributes -blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) - let caption = foldl' (appendValues "CAPTION") Nothing kv - let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv - let name = lookup "NAME" kv - let label = lookup "LABEL" kv - caption' <- case caption of - Nothing -> return Nothing - Just s -> Just <$> parseFromString inlines (s ++ "\n") - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes - { blockAttrName = name - , blockAttrLabel = label - , blockAttrCaption = caption' - , blockAttrKeyValues = kvAttrs' - } - where - attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "LABEL" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False - - appendValues :: String -> Maybe String -> (String, String) -> Maybe String - appendValues attrName accValue (key, value) = - if key /= attrName - then accValue - else case accValue of - Just acc -> Just $ acc ++ ' ':value - Nothing -> Just value - -keyValues :: Monad m => OrgParser m [(String, String)] -keyValues = try $ - manyTill ((,) <$> key <*> value) newline - where - key :: Monad m => OrgParser m String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar - - value :: Monad m => OrgParser m String - value = skipSpaces *> manyTill anyChar endOfValue - - endOfValue :: Monad m => OrgParser m () - endOfValue = - lookAhead $ (() <$ try (many1 spaceChar <* key)) - <|> () <$ newline - - --- --- Org Blocks (#+BEGIN_... / #+END_...) --- - --- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE. -orgBlock :: PandocMonad m => OrgParser m (F Blocks) -orgBlock = try $ do - blockAttrs <- blockAttributes - blkType <- blockHeaderStart - ($ blkType) $ - case (map toLower blkType) of - "export" -> exportBlock - "comment" -> rawBlockLines (const mempty) - "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) - "latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) - "ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) - "example" -> rawBlockLines (return . exampleCode) - "quote" -> parseBlockLines (fmap B.blockQuote) - "verse" -> verseBlock - "src" -> codeBlock blockAttrs - _ -> parseBlockLines $ - let (ident, classes, kv) = attrFromBlockAttributes blockAttrs - in fmap $ B.divWith (ident, classes ++ [blkType], kv) - where - blockHeaderStart :: Monad m => OrgParser m String - blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord - - lowercase :: String -> String - lowercase = map toLower - -rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) -rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) - -parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) -parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) - where - parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) - parsedBlockContent = try $ do - raw <- rawBlockContent blockType - parseFromString blocks (raw ++ "\n") - --- | Read the raw string content of a block -rawBlockContent :: Monad m => String -> OrgParser m String -rawBlockContent blockType = try $ do - blkLines <- manyTill rawLine blockEnder - tabLen <- getOption readerTabStop - return - . unlines - . stripIndent - . map (tabsToSpaces tabLen . commaEscaped) - $ blkLines - where - rawLine :: Monad m => OrgParser m String - rawLine = try $ ("" <$ blankline) <|> anyLine - - blockEnder :: Monad m => OrgParser m () - blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - - stripIndent :: [String] -> [String] - stripIndent strs = map (drop (shortestIndent strs)) strs - - shortestIndent :: [String] -> Int - shortestIndent = foldr min maxBound - . map (length . takeWhile isSpace) - . filter (not . null) - - tabsToSpaces :: Int -> String -> String - tabsToSpaces _ [] = [] - tabsToSpaces tabLen cs'@(c:cs) = - case c of - ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs - _ -> cs' - - commaEscaped :: String -> String - commaEscaped (',':cs@('*':_)) = cs - commaEscaped (',':cs@('#':'+':_)) = cs - commaEscaped (' ':cs) = ' ':commaEscaped cs - commaEscaped ('\t':cs) = '\t':commaEscaped cs - commaEscaped cs = cs - --- | Read but ignore all remaining block headers. -ignHeaders :: Monad m => OrgParser m () -ignHeaders = (() <$ newline) <|> (() <$ anyLine) - --- | Read a block containing code intended for export in specific backends --- only. -exportBlock :: Monad m => String -> OrgParser m (F Blocks) -exportBlock blockType = try $ do - exportType <- skipSpaces *> orgArgWord <* ignHeaders - contents <- rawBlockContent blockType - returnF (B.rawBlock (map toLower exportType) contents) - -verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) -verseBlock blockType = try $ do - ignHeaders - content <- rawBlockContent blockType - fmap B.lineBlock . sequence - <$> mapM parseVerseLine (lines content) - where - -- replace initial spaces with nonbreaking spaces to preserve - -- indentation, parse the rest as normal inline - parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) - parseVerseLine cs = do - let (initialSpaces, indentedLine) = span isSpace cs - let nbspIndent = if null initialSpaces - then mempty - else B.str $ map (const '\160') initialSpaces - line <- parseFromString inlines (indentedLine ++ "\n") - return (trimInlinesF $ pure nbspIndent <> line) - --- | Read a code block and the associated results block if present. Which of --- boths blocks is included in the output is determined using the "exports" --- argument in the block header. -codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) -codeBlock blockAttrs blockType = do - skipSpaces - (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) - content <- rawBlockContent blockType - resultsContent <- trailingResultsBlock - let id' = fromMaybe mempty $ blockAttrName blockAttrs - let includeCode = exportsCode kv - let includeResults = exportsResults kv - let codeBlck = B.codeBlockWith ( id', classes, kv ) content - let labelledBlck = maybe (pure codeBlck) - (labelDiv codeBlck) - (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent - return $ - (if includeCode then labelledBlck else mempty) <> - (if includeResults then resultBlck else mempty) - where - labelDiv :: Blocks -> F Inlines -> F Blocks - labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk) - - labelledBlock :: F Inlines -> F Blocks - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - -exportsCode :: [(String, String)] -> Bool -exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs - || ("rundoc-exports", "results") `elem` attrs) - -exportsResults :: [(String, String)] -> Bool -exportsResults attrs = ("rundoc-exports", "results") `elem` attrs - || ("rundoc-exports", "both") `elem` attrs - -trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks)) -trailingResultsBlock = optionMaybe . try $ do - blanklines - stringAnyCase "#+RESULTS:" - blankline - block - --- | Parse code block arguments --- TODO: We currently don't handle switches. -codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) -codeHeaderArgs = try $ do - language <- skipSpaces *> orgArgWord - _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) - parameters <- manyTill blockOption newline - let pandocLang = translateLang language - return $ - if hasRundocParameters parameters - then ( [ pandocLang, rundocBlockClass ] - , map toRundocAttrib (("language", language) : parameters) - ) - else ([ pandocLang ], parameters) - where - hasRundocParameters = not . null - -switch :: Monad m => OrgParser m (Char, Maybe String) -switch = try $ simpleSwitch <|> lineNumbersSwitch - where - simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) - lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> - (string "-l \"" *> many1Till nonspaceChar (char '"')) - -blockOption :: Monad m => OrgParser m (String, String) -blockOption = try $ do - argKey <- orgArgKey - paramValue <- option "yes" orgParamValue - return (argKey, paramValue) - -orgParamValue :: Monad m => OrgParser m String -orgParamValue = try $ - skipSpaces - *> notFollowedBy (char ':' ) - *> many1 nonspaceChar - <* skipSpaces - -horizontalRule :: Monad m => OrgParser m (F Blocks) -horizontalRule = return B.horizontalRule <$ try hline - - --- --- Drawers --- - --- | A generic drawer which has no special meaning for org-mode. --- Whether or not this drawer is included in the output depends on the drawers --- export setting. -genericDrawer :: PandocMonad m => OrgParser m (F Blocks) -genericDrawer = try $ do - name <- map toUpper <$> drawerStart - content <- manyTill drawerLine (try drawerEnd) - state <- getState - -- Include drawer if it is explicitly included in or not explicitly excluded - -- from the list of drawers that should be exported. PROPERTIES drawers are - -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of - _ | name == "PROPERTIES" -> return mempty - Left names | name `elem` names -> return mempty - Right names | name `notElem` names -> return mempty - _ -> drawerDiv name <$> parseLines content - where - parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) - parseLines = parseFromString blocks . (++ "\n") . unlines - - drawerDiv :: String -> F Blocks -> F Blocks - drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) - -drawerLine :: Monad m => OrgParser m String -drawerLine = anyLine - -drawerEnd :: Monad m => OrgParser m String -drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline - --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: Monad m => OrgParser m Properties -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: Monad m => OrgParser m (PropertyKey, PropertyValue) - property = try $ (,) <$> key <*> value - - key :: Monad m => OrgParser m PropertyKey - key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: Monad m => OrgParser m PropertyValue - value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - - --- --- Figures --- - --- | Figures or an image paragraph (i.e. an image on a line by itself). Only --- images with a caption attribute are interpreted as figures. -figure :: PandocMonad m => OrgParser m (F Blocks) -figure = try $ do - figAttrs <- blockAttributes - src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph - case cleanLinkString src of - Nothing -> mzero - Just imgSrc -> do - guard (isImageFilename imgSrc) - let isFigure = not . isNothing $ blockAttrCaption figAttrs - return $ imageBlock isFigure figAttrs imgSrc - where - selfTarget :: PandocMonad m => OrgParser m String - selfTarget = try $ char '[' *> linkTarget <* char ']' - - imageBlock :: Bool -> BlockAttributes -> String -> F Blocks - imageBlock isFigure figAttrs imgSrc = - let - figName = fromMaybe mempty $ blockAttrName figAttrs - figLabel = fromMaybe mempty $ blockAttrLabel figAttrs - figCaption = fromMaybe mempty $ blockAttrCaption figAttrs - figKeyVals = blockAttrKeyValues figAttrs - attr = (figLabel, mempty, figKeyVals) - figTitle = (if isFigure then withFigPrefix else id) figName - in - B.para . B.imageWith attr imgSrc figTitle <$> figCaption - - withFigPrefix :: String -> String - withFigPrefix cs = - if "fig:" `isPrefixOf` cs - then cs - else "fig:" ++ cs - --- | Succeeds if looking at the end of the current paragraph -endOfParagraph :: Monad m => OrgParser m () -endOfParagraph = try $ skipSpaces *> newline *> endOfBlock - - --- --- Examples --- - --- | Example code marked up by a leading colon. -example :: Monad m => OrgParser m (F Blocks) -example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine - where - exampleLine :: Monad m => OrgParser m String - exampleLine = try $ exampleLineStart *> anyLine - -exampleCode :: String -> Blocks -exampleCode = B.codeBlockWith ("", ["example"], []) - - --- --- Comments, Options and Metadata --- - -specialLine :: PandocMonad m => OrgParser m (F Blocks) -specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine - -rawExportLine :: PandocMonad m => OrgParser m Blocks -rawExportLine = try $ do - metaLineStart - key <- metaKey - if key `elem` ["latex", "html", "texinfo", "beamer"] - then B.rawBlock key <$> anyLine - else mzero - -commentLine :: Monad m => OrgParser m Blocks -commentLine = commentLineStart *> anyLine *> pure mempty - - --- --- Tables --- -data ColumnProperty = ColumnProperty - { columnAlignment :: Maybe Alignment - , columnRelWidth :: Maybe Int - } deriving (Show, Eq) - -instance Default ColumnProperty where - def = ColumnProperty Nothing Nothing - -data OrgTableRow = OrgContentRow (F [Blocks]) - | OrgAlignRow [ColumnProperty] - | OrgHlineRow - --- OrgTable is strongly related to the pandoc table ADT. Using the same --- (i.e. pandoc-global) ADT would mean that the reader would break if the --- global structure was to be changed, which would be bad. The final table --- should be generated using a builder function. -data OrgTable = OrgTable - { orgTableColumnProperties :: [ColumnProperty] - , orgTableHeader :: [Blocks] - , orgTableRows :: [[Blocks]] - } - -table :: PandocMonad m => OrgParser m (F Blocks) -table = try $ do - blockAttrs <- blockAttributes - lookAhead tableStart - do - rows <- tableRows - let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs - return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows - -orgToPandocTable :: OrgTable - -> Inlines - -> Blocks -orgToPandocTable (OrgTable colProps heads lns) caption = - let totalWidth = if any (not . isNothing) (map columnRelWidth colProps) - then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps - else Nothing - in B.table caption (map (convertColProp totalWidth) colProps) heads lns - where - convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double) - convertColProp totalWidth colProp = - let - align' = fromMaybe AlignDefault $ columnAlignment colProp - width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) - <$> (columnRelWidth colProp) - <*> totalWidth - in (align', width') - -tableRows :: PandocMonad m => OrgParser m [OrgTableRow] -tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) - -tableContentRow :: PandocMonad m => OrgParser m OrgTableRow -tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) - -tableContentCell :: PandocMonad m => OrgParser m (F Blocks) -tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell - -tableAlignRow :: Monad m => OrgParser m OrgTableRow -tableAlignRow = try $ do - tableStart - colProps <- many1Till columnPropertyCell newline - -- Empty rows are regular (i.e. content) rows, not alignment rows. - guard $ any (/= def) colProps - return $ OrgAlignRow colProps - -columnPropertyCell :: Monad m => OrgParser m ColumnProperty -columnPropertyCell = emptyCell <|> propCell <?> "alignment info" - where - emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) - propCell = try $ ColumnProperty - <$> (skipSpaces - *> char '<' - *> optionMaybe tableAlignFromChar) - <*> (optionMaybe (many1 digit >>= safeRead) - <* char '>' - <* emptyCell) - -tableAlignFromChar :: Monad m => OrgParser m Alignment -tableAlignFromChar = try $ - choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] - -tableHline :: Monad m => OrgParser m OrgTableRow -tableHline = try $ - OrgHlineRow <$ (tableStart *> char '-' *> anyLine) - -endOfCell :: Monad m => OrgParser m Char -endOfCell = try $ char '|' <|> lookAhead newline - -rowsToTable :: [OrgTableRow] - -> F OrgTable -rowsToTable = foldM rowToContent emptyTable - where emptyTable = OrgTable mempty mempty mempty - -normalizeTable :: OrgTable -> OrgTable -normalizeTable (OrgTable colProps heads rows) = - OrgTable colProps' heads rows - where - refRow = if heads /= mempty - then heads - else case rows of - (r:_) -> r - _ -> mempty - cols = length refRow - fillColumns base padding = take cols $ base ++ repeat padding - colProps' = fillColumns colProps def - --- One or more horizontal rules after the first content line mark the previous --- line as a header. All other horizontal lines are discarded. -rowToContent :: OrgTable - -> OrgTableRow - -> F OrgTable -rowToContent orgTable row = - case row of - OrgHlineRow -> return singleRowPromotedToHeader - OrgAlignRow props -> return . setProperties $ props - OrgContentRow cs -> appendToBody cs - where - singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable - - setProperties :: [ColumnProperty] -> OrgTable - setProperties ps = orgTable{ orgTableColumnProperties = ps } - - appendToBody :: F [Blocks] -> F OrgTable - appendToBody frow = do - newRow <- frow - let oldRows = orgTableRows orgTable - -- NOTE: This is an inefficient O(n) operation. This should be changed - -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } - - --- --- LaTeX fragments --- -latexFragment :: Monad m => OrgParser m (F Blocks) -latexFragment = try $ do - envName <- latexEnvStart - content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) - where - c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" - , c - , "\\end{", e, "}\n" - ] - -latexEnd :: Monad m => String -> OrgParser m () -latexEnd envName = try $ - () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") - <* blankline - - --- --- Footnote defintions --- -noteBlock :: PandocMonad m => OrgParser m (F Blocks) -noteBlock = try $ do - ref <- noteMarker <* skipSpaces - content <- mconcat <$> blocksTillHeaderOrNote - addToNotesTable (ref, content) - return mempty - where - blocksTillHeaderOrNote = - many1Till block (eof <|> () <$ lookAhead noteMarker - <|> () <$ lookAhead headerStart) - --- Paragraphs or Plain text -paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) -paraOrPlain = try $ do - -- Make sure we are not looking at a headline - notFollowedBy' (char '*' *> (oneOf " *")) - ils <- inlines - nl <- option False (newline *> return True) - -- Read block as paragraph, except if we are in a list context and the block - -- is directly followed by a list item, in which case the block is read as - -- plain text. - try (guard nl - *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) - *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) - - --- --- list blocks --- - -list :: PandocMonad m => OrgParser m (F Blocks) -list = choice [ definitionList, bulletList, orderedList ] <?> "list" - -definitionList :: PandocMonad m => OrgParser m (F Blocks) -definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactifyDL . sequence - <$> many1 (definitionListItem $ bulletListStart' (Just n)) - -bulletList :: PandocMonad m => OrgParser m (F Blocks) -bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify . sequence - <$> many1 (listItem (bulletListStart' $ Just n)) - -orderedList :: PandocMonad m => OrgParser m (F Blocks) -orderedList = fmap B.orderedList . fmap compactify . sequence - <$> many1 (listItem orderedListStart) - -bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int --- returns length of bulletList prefix, inclusive of marker -bulletListStart' Nothing = do ind <- length <$> many spaceChar - oneOf (bullets $ ind == 0) - skipSpaces1 - return (ind + 1) -bulletListStart' (Just n) = do count (n-1) spaceChar - oneOf (bullets $ n == 1) - many1 spaceChar - return n - --- Unindented lists are legal, but they can't use '*' bullets. --- We return n to maintain compatibility with the generic listItem. -bullets :: Bool -> String -bullets unindented = if unindented then "+-" else "*+-" - -definitionListItem :: PandocMonad m - => OrgParser m Int - -> OrgParser m (F (Inlines, [Blocks])) -definitionListItem parseMarkerGetLength = try $ do - markerLength <- parseMarkerGetLength - term <- manyTill (noneOf "\n\r") (try definitionMarker) - line1 <- anyLineNewline - blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) - term' <- parseFromString inlines term - contents' <- parseFromString blocks $ line1 ++ blank ++ cont - return $ (,) <$> term' <*> fmap (:[]) contents' - where - definitionMarker = - spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) - - --- parse raw text for one list item, excluding start marker and continuations -listItem :: PandocMonad m - => OrgParser m Int - -> OrgParser m (F Blocks) -listItem start = try . withContext ListItemState $ do - markerLength <- try start - firstLine <- anyLineNewline - blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine ++ blank ++ rest - --- continuation of a list item - indented and separated by blankline or endline. --- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int - -> OrgParser m String -listContinuation markerLength = try $ - notFollowedBy' blankline - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) - where - listLine = try $ indentWith markerLength *> anyLineNewline - - -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Monad m => Int -> OrgParser m String - indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - --- | Parse any line, include the final newline in the output. -anyLineNewline :: Monad m => OrgParser m String -anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs deleted file mode 100644 index 391877c03..000000000 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ /dev/null @@ -1,172 +0,0 @@ -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode export options. --} -module Text.Pandoc.Readers.Org.ExportSettings - ( exportSettings - ) where - -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing - -import Control.Monad ( mzero, void ) -import Data.Char ( toLower ) -import Data.Maybe ( listToMaybe ) - --- | Read and handle space separated org-mode export settings. -exportSettings :: Monad m => OrgParser m () -exportSettings = void $ sepBy spaces exportSetting - --- | Setter function for export settings. -type ExportSettingSetter a = a -> ExportSettings -> ExportSettings - --- | Read and process a single org-mode export option. -exportSetting :: Monad m => OrgParser m () -exportSetting = choice - [ booleanSetting "^" (\val es -> es { exportSubSuperscripts = val }) - , booleanSetting "'" (\val es -> es { exportSmartQuotes = val }) - , booleanSetting "*" (\val es -> es { exportEmphasizedText = val }) - , booleanSetting "-" (\val es -> es { exportSpecialStrings = val }) - , ignoredSetting ":" - , ignoredSetting "<" - , ignoredSetting "\\n" - , archivedTreeSetting "arch" (\val es -> es { exportArchivedTrees = val }) - , booleanSetting "author" (\val es -> es { exportWithAuthor = val }) - , ignoredSetting "c" - -- org-mode allows the special value `comment` for creator, which we'll - -- interpret as true as it doesn't make sense in the context of Pandoc. - , booleanSetting "creator" (\val es -> es { exportWithCreator = val }) - , complementableListSetting "d" (\val es -> es { exportDrawers = val }) - , ignoredSetting "date" - , ignoredSetting "e" - , booleanSetting "email" (\val es -> es { exportWithEmail = val }) - , ignoredSetting "f" - , integerSetting "H" (\val es -> es { exportHeadlineLevels = val }) - , ignoredSetting "inline" - , ignoredSetting "num" - , ignoredSetting "p" - , ignoredSetting "pri" - , ignoredSetting "prop" - , ignoredSetting "stat" - , ignoredSetting "tags" - , ignoredSetting "tasks" - , ignoredSetting "tex" - , ignoredSetting "timestamp" - , ignoredSetting "title" - , ignoredSetting "toc" - , booleanSetting "todo" (\val es -> es { exportWithTodoKeywords = val }) - , ignoredSetting "|" - ] <?> "export setting" - -genericExportSetting :: Monad m - => OrgParser m a - -> String - -> ExportSettingSetter a - -> OrgParser m () -genericExportSetting optionParser settingIdentifier setter = try $ do - _ <- string settingIdentifier *> char ':' - value <- optionParser - updateState $ modifyExportSettings value - where - modifyExportSettings val st = - st { orgStateExportSettings = setter val . orgStateExportSettings $ st } - --- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () -booleanSetting = genericExportSetting elispBoolean - --- | An integer-valued option. -integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () -integerSetting = genericExportSetting parseInt - where - parseInt = try $ - many1 digit >>= maybe mzero (return . fst) . listToMaybe . reads - --- | Either the string "headline" or an elisp boolean and treated as an --- @ArchivedTreesOption@. -archivedTreeSetting :: Monad m - => String - -> ExportSettingSetter ArchivedTreesOption - -> OrgParser m () -archivedTreeSetting = - genericExportSetting $ archivedTreesHeadlineSetting <|> archivedTreesBoolean - where - archivedTreesHeadlineSetting = try $ do - _ <- string "headline" - lookAhead (newline <|> spaceChar) - return ArchivedTreesHeadlineOnly - - archivedTreesBoolean = try $ do - exportBool <- elispBoolean - return $ - if exportBool - then ArchivedTreesExport - else ArchivedTreesNoExport - --- | A list or a complement list (i.e. a list starting with `not`). -complementableListSetting :: Monad m - => String - -> ExportSettingSetter (Either [String] [String]) - -> OrgParser m () -complementableListSetting = genericExportSetting $ choice - [ Left <$> complementStringList - , Right <$> stringList - , (\b -> if b then Left [] else Right []) <$> elispBoolean - ] - where - -- Read a plain list of strings. - stringList :: Monad m => OrgParser m [String] - stringList = try $ - char '(' - *> sepBy elispString spaces - <* char ')' - - -- Read an emacs lisp list specifying a complement set. - complementStringList :: Monad m => OrgParser m [String] - complementStringList = try $ - string "(not " - *> sepBy elispString spaces - <* char ')' - - elispString :: Monad m => OrgParser m String - elispString = try $ - char '"' - *> manyTill alphaNum (char '"') - --- | Read but ignore the export setting. -ignoredSetting :: Monad m => String -> OrgParser m () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) - --- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are --- interpreted as true. -elispBoolean :: Monad m => OrgParser m Bool -elispBoolean = try $ do - value <- many1 nonspaceChar - return $ case map toLower value of - "nil" -> False - "{}" -> False - "()" -> False - _ -> True diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs deleted file mode 100644 index f3671641a..000000000 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ /dev/null @@ -1,880 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode inline elements. --} -module Text.Pandoc.Readers.Org.Inlines - ( inline - , inlines - , addToNotesTable - , linkTarget - ) where - -import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines ) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) -import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) -import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Text.Pandoc.Class (PandocMonad) - -import Prelude hiding (sequence) -import Control.Monad ( guard, mplus, mzero, when, void ) -import Control.Monad.Trans ( lift ) -import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( intersperse ) -import Data.Maybe ( fromMaybe ) -import qualified Data.Map as M -import Data.Monoid ( (<>) ) -import Data.Traversable (sequence) - --- --- Functions acting on the parser state --- -recordAnchorId :: PandocMonad m => String -> OrgParser m () -recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } - -pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () -pushToInlineCharStack c = updateState $ \s -> - s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } - -popInlineCharStack :: PandocMonad m => OrgParser m () -popInlineCharStack = updateState $ \s -> - s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } - -surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char] -surroundingEmphasisChar = - take 1 . drop 1 . orgStateEmphasisCharStack <$> getState - -startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m () -startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> - s{ orgStateEmphasisNewlines = Just maxNewlines } - -decEmphasisNewlinesCount :: PandocMonad m => OrgParser m () -decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } - -newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool -newlinesCountWithinLimits = do - st <- getState - return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True - -resetEmphasisNewlines :: PandocMonad m => OrgParser m () -resetEmphasisNewlines = updateState $ \s -> - s{ orgStateEmphasisNewlines = Nothing } - -addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m () -addToNotesTable note = do - oldnotes <- orgStateNotes' <$> getState - updateState $ \s -> s{ orgStateNotes' = note:oldnotes } - --- | Parse a single Org-mode inline element -inline :: PandocMonad m => OrgParser m (F Inlines) -inline = - choice [ whitespace - , linebreak - , cite - , footnote - , linkOrImage - , anchor - , inlineCodeBlock - , str - , endline - , emphasizedText - , code - , math - , displayMath - , verbatim - , subscript - , superscript - , inlineLaTeX - , exportSnippet - , smart - , symbol - ] <* (guard =<< newlinesCountWithinLimits) - <?> "inline" - --- | Read the rest of the input as inlines. -inlines :: PandocMonad m => OrgParser m (F Inlines) -inlines = trimInlinesF . mconcat <$> many1 inline - --- treat these as potentially non-text when parsing inline: -specialChars :: [Char] -specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~" - - -whitespace :: PandocMonad m => OrgParser m (F Inlines) -whitespace = pure B.space <$ skipMany1 spaceChar - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - <?> "whitespace" - -linebreak :: PandocMonad m => OrgParser m (F Inlines) -linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline - -str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") - <* updateLastStrPos - --- | An endline character that can be treated as a space, not a structural --- break. This should reflect the values of the Emacs variable --- @org-element-pagaraph-separate@. -endline :: PandocMonad m => OrgParser m (F Inlines) -endline = try $ do - newline - notFollowedBy' endOfBlock - decEmphasisNewlinesCount - guard =<< newlinesCountWithinLimits - updateLastPreCharPos - return . return $ B.softbreak - - --- --- Citations --- - --- The state of citations is a bit confusing due to the lack of an official --- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the --- first to be implemented here and is almost identical to Markdown's citation --- syntax. The org-ref package is in wide use to handle citations, but the --- syntax is a bit limiting and not quite as simple to write. The --- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc --- sytax and Org-oriented enhancements contributed by Richard Lawrence and --- others. It's dubbed Berkeley syntax due the place of activity of its main --- contributors. All this should be consolidated once an official Org-mode --- citation syntax has emerged. - -cite :: PandocMonad m => OrgParser m (F Inlines) -cite = try $ berkeleyCite <|> do - guardEnabled Ext_citations - (cs, raw) <- withRaw $ choice - [ pandocOrgCite - , orgRefCite - , berkeleyTextualCite - ] - return $ (flip B.cite (B.text raw)) <$> cs - --- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) -pandocOrgCite = try $ - char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' - -orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) -orgRefCite = try $ choice - [ normalOrgRefCite - , fmap (:[]) <$> linkLikeOrgRefCite - ] - -normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation]) -normalOrgRefCite = try $ do - mode <- orgRefCiteMode - firstCitation <- orgRefCiteList mode - moreCitations <- many (try $ char ',' *> orgRefCiteList mode) - return . sequence $ firstCitation : moreCitations - where - -- | A list of org-ref style citation keys, parsed as citation of the given - -- citation mode. - orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) - orgRefCiteList citeMode = try $ do - key <- orgRefCiteKey - returnF $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = citeMode - , citationNoteNum = 0 - , citationHash = 0 - } - --- | Read an Berkeley-style Org-mode citation. Berkeley citation style was --- develop and adjusted to Org-mode style by John MacFarlane and Richard --- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) -berkeleyCite = try $ do - bcl <- berkeleyCitationList - return $ do - parens <- berkeleyCiteParens <$> bcl - prefix <- berkeleyCiteCommonPrefix <$> bcl - suffix <- berkeleyCiteCommonSuffix <$> bcl - citationList <- berkeleyCiteCitations <$> bcl - return $ - if parens - then toCite - . maybe id (\p -> alterFirst (prependPrefix p)) prefix - . maybe id (\s -> alterLast (appendSuffix s)) suffix - $ citationList - else maybe mempty (<> " ") prefix - <> (toListOfCites $ map toInTextMode citationList) - <> maybe mempty (", " <>) suffix - where - toCite :: [Citation] -> Inlines - toCite cs = B.cite cs mempty - - toListOfCites :: [Citation] -> Inlines - toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) - - toInTextMode :: Citation -> Citation - toInTextMode c = c { citationMode = AuthorInText } - - alterFirst, alterLast :: (a -> a) -> [a] -> [a] - alterFirst _ [] = [] - alterFirst f (c:cs) = (f c):cs - alterLast f = reverse . alterFirst f . reverse - - prependPrefix, appendSuffix :: Inlines -> Citation -> Citation - prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c } - appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } - -data BerkeleyCitationList = BerkeleyCitationList - { berkeleyCiteParens :: Bool - , berkeleyCiteCommonPrefix :: Maybe Inlines - , berkeleyCiteCommonSuffix :: Maybe Inlines - , berkeleyCiteCitations :: [Citation] - } -berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) -berkeleyCitationList = try $ do - char '[' - parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] - char ':' - skipSpaces - commonPrefix <- optionMaybe (try $ citationListPart <* char ';') - citations <- citeList - commonSuffix <- optionMaybe (try $ citationListPart) - char ']' - return (BerkeleyCitationList parens - <$> sequence commonPrefix - <*> sequence commonSuffix - <*> citations) - where - citationListPart :: PandocMonad m => OrgParser m (F Inlines) - citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' citeKey - notFollowedBy (oneOf ";]") - inline - -berkeleyBareTag :: PandocMonad m => OrgParser m () -berkeleyBareTag = try $ void berkeleyBareTag' - -berkeleyParensTag :: PandocMonad m => OrgParser m () -berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' - -berkeleyBareTag' :: PandocMonad m => OrgParser m () -berkeleyBareTag' = try $ void (string "cite") - -berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey - returnF . return $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText - , citationNoteNum = 0 - , citationHash = 0 - } - --- The following is what a Berkeley-style bracketed textual citation parser --- would look like. However, as these citations are a subset of Pandoc's Org --- citation style, this isn't used. --- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) --- berkeleyBracketedTextualCite = try . (fmap head) $ --- enclosedByPair '[' ']' berkeleyTextualCite - --- | Read a link-like org-ref style citation. The citation includes pre and --- post text. However, multiple citations are not possible due to limitations --- in the syntax. -linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation) -linkLikeOrgRefCite = try $ do - _ <- string "[[" - mode <- orgRefCiteMode - key <- orgRefCiteKey - _ <- string "][" - pre <- trimInlinesF . mconcat <$> manyTill inline (try $ string "::") - spc <- option False (True <$ spaceChar) - suf <- trimInlinesF . mconcat <$> manyTill inline (try $ string "]]") - return $ do - pre' <- pre - suf' <- suf - return Citation - { citationId = key - , citationPrefix = B.toList pre' - , citationSuffix = B.toList (if spc then B.space <> suf' else suf') - , citationMode = mode - , citationNoteNum = 0 - , citationHash = 0 - } - --- | Read a citation key. The characters allowed in citation keys are taken --- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) - --- | Supported citation types. Only a small subset of org-ref types is --- supported for now. TODO: rewrite this, use LaTeX reader as template. -orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode -orgRefCiteMode = - choice $ map (\(s, mode) -> mode <$ try (string s <* char ':')) - [ ("cite", AuthorInText) - , ("citep", NormalCitation) - , ("citep*", NormalCitation) - , ("citet", AuthorInText) - , ("citet*", AuthorInText) - , ("citeyear", SuppressAuthor) - ] - -citeList :: PandocMonad m => OrgParser m (F [Citation]) -citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) - -citation :: PandocMonad m => OrgParser m (F Citation) -citation = try $ do - pref <- prefix - (suppress_author, key) <- citeKey - suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - where - prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) - suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - skipSpaces - rest <- trimInlinesF . mconcat <$> - many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest - -footnote :: PandocMonad m => OrgParser m (F Inlines) -footnote = try $ inlineNote <|> referencedNote - -inlineNote :: PandocMonad m => OrgParser m (F Inlines) -inlineNote = try $ do - string "[fn:" - ref <- many alphaNum - char ':' - note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - when (not $ null ref) $ - addToNotesTable ("fn:" ++ ref, note) - return $ B.note <$> note - -referencedNote :: PandocMonad m => OrgParser m (F Inlines) -referencedNote = try $ do - ref <- noteMarker - return $ do - notes <- asksF orgStateNotes' - case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" - Just contents -> do - st <- askF - let contents' = runF contents st{ orgStateNotes' = [] } - return $ B.note contents' - -linkOrImage :: PandocMonad m => OrgParser m (F Inlines) -linkOrImage = explicitOrImageLink - <|> selflinkOrImage - <|> angleLink - <|> plainLink - <?> "link or image" - -explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines) -explicitOrImageLink = try $ do - char '[' - srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget - title <- enclosedRaw (char '[') (char ']') - title' <- parseFromString (mconcat <$> many inline) title - char ']' - return $ do - src <- srcF - case cleanLinkString title of - Just imgSrc | isImageFilename imgSrc -> - pure $ B.link src "" $ B.image imgSrc mempty mempty - _ -> - linkToInlinesF src =<< title' - -selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) -selflinkOrImage = try $ do - src <- char '[' *> linkTarget <* char ']' - return $ linkToInlinesF src (B.str src) - -plainLink :: PandocMonad m => OrgParser m (F Inlines) -plainLink = try $ do - (orig, src) <- uri - returnF $ B.link src "" (B.str orig) - -angleLink :: PandocMonad m => OrgParser m (F Inlines) -angleLink = try $ do - char '<' - link <- plainLink - char '>' - return link - -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") - -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String -possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") - -applyCustomLinkFormat :: String -> OrgParser m (F String) -applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link - return $ do - formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter - --- | Take a link and return a function which produces new inlines when given --- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - (Just cleanedLink) -> if isImageFilename cleanedLink - then const . pure $ B.image cleanedLink "" "" - else pure . B.link cleanedLink "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines -internalLink link title = do - anchorB <- (link `elem`) <$> asksF orgStateAnchorIds - if anchorB - then return $ B.link ('#':link) "" title - else return $ B.emph title - --- | Parse an anchor like @<<anchor-id>>@ and return an empty span with --- @anchor-id@ set as id. Legal anchors in org-mode are defined through --- @org-target-regexp@, which is fairly liberal. Since no link is created if --- @anchor-id@ contains spaces, we are more restrictive in what is accepted as --- an anchor. - -anchor :: PandocMonad m => OrgParser m (F Inlines) -anchor = try $ do - anchorId <- parseAnchor - recordAnchorId anchorId - returnF $ B.spanWith (solidify anchorId, [], []) mempty - where - parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - <* skipSpaces - --- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors --- the org function @org-export-solidify-link-text@. - -solidify :: String -> String -solidify = map replaceSpecialChar - where replaceSpecialChar c - | isAlphaNum c = c - | c `elem` ("_.-:" :: String) = c - | otherwise = '-' - --- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) -inlineCodeBlock = try $ do - string "src_" - lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") - let attrClasses = [translateLang lang, rundocBlockClass] - let attrKeyVal = map toRundocAttrib (("language", lang) : opts) - returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode - where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) - inlineBlockOption = try $ do - argKey <- orgArgKey - paramValue <- option "yes" orgInlineParamValue - return (argKey, paramValue) - - orgInlineParamValue :: PandocMonad m => OrgParser m String - orgInlineParamValue = try $ - skipSpaces - *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") - <* skipSpaces - - -emphasizedText :: PandocMonad m => OrgParser m (F Inlines) -emphasizedText = do - state <- getState - guard . exportEmphasizedText . orgStateExportSettings $ state - try $ choice - [ emph - , strong - , strikeout - , underline - ] - -enclosedByPair :: PandocMonad m - => Char -- ^ opening char - -> Char -- ^ closing char - -> OrgParser m a -- ^ parser - -> OrgParser m [a] -enclosedByPair s e p = char s *> many1Till p (char e) - -emph :: PandocMonad m => OrgParser m (F Inlines) -emph = fmap B.emph <$> emphasisBetween '/' - -strong :: PandocMonad m => OrgParser m (F Inlines) -strong = fmap B.strong <$> emphasisBetween '*' - -strikeout :: PandocMonad m => OrgParser m (F Inlines) -strikeout = fmap B.strikeout <$> emphasisBetween '+' - --- There is no underline, so we use strong instead. -underline :: PandocMonad m => OrgParser m (F Inlines) -underline = fmap B.strong <$> emphasisBetween '_' - -verbatim :: PandocMonad m => OrgParser m (F Inlines) -verbatim = return . B.code <$> verbatimBetween '=' - -code :: PandocMonad m => OrgParser m (F Inlines) -code = return . B.code <$> verbatimBetween '~' - -subscript :: PandocMonad m => OrgParser m (F Inlines) -subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) - -superscript :: PandocMonad m => OrgParser m (F Inlines) -superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) - -math :: PandocMonad m => OrgParser m (F Inlines) -math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' - , rawMathBetween "\\(" "\\)" - ] - -displayMath :: PandocMonad m => OrgParser m (F Inlines) -displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] - -updatePositions :: PandocMonad m - => Char - -> OrgParser m Char -updatePositions c = do - when (c `elem` emphasisPreChars) updateLastPreCharPos - when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos - return c - -symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - -emphasisBetween :: PandocMonad m - => Char - -> OrgParser m (F Inlines) -emphasisBetween c = try $ do - startEmphasisNewlinesCounting emphasisAllowedNewlines - res <- enclosedInlines (emphasisStart c) (emphasisEnd c) - isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState - when isTopLevelEmphasis - resetEmphasisNewlines - return res - -verbatimBetween :: PandocMonad m - => Char - -> OrgParser m String -verbatimBetween c = try $ - emphasisStart c *> - many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) - where - verbatimChar = noneOf "\n\r" >>= updatePositions - --- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m - => Char - -> OrgParser m String -mathStringBetween c = try $ do - mathStart c - body <- many1TillNOrLessNewlines mathAllowedNewlines - (noneOf (c:"\n\r")) - (lookAhead $ mathEnd c) - final <- mathEnd c - return $ body ++ [final] - --- | Parse a single character between @c@ using math rules -math1CharBetween :: PandocMonad m - => Char - -> OrgParser m String -math1CharBetween c = try $ do - char c - res <- noneOf $ c:mathForbiddenBorderChars - char c - eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] - -rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) - --- | Parses the start (opening character) of emphasis -emphasisStart :: PandocMonad m => Char -> OrgParser m Char -emphasisStart c = try $ do - guard =<< afterEmphasisPreChar - guard =<< notAfterString - char c - lookAhead (noneOf emphasisForbiddenBorderChars) - pushToInlineCharStack c - -- nested inlines are allowed, so mark this position as one which might be - -- followed by another inline. - updateLastPreCharPos - return c - --- | Parses the closing character of emphasis -emphasisEnd :: PandocMonad m => Char -> OrgParser m Char -emphasisEnd c = try $ do - guard =<< notAfterForbiddenBorderChar - char c - eof <|> () <$ lookAhead acceptablePostChars - updateLastStrPos - popInlineCharStack - return c - where acceptablePostChars = - surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) - -mathStart :: PandocMonad m => Char -> OrgParser m Char -mathStart c = try $ - char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) - -mathEnd :: PandocMonad m => Char -> OrgParser m Char -mathEnd c = try $ do - res <- noneOf (c:mathForbiddenBorderChars) - char c - eof <|> () <$ lookAhead (oneOf mathPostChars) - return res - - -enclosedInlines :: PandocMonad m => OrgParser m a - -> OrgParser m b - -> OrgParser m (F Inlines) -enclosedInlines start end = try $ - trimInlinesF . mconcat <$> enclosed start end inline - -enclosedRaw :: PandocMonad m => OrgParser m a - -> OrgParser m b - -> OrgParser m String -enclosedRaw start end = try $ - start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end - spanningTwoLines = try $ - anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine - --- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume --- newlines. -many1TillNOrLessNewlines :: PandocMonad m => Int - -> OrgParser m Char - -> OrgParser m a - -> OrgParser m String -many1TillNOrLessNewlines n p end = try $ - nMoreLines (Just n) mempty >>= oneOrMore - where - nMoreLines Nothing cs = return cs - nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine - nMoreLines k cs = try $ (final k cs <|> rest k cs) - >>= uncurry nMoreLines - final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine - rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) - finalLine = try $ manyTill p end - minus1 k = k - 1 - oneOrMore cs = guard (not $ null cs) *> return cs - --- Org allows customization of the way it reads emphasis. We use the defaults --- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` --- for details). - --- | Chars allowed to occur before emphasis (spaces and newlines are ok, too) -emphasisPreChars :: [Char] -emphasisPreChars = "\t \"'({" - --- | Chars allowed at after emphasis -emphasisPostChars :: [Char] -emphasisPostChars = "\t\n !\"'),-.:;?\\}" - --- | Chars not allowed at the (inner) border of emphasis -emphasisForbiddenBorderChars :: [Char] -emphasisForbiddenBorderChars = "\t\n\r \"'," - --- | The maximum number of newlines within -emphasisAllowedNewlines :: Int -emphasisAllowedNewlines = 1 - --- LaTeX-style math: see `org-latex-regexps` for details - --- | Chars allowed after an inline ($...$) math statement -mathPostChars :: [Char] -mathPostChars = "\t\n \"'),-.:;?" - --- | Chars not allowed at the (inner) border of math -mathForbiddenBorderChars :: [Char] -mathForbiddenBorderChars = "\t\n\r ,;.$" - --- | Maximum number of newlines in an inline math statement -mathAllowedNewlines :: Int -mathAllowedNewlines = 2 - --- | Whether we are right behind a char allowed before emphasis -afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool -afterEmphasisPreChar = do - pos <- getPosition - lastPrePos <- orgStateLastPreCharPos <$> getState - return . fromMaybe True $ (== pos) <$> lastPrePos - --- | Whether the parser is right after a forbidden border char -notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool -notAfterForbiddenBorderChar = do - pos <- getPosition - lastFBCPos <- orgStateLastForbiddenCharPos <$> getState - return $ lastFBCPos /= Just pos - --- | Read a sub- or superscript expression -subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) -subOrSuperExpr = try $ - choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") - , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString - ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] - -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do - state <- getState - guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum - ] - -inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) -inlineLaTeX = try $ do - cmd <- inlineLaTeXCommand - ils <- (lift . lift) $ parseAsInlineLaTeX cmd - maybe mzero returnF $ - parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils - where - parseAsMath :: String -> Maybe Inlines - parseAsMath cs = B.fromList <$> texMathToPandoc cs - - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) - parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - - parseAsMathMLSym :: String -> Maybe Inlines - parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) - -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 - - state :: ParserState - state = def{ stateOptions = def{ readerExtensions = - enableExtension Ext_raw_tex (readerExtensions def) } } - - texMathToPandoc :: String -> Maybe [Inline] - texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline - -maybeRight :: Either a b -> Maybe b -maybeRight = either (const Nothing) Just - -inlineLaTeXCommand :: PandocMonad m => OrgParser m String -inlineLaTeXCommand = try $ do - rest <- getInput - parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest - case parsed of - Right (RawInline _ cs) -> do - -- drop any trailing whitespace, those are not be part of the command as - -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc - count len anyChar - return cmdNoSpc - _ -> mzero - --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - -exportSnippet :: PandocMonad m => OrgParser m (F Inlines) -exportSnippet = try $ do - string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") - returnF $ B.rawInline format snippet - -smart :: PandocMonad m => OrgParser m (F Inlines) -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) - where - orgDash = do - guard =<< getExportSetting exportSpecialStrings - dash <* updatePositions '-' - orgEllipses = do - guard =<< getExportSetting exportSpecialStrings - ellipses <* updatePositions '.' - orgApostrophe = - (char '\'' <|> char '\8217') <* updateLastPreCharPos - <* updateLastForbiddenCharPos - *> return (B.str "\x2019") - -singleQuoted :: PandocMonad m => OrgParser m (F Inlines) -singleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes - singleQuoteStart - updatePositions '\'' - withQuoteContext InSingleQuote $ - fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline (singleQuoteEnd <* updatePositions '\'') - --- doubleQuoted will handle regular double-quoted sections, as well --- as dialogues with an open double-quote without a close double-quote --- in the same paragraph. -doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) -doubleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes - doubleQuoteStart - updatePositions '"' - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs deleted file mode 100644 index 2f4e21248..000000000 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ /dev/null @@ -1,218 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} -{- -Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Meta - Copyright : Copyright (C) 2014-2017 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode meta declarations. --} -module Text.Pandoc.Readers.Org.Meta - ( metaExport - , metaKey - , metaLine - ) where - -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.ExportSettings ( exportSettings ) -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Blocks, Inlines ) -import Text.Pandoc.Class ( PandocMonad ) -import Text.Pandoc.Definition - -import Control.Monad ( mzero, void ) -import Data.Char ( toLower ) -import Data.List ( intersperse ) -import qualified Data.Map as M -import Data.Monoid ( (<>) ) -import Network.HTTP ( urlEncode ) - --- | Returns the current meta, respecting export options. -metaExport :: Monad m => OrgParser m (F Meta) -metaExport = do - st <- getState - let settings = orgStateExportSettings st - return $ (if exportWithAuthor settings then id else removeMeta "author") - . (if exportWithCreator settings then id else removeMeta "creator") - . (if exportWithEmail settings then id else removeMeta "email") - <$> orgStateMeta st - -removeMeta :: String -> Meta -> Meta -removeMeta key meta' = - let metaMap = unMeta meta' - in Meta $ M.delete key metaMap - --- | Parse and handle a single line containing meta information --- The order, in which blocks are tried, makes sure that we're not looking at --- the beginning of a block, so we don't need to check for it -metaLine :: PandocMonad m => OrgParser m Blocks -metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) - -declarationLine :: PandocMonad m => OrgParser m () -declarationLine = try $ do - key <- map toLower <$> metaKey - (key', value) <- metaValue key - updateState $ \st -> - let meta' = B.setMeta key' <$> value <*> pure nullMeta - in st { orgStateMeta = meta' <> orgStateMeta st } - -metaKey :: Monad m => OrgParser m String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces - -metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue)) -metaValue key = - let inclKey = "header-includes" - in case key of - "author" -> (key,) <$> metaInlinesCommaSeparated - "title" -> (key,) <$> metaInlines - "date" -> (key,) <$> metaInlines - "header-includes" -> (key,) <$> accumulatingList key metaInlines - "latex_header" -> (inclKey,) <$> - accumulatingList inclKey (metaExportSnippet "latex") - "latex_class" -> ("documentclass",) <$> metaString - -- Org-mode expects class options to contain the surrounding brackets, - -- pandoc does not. - "latex_class_options" -> ("classoption",) <$> - metaModifiedString (filter (`notElem` "[]")) - "html_head" -> (inclKey,) <$> - accumulatingList inclKey (metaExportSnippet "html") - _ -> (key,) <$> metaString - -metaInlines :: PandocMonad m => OrgParser m (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline - -metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) -metaInlinesCommaSeparated = do - authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') - newline - authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs - let toMetaInlines = MetaInlines . B.toList - return $ MetaList . map toMetaInlines <$> sequence authors - -metaString :: Monad m => OrgParser m (F MetaValue) -metaString = metaModifiedString id - -metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) -metaModifiedString f = return . MetaString . f <$> anyLine - --- | Read an format specific meta definition -metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) -metaExportSnippet format = - return . MetaInlines . B.toList . B.rawInline format <$> anyLine - --- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: Monad m => String - -> OrgParser m (F MetaValue) - -> OrgParser m (F MetaValue) -accumulatingList key p = do - value <- p - meta' <- orgStateMeta <$> getState - return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value - where curList m = case lookupMeta key m of - Just (MetaList ms) -> ms - Just x -> [x] - _ -> [] - --- --- export options --- -optionLine :: Monad m => OrgParser m () -optionLine = try $ do - key <- metaKey - case key of - "link" -> parseLinkFormat >>= uncurry addLinkFormat - "options" -> exportSettings - "todo" -> todoSequence >>= updateState . registerTodoSequence - "seq_todo" -> todoSequence >>= updateState . registerTodoSequence - "typ_todo" -> todoSequence >>= updateState . registerTodoSequence - _ -> mzero - -addLinkFormat :: Monad m => String - -> (String -> String) - -> OrgParser m () -addLinkFormat key formatter = updateState $ \s -> - let fs = orgStateLinkFormatters s - in s{ orgStateLinkFormatters = M.insert key formatter fs } - -parseLinkFormat :: Monad m => OrgParser m ((String, String -> String)) -parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces - linkSubst <- parseFormat - return (linkType, linkSubst) - --- | An ad-hoc, single-argument-only implementation of a printf-style format --- parser. -parseFormat :: Monad m => OrgParser m (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend - where - -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) - <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) - <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest - - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) - -inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline - --- --- ToDo Sequences and Keywords --- -todoSequence :: Monad m => OrgParser m TodoSequence -todoSequence = try $ do - todoKws <- todoKeywords - doneKws <- optionMaybe $ todoDoneSep *> todoKeywords - newline - -- There must be at least one DONE keyword. The last TODO keyword is taken if - -- necessary. - case doneKws of - Just done -> return $ keywordsToSequence todoKws done - Nothing -> case reverse todoKws of - [] -> mzero -- no keywords present - (x:xs) -> return $ keywordsToSequence (reverse xs) [x] - - where - todoKeywords :: Monad m => OrgParser m [String] - todoKeywords = try $ - let keyword = many1 nonspaceChar <* skipSpaces - endOfKeywords = todoDoneSep <|> void newline - in manyTill keyword (lookAhead endOfKeywords) - - todoDoneSep :: Monad m => OrgParser m () - todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 - - keywordsToSequence :: [String] -> [String] -> TodoSequence - keywordsToSequence todo done = - let todoMarkers = map (TodoMarker Todo) todo - doneMarkers = map (TodoMarker Done) done - in todoMarkers ++ doneMarkers diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs deleted file mode 100644 index 181dd1d5c..000000000 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ /dev/null @@ -1,259 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Define the Org-mode parser state. --} -module Text.Pandoc.Readers.Org.ParserState - ( OrgParserState (..) - , OrgParserLocal (..) - , OrgNoteRecord - , HasReaderOptions (..) - , HasQuoteContext (..) - , TodoMarker (..) - , TodoSequence - , TodoState (..) - , activeTodoMarkers - , registerTodoSequence - , F(..) - , askF - , asksF - , trimInlinesF - , runF - , returnF - , ExportSettings (..) - , ArchivedTreesOption (..) - , optionsToParserState - ) where - -import Control.Monad (liftM, liftM2) -import Control.Monad.Reader (Reader, runReader, ReaderT, ask, asks, local) - -import Data.Default (Default(..)) -import qualified Data.Map as M -import qualified Data.Set as Set - -import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) -import Text.Pandoc.Definition ( Meta(..), nullMeta ) -import Text.Pandoc.Options ( ReaderOptions(..) ) -import Text.Pandoc.Parsing ( HasHeaderMap(..) - , HasIdentifierList(..) - , HasLastStrPosition(..) - , HasQuoteContext(..) - , HasReaderOptions(..) - , ParserContext(..) - , QuoteContext(..) - , SourcePos ) - --- | An inline note / footnote containing the note key and its (inline) value. -type OrgNoteRecord = (String, F Blocks) --- | Table of footnotes -type OrgNoteTable = [OrgNoteRecord] --- | Map of functions for link transformations. The map key is refers to the --- link-type, the corresponding function transforms the given link string. -type OrgLinkFormatters = M.Map String (String -> String) - --- | The states in which a todo item can be -data TodoState = Todo | Done - deriving (Eq, Ord, Show) - --- | A ToDo keyword like @TODO@ or @DONE@. -data TodoMarker = TodoMarker - { todoMarkerState :: TodoState - , todoMarkerName :: String - } - deriving (Show, Eq) - --- | Collection of todo markers in the order in which items should progress -type TodoSequence = [TodoMarker] - --- | Org-mode parser state -data OrgParserState = OrgParserState - { orgStateAnchorIds :: [String] - , orgStateEmphasisCharStack :: [Char] - , orgStateEmphasisNewlines :: Maybe Int - , orgStateExportSettings :: ExportSettings - , orgStateHeaderMap :: M.Map Inlines String - , orgStateIdentifiers :: Set.Set String - , orgStateLastForbiddenCharPos :: Maybe SourcePos - , orgStateLastPreCharPos :: Maybe SourcePos - , orgStateLastStrPos :: Maybe SourcePos - , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMeta :: F Meta - , orgStateNotes' :: OrgNoteTable - , orgStateOptions :: ReaderOptions - , orgStateParserContext :: ParserContext - , orgStateTodoSequences :: [TodoSequence] - } - -data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } - -instance Default OrgParserLocal where - def = OrgParserLocal NoQuote - -instance HasReaderOptions OrgParserState where - extractReaderOptions = orgStateOptions - -instance HasLastStrPosition OrgParserState where - getLastStrPos = orgStateLastStrPos - setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } - -instance Monad m => HasQuoteContext st (ReaderT OrgParserLocal m) where - getQuoteContext = asks orgLocalQuoteContext - withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) - -instance HasIdentifierList OrgParserState where - extractIdentifierList = orgStateIdentifiers - updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } - -instance HasHeaderMap OrgParserState where - extractHeaderMap = orgStateHeaderMap - updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } - -instance Default OrgParserState where - def = defaultOrgParserState - -defaultOrgParserState :: OrgParserState -defaultOrgParserState = OrgParserState - { orgStateAnchorIds = [] - , orgStateEmphasisCharStack = [] - , orgStateEmphasisNewlines = Nothing - , orgStateExportSettings = def - , orgStateHeaderMap = M.empty - , orgStateIdentifiers = Set.empty - , orgStateLastForbiddenCharPos = Nothing - , orgStateLastPreCharPos = Nothing - , orgStateLastStrPos = Nothing - , orgStateLinkFormatters = M.empty - , orgStateMeta = return nullMeta - , orgStateNotes' = [] - , orgStateOptions = def - , orgStateParserContext = NullState - , orgStateTodoSequences = [] - } - -optionsToParserState :: ReaderOptions -> OrgParserState -optionsToParserState opts = - def { orgStateOptions = opts } - -registerTodoSequence :: TodoSequence -> OrgParserState -> OrgParserState -registerTodoSequence todoSeq st = - let curSeqs = orgStateTodoSequences st - in st{ orgStateTodoSequences = todoSeq : curSeqs } - --- | Get the current todo/done sequences. If no custom todo sequences have been --- defined, return a list containing just the default todo/done sequence. -activeTodoSequences :: OrgParserState -> [TodoSequence] -activeTodoSequences st = - let curSeqs = orgStateTodoSequences st - in if null curSeqs - then [[ TodoMarker Todo "TODO" , TodoMarker Done "DONE" ]] - else curSeqs - -activeTodoMarkers :: OrgParserState -> TodoSequence -activeTodoMarkers = concat . activeTodoSequences - - --- --- Export Settings --- - --- | Options for the way archived trees are handled. -data ArchivedTreesOption = - ArchivedTreesExport -- ^ Export the complete tree - | ArchivedTreesNoExport -- ^ Exclude archived trees from exporting - | ArchivedTreesHeadlineOnly -- ^ Export only the headline, discard the contents - --- | Export settings <http://orgmode.org/manual/Export-settings.html> --- These settings can be changed via OPTIONS statements. -data ExportSettings = ExportSettings - { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] - -- ^ Specify drawer names which should be exported. @Left@ names are - -- explicitly excluded from the resulting output while @Right@ means that - -- only the listed drawer names should be included. - , exportEmphasizedText :: Bool -- ^ Parse emphasized text - , exportHeadlineLevels :: Int - -- ^ Maximum depth of headlines, deeper headlines are convert to list - , exportSmartQuotes :: Bool -- ^ Parse quotes smartly - , exportSpecialStrings :: Bool -- ^ Parse ellipses and dashes smartly - , exportSubSuperscripts :: Bool -- ^ TeX-like syntax for sub- and superscripts - , exportWithAuthor :: Bool -- ^ Include author in final meta-data - , exportWithCreator :: Bool -- ^ Include creator in final meta-data - , exportWithEmail :: Bool -- ^ Include email in final meta-data - , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers - } - -instance Default ExportSettings where - def = defaultExportSettings - -defaultExportSettings :: ExportSettings -defaultExportSettings = ExportSettings - { exportArchivedTrees = ArchivedTreesHeadlineOnly - , exportDrawers = Left ["LOGBOOK"] - , exportEmphasizedText = True - , exportHeadlineLevels = 3 - , exportSmartQuotes = True - , exportSpecialStrings = True - , exportSubSuperscripts = True - , exportWithAuthor = True - , exportWithCreator = True - , exportWithEmail = True - , exportWithTodoKeywords = True - } - - --- --- Parser state reader --- - --- | Reader monad wrapping the parser state. This is used to delay evaluation --- until all relevant information has been parsed and made available in the --- parser state. See also the newtype of the same name in --- Text.Pandoc.Parsing. -newtype F a = F { unF :: Reader OrgParserState a - } deriving (Functor, Applicative, Monad) - -instance Monoid a => Monoid (F a) where - mempty = return mempty - mappend = liftM2 mappend - mconcat = fmap mconcat . sequence - -runF :: F a -> OrgParserState -> a -runF = runReader . unF - -askF :: F OrgParserState -askF = F ask - -asksF :: (OrgParserState -> a) -> F a -asksF f = F $ asks f - -trimInlinesF :: F Inlines -> F Inlines -trimInlinesF = liftM trimInlines - -returnF :: Monad m => a -> m (F a) -returnF = return . return diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs deleted file mode 100644 index 1eb8a3b00..000000000 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ /dev/null @@ -1,217 +0,0 @@ -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Org-mode parsing utilities. - -Most functions are simply re-exports from @Text.Pandoc.Parsing@, some -functions are adapted to Org-mode specific functionality. --} -module Text.Pandoc.Readers.Org.Parsing - ( OrgParser - , anyLine - , blanklines - , newline - , parseFromString - , skipSpaces1 - , inList - , withContext - , getExportSetting - , updateLastForbiddenCharPos - , updateLastPreCharPos - , orgArgKey - , orgArgWord - , orgArgWordChar - -- * Re-exports from Text.Pandoc.Parser - , ParserContext (..) - , many1Till - , notFollowedBy' - , spaceChar - , nonspaceChar - , skipSpaces - , blankline - , enclosed - , stringAnyCase - , charsInBalanced - , uri - , withRaw - , readWithM - , guardEnabled - , updateLastStrPos - , notAfterString - , ParserState (..) - , registerHeader - , QuoteContext (..) - , singleQuoteStart - , singleQuoteEnd - , doubleQuoteStart - , doubleQuoteEnd - , dash - , ellipses - , citeKey - -- * Re-exports from Text.Pandoc.Parsec - , runParser - , runParserT - , getInput - , char - , letter - , digit - , alphaNum - , skipMany1 - , spaces - , anyChar - , satisfy - , string - , count - , eof - , noneOf - , oneOf - , lookAhead - , notFollowedBy - , many - , many1 - , manyTill - , (<|>) - , (<?>) - , choice - , try - , sepBy - , sepBy1 - , sepEndBy1 - , option - , optional - , optionMaybe - , getState - , updateState - , SourcePos - , getPosition - ) where - -import Text.Pandoc.Readers.Org.ParserState - -import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline - , parseFromString ) - -import Control.Monad ( guard ) -import Control.Monad.Reader ( ReaderT ) - --- | The parser used to read org files. -type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) - --- --- Adaptions and specializations of parsing utilities --- - --- | Parse any line of text -anyLine :: Monad m => OrgParser m String -anyLine = - P.anyLine - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- The version Text.Pandoc.Parsing cannot be used, as we need additional parts --- of the state saved and restored. -parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a -parseFromString parser str' = do - oldLastPreCharPos <- orgStateLastPreCharPos <$> getState - updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } - result <- P.parseFromString parser str' - updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } - return result - --- | Skip one or more tab or space characters. -skipSpaces1 :: Monad m => OrgParser m () -skipSpaces1 = skipMany1 spaceChar - --- | Like @Text.Parsec.Char.newline@, but causes additional state changes. -newline :: Monad m => OrgParser m Char -newline = - P.newline - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: Monad m => OrgParser m [Char] -blanklines = - P.blanklines - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- | Succeeds when we're in list context. -inList :: Monad m => OrgParser m () -inList = do - ctx <- orgStateParserContext <$> getState - guard (ctx == ListItemState) - --- | Parse in different context -withContext :: Monad m - => ParserContext -- ^ New parser context - -> OrgParser m a -- ^ Parser to run in that context - -> OrgParser m a -withContext context parser = do - oldContext <- orgStateParserContext <$> getState - updateState $ \s -> s{ orgStateParserContext = context } - result <- parser - updateState $ \s -> s{ orgStateParserContext = oldContext } - return result - --- --- Parser state functions --- - --- | Get an export setting. -getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a -getExportSetting s = s . orgStateExportSettings <$> getState - --- | Set the current position as the last position at which a forbidden char --- was found (i.e. a character which is not allowed at the inner border of --- markup). -updateLastForbiddenCharPos :: Monad m => OrgParser m () -updateLastForbiddenCharPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} - --- | Set the current parser position as the position at which a character was --- seen which allows inline markup to follow. -updateLastPreCharPos :: Monad m => OrgParser m () -updateLastPreCharPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastPreCharPos = Just p} - --- --- Org key-value parsing --- - --- | Read the key of a plist style key-value list. -orgArgKey :: Monad m => OrgParser m String -orgArgKey = try $ - skipSpaces *> char ':' - *> many1 orgArgWordChar - --- | Read the value of a plist style key-value list. -orgArgWord :: Monad m => OrgParser m String -orgArgWord = many1 orgArgWordChar - --- | Chars treated as part of a word in plists. -orgArgWordChar :: Monad m => OrgParser m Char -orgArgWordChar = alphaNum <|> oneOf "-_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs deleted file mode 100644 index 8c87cfa25..000000000 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ /dev/null @@ -1,97 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Utility functions used in other Pandoc Org modules. --} -module Text.Pandoc.Readers.Org.Shared - ( cleanLinkString - , isImageFilename - , rundocBlockClass - , toRundocAttrib - , translateLang - ) where - -import Control.Arrow ( first ) -import Data.Char ( isAlphaNum ) -import Data.List ( isPrefixOf, isSuffixOf ) - - --- | Check whether the given string looks like the path to of URL of an image. -isImageFilename :: String -> Bool -isImageFilename filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols || - ':' `notElem` filename) - where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] - protocols = [ "file", "http", "https" ] - --- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if --- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' - _ | isUrl s -> Just s -- URL - _ -> Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) - --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - --- | Prefix the name of a attribute, marking it as a code execution parameter. -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first (rundocPrefix ++) - --- | Translate from Org-mode's programming language identifiers to those used --- by Pandoc. This is useful to allow for proper syntax highlighting in --- Pandoc output. -translateLang :: String -> String -translateLang cs = - case cs of - "C" -> "c" - "C++" -> "cpp" - "emacs-lisp" -> "commonlisp" -- emacs lisp is not supported - "js" -> "javascript" - "lisp" -> "commonlisp" - "R" -> "r" - "sh" -> "bash" - "sqlite" -> "sql" - _ -> cs |