diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/BlockStarts.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 390 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/DocumentTree.hs | 304 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ExportSettings.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 129 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Meta.hs | 40 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 53 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Shared.hs | 8 |
9 files changed, 584 insertions, 379 deletions
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index cc2e82d5b..9c6614c99 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.BlockStarts + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -61,8 +61,12 @@ headerStart = try $ tableStart :: Monad m => OrgParser m Char tableStart = try $ skipSpaces *> char '|' +gridTableStart :: Monad m => OrgParser m () +gridTableStart = try $ skipSpaces <* char '+' <* char '-' + + latexEnvStart :: Monad m => OrgParser m String -latexEnvStart = try $ do +latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" @@ -93,8 +97,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") drawerStart :: Monad m => OrgParser m String -drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline +drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline where drawerName = char ':' *> manyTill nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () @@ -116,8 +119,8 @@ noteMarker = try $ do -- | Succeeds if the parser is at the end of a block. endOfBlock :: Monad m => OrgParser m () -endOfBlock = lookAhead . try $ do - void blankline <|> anyBlockStart +endOfBlock = lookAhead . try $ + void blankline <|> anyBlockStart where -- Succeeds if there is a new block starting at this position. anyBlockStart :: Monad m => OrgParser m () @@ -126,6 +129,7 @@ endOfBlock = lookAhead . try $ do , hline , metaLineStart , commentLineStart + , gridTableStart , void noteMarker , void tableStart , void drawerStart @@ -134,4 +138,3 @@ endOfBlock = lookAhead . try $ do , void bulletListStart , void orderedListStart ] - diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index b0a19b833..3e0ab0127 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} {- Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -18,9 +15,10 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} - +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} {- | - Module : Text.Pandoc.Readers.Org.Options + Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above @@ -34,6 +32,7 @@ module Text.Pandoc.Readers.Org.Blocks ) where import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks) import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState @@ -52,211 +51,21 @@ import Control.Monad (foldM, guard, mzero, void) import Data.Char (isSpace, toLower, toUpper) import Data.Default (Default) import Data.List (foldl', isPrefixOf) -import Data.Maybe (fromMaybe, isNothing) +import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) -- --- Org headers --- -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - --- | Create a tag containing the given string. -toTag :: String -> Tag -toTag = Tag - --- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } - deriving (Show, Eq, Ord) - --- | Create a property key containing the given string. Org mode keys are --- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower - --- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } - --- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue -toPropertyValue = PropertyValue - --- | Check whether the property value is non-nil (i.e. truish). -isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] - --- | Key/value pairs from a PROPERTIES drawer -type Properties = [(PropertyKey, PropertyValue)] - --- | Org mode headline (i.e. a document subtree). -data Headline = Headline - { headlineLevel :: Int - , headlineTodoMarker :: Maybe TodoMarker - , headlineText :: Inlines - , headlineTags :: [Tag] - , headlineProperties :: Properties - , headlineContents :: Blocks - , headlineChildren :: [Headline] - } - --- --- Parsing headlines and subtrees --- - --- | Read an Org mode headline and its contents (i.e. a document subtree). --- @lvl@ gives the minimum acceptable level of the tree. -headline :: PandocMonad m => Int -> OrgParser m (F Headline) -headline lvl = try $ do - level <- headerStart - guard (lvl <= level) - todoKw <- optionMaybe todoKeyword - title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle - tags <- option [] headerTags - newline - properties <- option mempty propertiesDrawer - contents <- blocks - children <- many (headline (level + 1)) - return $ do - title' <- title - contents' <- contents - children' <- sequence children - return $ Headline - { headlineLevel = level - , headlineTodoMarker = todoKw - , headlineText = title' - , headlineTags = tags - , headlineProperties = properties - , headlineContents = contents' - , headlineChildren = children' - } - where - endOfTitle :: Monad m => OrgParser m () - endOfTitle = void . lookAhead $ optional headerTags *> newline - - headerTags :: Monad m => OrgParser m [Tag] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) - --- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -headlineToBlocks hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln - -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") - -isArchiveTag :: Tag -> Bool -isArchiveTag = (== toTag "ARCHIVE") - --- | Check if the title starts with COMMENT. --- FIXME: This accesses builder internals not intended for use in situations --- like these. Replace once keyword parsing is supported. -isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False - -archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks -archivedHeadlineToBlocks hdln = do - archivedTreesOption <- getExportSetting exportArchivedTrees - case archivedTreesOption of - ArchivedTreesNoExport -> return mempty - ArchivedTreesExport -> headlineToHeaderWithContents hdln - ArchivedTreesHeadlineOnly -> headlineToHeader hdln - -headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithList hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - header <- headlineToHeader hdln - listElements <- sequence (map headlineToBlocks headlineChildren) - let listBlock = if null listElements - then mempty - else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel - then header - else flattenHeader header - return $ headerText <> headlineContents <> listBlock - where - flattenHeader :: Blocks -> Blocks - flattenHeader blks = - case B.toList blks of - (Header _ _ inlns:_) -> B.para (B.fromList inlns) - _ -> mempty - -headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks -headlineToHeaderWithContents hdln@(Headline {..}) = do - header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) - return $ header <> headlineContents <> childrenBlocks - -headlineToHeader :: Monad m => Headline -> OrgParser m Blocks -headlineToHeader (Headline {..}) = do - exportTodoKeyword <- getExportSetting exportWithTodoKeywords - let todoText = if exportTodoKeyword - then case headlineTodoMarker of - Just kw -> todoKeywordToInlines kw <> B.space - Nothing -> mempty - else mempty - let text = tagTitle (todoText <> headlineText) headlineTags - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text - -todoKeyword :: Monad m => OrgParser m TodoMarker -todoKeyword = try $ do - taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) - choice (map kwParser taskStates) - -todoKeywordToInlines :: TodoMarker -> Inlines -todoKeywordToInlines tdm = - let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm - classes = [todoState, todoText] - in B.spanWith (mempty, classes, mempty) (B.str todoText) - -propertiesToAttr :: Properties -> Attr -propertiesToAttr properties = - let - toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) - customIdKey = toPropertyKey "custom_id" - classKey = toPropertyKey "class" - unnumberedKey = toPropertyKey "unnumbered" - specialProperties = [customIdKey, classKey, unnumberedKey] - id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties - cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) - $ properties - isUnnumbered = - fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties - in - (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') - -tagTitle :: Inlines -> [Tag] -> Inlines -tagTitle title tags = title <> (mconcat $ map tagToInline tags) - -tagToInline :: Tag -> Inlines -tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty - - --- -- parsing blocks -- -- | Get a list of blocks. blockList :: PandocMonad m => OrgParser m [Block] blockList = do - initialBlocks <- blocks - headlines <- sequence <$> manyTill (headline 1) eof + headlines <- documentTree blocks inline st <- getState - headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st - return . B.toList $ (runF initialBlocks st) <> headlineBlocks + headlineBlocks <- headlineToBlocks $ runF headlines st + -- ignore first headline, it's the document's title + return . drop 1 . B.toList $ headlineBlocks -- | Get the meta information saved in the state. meta :: Monad m => OrgParser m Meta @@ -274,6 +83,7 @@ block = choice [ mempty <$ blanklines , figure , example , genericDrawer + , include , specialLine , horizontalRule , list @@ -302,7 +112,7 @@ data BlockAttributes = BlockAttributes -- | Convert BlockAttributes into pandoc Attr attrFromBlockAttributes :: BlockAttributes -> Attr -attrFromBlockAttributes (BlockAttributes{..}) = +attrFromBlockAttributes BlockAttributes{..} = let ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of @@ -311,18 +121,18 @@ attrFromBlockAttributes (BlockAttributes{..}) = kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => (String -> Bool) -> OrgParser m (String, String) -stringyMetaAttribute attrCheck = try $ do +stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute = try $ do metaLineStart attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName skipSpaces - attrValue <- anyLine + attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) blockAttributes :: PandocMonad m => OrgParser m BlockAttributes blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) + kv <- many stringyMetaAttribute + guard $ all (attrCheck . fst) kv let caption = foldl' (appendValues "CAPTION") Nothing kv let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv @@ -331,7 +141,7 @@ blockAttributes = try $ do Nothing -> return Nothing Just s -> Just <$> parseFromString inlines (s ++ "\n") kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes + return BlockAttributes { blockAttrName = name , blockAttrLabel = label , blockAttrCaption = caption' @@ -339,13 +149,7 @@ blockAttributes = try $ do } where attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "LABEL" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False + attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"] appendValues :: String -> Maybe String -> (String, String) -> Maybe String appendValues attrName accValue (key, value) = @@ -355,6 +159,7 @@ blockAttributes = try $ do Just acc -> Just $ acc ++ ' ':value Nothing -> Just value +-- | Parse key-value pairs for HTML attributes keyValues :: Monad m => OrgParser m [(String, String)] keyValues = try $ manyTill ((,) <$> key <*> value) newline @@ -381,7 +186,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case (map toLower blkType) of + case map toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -402,10 +207,10 @@ orgBlock = try $ do lowercase = map toLower rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) -rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) +rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) -parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) +parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do @@ -433,8 +238,7 @@ rawBlockContent blockType = try $ do stripIndent strs = map (drop (shortestIndent strs)) strs shortestIndent :: [String] -> Int - shortestIndent = foldr min maxBound - . map (length . takeWhile isSpace) + shortestIndent = foldr (min . length . takeWhile isSpace) maxBound . filter (not . null) tabsToSpaces :: Int -> String -> String @@ -442,7 +246,7 @@ rawBlockContent blockType = try $ do tabsToSpaces tabLen cs'@(c:cs) = case c of ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs + '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs _ -> cs' commaEscaped :: String -> String @@ -490,16 +294,15 @@ codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) content <- rawBlockContent blockType - resultsContent <- trailingResultsBlock + resultsContent <- option mempty babelResultsBlock let id' = fromMaybe mempty $ blockAttrName blockAttrs let codeBlck = B.codeBlockWith ( id', classes, kv ) content let labelledBlck = maybe (pure codeBlck) (labelDiv codeBlck) (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent return $ - (if exportsCode kv then labelledBlck else mempty) <> - (if exportsResults kv then resultBlck else mempty) + (if exportsCode kv then labelledBlck else mempty) <> + (if exportsResults kv then resultsContent else mempty) where labelDiv :: Blocks -> F Inlines -> F Blocks labelDiv blk value = @@ -514,12 +317,16 @@ codeBlock blockAttrs blockType = do exportsResults :: [(String, String)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -trailingResultsBlock :: PandocMonad m => OrgParser m (Maybe (F Blocks)) -trailingResultsBlock = optionMaybe . try $ do +-- | Parse the result of an evaluated babel code block. +babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks) +babelResultsBlock = try $ do blanklines - stringAnyCase "#+RESULTS:" - blankline + resultsMarker <|> + (lookAhead . void . try $ + manyTill (metaLineStart *> anyLineNewline) resultsMarker) block + where + resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) @@ -527,13 +334,13 @@ codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes parameters <- manyTill blockOption newline - return $ ( translateLang language : switchClasses - , originalLang language <> switchKv <> parameters - ) + return ( translateLang language : switchClasses + , originalLang language <> switchKv <> parameters + ) switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) switchesAsAttributes = try $ do - switches <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where addToAttr :: (Char, Maybe String, SwitchPolarity) @@ -541,10 +348,10 @@ switchesAsAttributes = try $ do -> ([String], [(String, String)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of - Just num -> (("startFrom", num):kv) + Just num -> ("startFrom", num):kv Nothing -> kv cls' = case pol of - SwitchPlus -> "continuedSourceBlock":cls + SwitchPlus -> "continuedSourceBlock":cls SwitchMinus -> cls in ("numberLines":cls', kv') addToAttr _ x = x @@ -573,7 +380,7 @@ genericSwitch :: Monad m genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p - return $ (c, arg, polarity) + return (c, arg, polarity) -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. @@ -593,8 +400,8 @@ orgParamValue = try $ *> noneOf "\n\r" `many1Till` endOfValue <* skipSpaces where - endOfValue = lookAhead $ (try $ skipSpaces <* oneOf "\n\r") - <|> (try $ skipSpaces1 <* orgArgKey) + endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r") + <|> try (skipSpaces1 <* orgArgKey) -- @@ -612,7 +419,7 @@ genericDrawer = try $ do -- Include drawer if it is explicitly included in or not explicitly excluded -- from the list of drawers that should be exported. PROPERTIES drawers are -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of + case exportDrawers . orgStateExportSettings $ state of _ | name == "PROPERTIES" -> return mempty Left names | name `elem` names -> return mempty Right names | name `notElem` names -> return mempty @@ -631,25 +438,6 @@ drawerEnd :: Monad m => OrgParser m String drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: Monad m => OrgParser m Properties -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: Monad m => OrgParser m (PropertyKey, PropertyValue) - property = try $ (,) <$> key <*> value - - key :: Monad m => OrgParser m PropertyKey - key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: Monad m => OrgParser m PropertyValue - value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - -- -- Figures @@ -665,7 +453,7 @@ figure = try $ do Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) - let isFigure = not . isNothing $ blockAttrCaption figAttrs + let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where selfTarget :: PandocMonad m => OrgParser m String @@ -700,8 +488,7 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ do - returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine where exampleLine :: Monad m => OrgParser m String exampleLine = try $ exampleLineStart *> anyLine @@ -717,6 +504,34 @@ exampleCode = B.codeBlockWith ("", ["example"], []) specialLine :: PandocMonad m => OrgParser m (F Blocks) specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine +-- | Include the content of a file. +include :: PandocMonad m => OrgParser m (F Blocks) +include = try $ do + metaLineStart <* stringAnyCase "include:" <* skipSpaces + filename <- includeTarget + blockType <- optionMaybe $ skipSpaces *> many1 alphaNum + blocksParser <- case blockType of + Just "example" -> + return $ pure . B.codeBlock <$> parseRaw + Just "export" -> do + format <- skipSpaces *> many (noneOf "\n\r\t ") + return $ pure . B.rawBlock format <$> parseRaw + Just "src" -> do + language <- skipSpaces *> many (noneOf "\n\r\t ") + let attr = (mempty, [language], mempty) + return $ pure . B.codeBlockWith attr <$> parseRaw + _ -> return $ pure . B.fromList <$> blockList + anyLine + insertIncludedFileF blocksParser ["."] filename + where + includeTarget :: PandocMonad m => OrgParser m FilePath + includeTarget = do + char '"' + manyTill (noneOf "\n\r\t") (char '"') + + parseRaw :: PandocMonad m => OrgParser m String + parseRaw = many anyChar + rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart @@ -755,11 +570,15 @@ data OrgTable = OrgTable } table :: PandocMonad m => OrgParser m (F Blocks) -table = try $ do +table = gridTableWith blocks True <|> orgTable + +-- | A normal org table +orgTable :: PandocMonad m => OrgParser m (F Blocks) +orgTable = try $ do -- don't allow a table on the first line of a list item; org requires that -- tables start at first non-space character on the line - let isFirstInListItem st = (orgStateParserContext st == ListItemState) && - (orgStateLastPreCharPos st == Nothing) + let isFirstInListItem st = orgStateParserContext st == ListItemState && + isNothing (orgStateLastPreCharPos st) guard =<< not . isFirstInListItem <$> getState blockAttrs <- blockAttributes lookAhead tableStart @@ -772,7 +591,7 @@ orgToPandocTable :: OrgTable -> Inlines -> Blocks orgToPandocTable (OrgTable colProps heads lns) caption = - let totalWidth = if any (not . isNothing) (map columnRelWidth colProps) + let totalWidth = if any isJust (map columnRelWidth colProps) then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps else Nothing in B.table caption (map (convertColProp totalWidth) colProps) heads lns @@ -782,7 +601,7 @@ orgToPandocTable (OrgTable colProps heads lns) caption = let align' = fromMaybe AlignDefault $ columnAlignment colProp width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) - <$> (columnRelWidth colProp) + <$> columnRelWidth colProp <*> totalWidth in (align', width') @@ -808,7 +627,7 @@ tableAlignRow = try $ do columnPropertyCell :: Monad m => OrgParser m ColumnProperty columnPropertyCell = emptyCell <|> propCell <?> "alignment info" where - emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) + emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell) propCell = try $ ColumnProperty <$> (skipSpaces *> char '<' @@ -854,28 +673,28 @@ normalizeTable (OrgTable colProps heads rows) = rowToContent :: OrgTable -> OrgTableRow -> F OrgTable -rowToContent orgTable row = +rowToContent tbl row = case row of OrgHlineRow -> return singleRowPromotedToHeader OrgAlignRow props -> return . setProperties $ props OrgContentRow cs -> appendToBody cs where singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable + singleRowPromotedToHeader = case tbl of + OrgTable{ orgTableHeader = [], orgTableRows = [b] } -> + tbl{ orgTableHeader = b , orgTableRows = [] } + _ -> tbl setProperties :: [ColumnProperty] -> OrgTable - setProperties ps = orgTable{ orgTableColumnProperties = ps } + setProperties ps = tbl{ orgTableColumnProperties = ps } appendToBody :: F [Blocks] -> F OrgTable appendToBody frow = do newRow <- frow - let oldRows = orgTableRows orgTable + let oldRows = orgTableRows tbl -- NOTE: This is an inefficient O(n) operation. This should be changed -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } + return tbl{ orgTableRows = oldRows ++ [newRow] } -- @@ -917,7 +736,7 @@ noteBlock = try $ do paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) paraOrPlain = try $ do -- Make sure we are not looking at a headline - notFollowedBy' (char '*' *> (oneOf " *")) + notFollowedBy' (char '*' *> oneOf " *") ils <- inlines nl <- option False (newline *> return True) -- Read block as paragraph, except if we are in a list context and the block @@ -926,7 +745,7 @@ paraOrPlain = try $ do try (guard nl *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + <|> return (B.plain <$> ils) -- @@ -938,16 +757,16 @@ list = choice [ definitionList, bulletList, orderedList ] <?> "list" definitionList :: PandocMonad m => OrgParser m (F Blocks) definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactifyDL . sequence + fmap (B.definitionList . compactifyDL) . sequence <$> many1 (definitionListItem $ bulletListStart' (Just n)) bulletList :: PandocMonad m => OrgParser m (F Blocks) bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify . sequence + fmap (B.bulletList . compactify) . sequence <$> many1 (listItem (bulletListStart' $ Just n)) orderedList :: PandocMonad m => OrgParser m (F Blocks) -orderedList = fmap B.orderedList . fmap compactify . sequence +orderedList = fmap (B.orderedList . compactify) . sequence <$> many1 (listItem orderedListStart) bulletListStart' :: Monad m => Maybe Int -> OrgParser m Int @@ -1004,16 +823,3 @@ listContinuation markerLength = try $ <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline - - -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Monad m => Int -> OrgParser m String - indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - --- | Parse any line, include the final newline in the output. -anyLineNewline :: Monad m => OrgParser m String -anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs new file mode 100644 index 000000000..743f6cc0e --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -0,0 +1,304 @@ +{- +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Readers.Org.DocumentTree + Copyright : Copyright (C) 2014-2017 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Parsers for org-mode headlines and document subtrees +-} +module Text.Pandoc.Readers.Org.DocumentTree + ( documentTree + , headlineToBlocks + ) where + +import Control.Arrow ((***)) +import Control.Monad (guard, void) +import Data.Char (toLower, toUpper) +import Data.List (intersperse) +import Data.Monoid ((<>)) +import Text.Pandoc.Builder (Blocks, Inlines) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + +import qualified Data.Map as Map +import qualified Text.Pandoc.Builder as B + +-- +-- Org headers +-- + +-- | Parse input as org document tree. +documentTree :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> OrgParser m (F Headline) +documentTree blocks inline = do + initialBlocks <- blocks + headlines <- sequence <$> manyTill (headline blocks inline 1) eof + title <- fmap (getTitle . unMeta) . orgStateMeta <$> getState + return $ do + headlines' <- headlines + initialBlocks' <- initialBlocks + title' <- title + return Headline + { headlineLevel = 0 + , headlineTodoMarker = Nothing + , headlineText = B.fromList title' + , headlineTags = mempty + , headlineProperties = mempty + , headlineContents = initialBlocks' + , headlineChildren = headlines' + } + where + getTitle :: Map.Map String MetaValue -> [Inline] + getTitle metamap = + case Map.lookup "title" metamap of + Just (MetaInlines inlns) -> inlns + _ -> [] + +newtype Tag = Tag { fromTag :: String } + deriving (Show, Eq) + +-- | Create a tag containing the given string. +toTag :: String -> Tag +toTag = Tag + +-- | The key (also called name or type) of a property. +newtype PropertyKey = PropertyKey { fromKey :: String } + deriving (Show, Eq, Ord) + +-- | Create a property key containing the given string. Org mode keys are +-- case insensitive and are hence converted to lower case. +toPropertyKey :: String -> PropertyKey +toPropertyKey = PropertyKey . map toLower + +-- | The value assigned to a property. +newtype PropertyValue = PropertyValue { fromValue :: String } + +-- | Create a property value containing the given string. +toPropertyValue :: String -> PropertyValue +toPropertyValue = PropertyValue + +-- | Check whether the property value is non-nil (i.e. truish). +isNonNil :: PropertyValue -> Bool +isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] + +-- | Key/value pairs from a PROPERTIES drawer +type Properties = [(PropertyKey, PropertyValue)] + +-- | Org mode headline (i.e. a document subtree). +data Headline = Headline + { headlineLevel :: Int + , headlineTodoMarker :: Maybe TodoMarker + , headlineText :: Inlines + , headlineTags :: [Tag] + , headlineProperties :: Properties + , headlineContents :: Blocks + , headlineChildren :: [Headline] + } + +-- | Read an Org mode headline and its contents (i.e. a document subtree). +-- @lvl@ gives the minimum acceptable level of the tree. +headline :: PandocMonad m + => OrgParser m (F Blocks) + -> OrgParser m (F Inlines) + -> Int + -> OrgParser m (F Headline) +headline blocks inline lvl = try $ do + level <- headerStart + guard (lvl <= level) + todoKw <- optionMaybe todoKeyword + title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle + tags <- option [] headerTags + newline + properties <- option mempty propertiesDrawer + contents <- blocks + children <- many (headline blocks inline (level + 1)) + return $ do + title' <- title + contents' <- contents + children' <- sequence children + return Headline + { headlineLevel = level + , headlineTodoMarker = todoKw + , headlineText = title' + , headlineTags = tags + , headlineProperties = properties + , headlineContents = contents' + , headlineChildren = children' + } + where + endOfTitle :: Monad m => OrgParser m () + endOfTitle = void . lookAhead $ optional headerTags *> newline + + headerTags :: Monad m => OrgParser m [Tag] + headerTags = try $ + let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' + in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) + +-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks +headlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +headlineToBlocks hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + case () of + _ | any isNoExportTag headlineTags -> return mempty + _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln + _ | isCommentTitle headlineText -> return mempty + _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln + _ | otherwise -> headlineToHeaderWithContents hdln + +isNoExportTag :: Tag -> Bool +isNoExportTag = (== toTag "noexport") + +isArchiveTag :: Tag -> Bool +isArchiveTag = (== toTag "ARCHIVE") + +-- | Check if the title starts with COMMENT. +-- FIXME: This accesses builder internals not intended for use in situations +-- like these. Replace once keyword parsing is supported. +isCommentTitle :: Inlines -> Bool +isCommentTitle (B.toList -> (Str "COMMENT":_)) = True +isCommentTitle _ = False + +archivedHeadlineToBlocks :: Monad m => Headline -> OrgParser m Blocks +archivedHeadlineToBlocks hdln = do + archivedTreesOption <- getExportSetting exportArchivedTrees + case archivedTreesOption of + ArchivedTreesNoExport -> return mempty + ArchivedTreesExport -> headlineToHeaderWithContents hdln + ArchivedTreesHeadlineOnly -> headlineToHeader hdln + +headlineToHeaderWithList :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithList hdln@Headline {..} = do + maxHeadlineLevels <- getExportSetting exportHeadlineLevels + header <- headlineToHeader hdln + listElements <- mapM headlineToBlocks headlineChildren + let listBlock = if null listElements + then mempty + else B.orderedList listElements + let headerText = if maxHeadlineLevels == headlineLevel + then header + else flattenHeader header + return $ headerText <> headlineContents <> listBlock + where + flattenHeader :: Blocks -> Blocks + flattenHeader blks = + case B.toList blks of + (Header _ _ inlns:_) -> B.para (B.fromList inlns) + _ -> mempty + +headlineToHeaderWithContents :: Monad m => Headline -> OrgParser m Blocks +headlineToHeaderWithContents hdln@Headline {..} = do + header <- headlineToHeader hdln + childrenBlocks <- mconcat <$> mapM headlineToBlocks headlineChildren + return $ header <> headlineContents <> childrenBlocks + +headlineToHeader :: Monad m => Headline -> OrgParser m Blocks +headlineToHeader Headline {..} = do + exportTodoKeyword <- getExportSetting exportWithTodoKeywords + exportTags <- getExportSetting exportWithTags + let todoText = if exportTodoKeyword + then case headlineTodoMarker of + Just kw -> todoKeywordToInlines kw <> B.space + Nothing -> mempty + else mempty + let text = todoText <> headlineText <> + if exportTags + then tagsToInlines headlineTags + else mempty + let propAttr = propertiesToAttr headlineProperties + attr <- registerHeader propAttr headlineText + return $ B.headerWith attr headlineLevel text + +todoKeyword :: Monad m => OrgParser m TodoMarker +todoKeyword = try $ do + taskStates <- activeTodoMarkers <$> getState + let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) <* spaceChar) + choice (map kwParser taskStates) + +todoKeywordToInlines :: TodoMarker -> Inlines +todoKeywordToInlines tdm = + let todoText = todoMarkerName tdm + todoState = map toLower . show $ todoMarkerState tdm + classes = [todoState, todoText] + in B.spanWith (mempty, classes, mempty) (B.str todoText) + +propertiesToAttr :: Properties -> Attr +propertiesToAttr properties = + let + toStringPair = fromKey *** fromValue + customIdKey = toPropertyKey "custom_id" + classKey = toPropertyKey "class" + unnumberedKey = toPropertyKey "unnumbered" + specialProperties = [customIdKey, classKey, unnumberedKey] + id' = maybe mempty fromValue . lookup customIdKey $ properties + cls = maybe mempty fromValue . lookup classKey $ properties + kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + $ properties + isUnnumbered = + maybe False isNonNil . lookup unnumberedKey $ properties + in + (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + +tagsToInlines :: [Tag] -> Inlines +tagsToInlines [] = mempty +tagsToInlines tags = + (B.space <>) . mconcat . intersperse (B.str "\160") . map tagToInline $ tags + where + tagToInline :: Tag -> Inlines + tagToInline t = tagSpan t . B.smallcaps . B.str $ fromTag t + +-- | Wrap the given inline in a span, marking it as a tag. +tagSpan :: Tag -> Inlines -> Inlines +tagSpan t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) + + + + + +-- | Read a :PROPERTIES: drawer and return the key/value pairs contained +-- within. +propertiesDrawer :: Monad m => OrgParser m Properties +propertiesDrawer = try $ do + drawerType <- drawerStart + guard $ map toUpper drawerType == "PROPERTIES" + manyTill property (try endOfDrawer) + where + property :: Monad m => OrgParser m (PropertyKey, PropertyValue) + property = try $ (,) <$> key <*> value + + key :: Monad m => OrgParser m PropertyKey + key = fmap toPropertyKey . try $ + skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + + value :: Monad m => OrgParser m PropertyValue + value = fmap toPropertyValue . try $ + skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + + endOfDrawer :: Monad m => OrgParser m String + endOfDrawer = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index 1d6fdd7e1..11f0972d5 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ExportSettings + Copyright : © 2016–2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -71,7 +71,7 @@ exportSetting = choice , ignoredSetting "pri" , ignoredSetting "prop" , ignoredSetting "stat" - , ignoredSetting "tags" + , booleanSetting "tags" (\val es -> es { exportWithTags = val }) , ignoredSetting "tasks" , ignoredSetting "tex" , ignoredSetting "timestamp" diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 64ffb8ef5..66273e05d 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Inlines + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -48,7 +48,7 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Monad (guard, mplus, mzero, void, when) +import Control.Monad (guard, mplus, mzero, unless, void, when) import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) @@ -63,7 +63,7 @@ import Prelude hiding (sequence) -- recordAnchorId :: PandocMonad m => String -> OrgParser m () recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + s{ orgStateAnchorIds = i : orgStateAnchorIds s } pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () pushToInlineCharStack c = updateState $ \s -> @@ -120,6 +120,7 @@ inline = , superscript , inlineLaTeX , exportSnippet + , macro , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) @@ -183,7 +184,7 @@ cite = try $ berkeleyCite <|> do , orgRefCite , berkeleyTextualCite ] - return $ (flip B.cite (B.text raw)) <$> cs + return $ flip B.cite (B.text raw) <$> cs -- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) @@ -208,7 +209,7 @@ normalOrgRefCite = try $ do orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) orgRefCiteList citeMode = try $ do key <- orgRefCiteKey - returnF $ Citation + returnF Citation { citationId = key , citationPrefix = mempty , citationSuffix = mempty @@ -231,11 +232,11 @@ berkeleyCite = try $ do return $ if parens then toCite - . maybe id (\p -> alterFirst (prependPrefix p)) prefix - . maybe id (\s -> alterLast (appendSuffix s)) suffix + . maybe id (alterFirst . prependPrefix) prefix + . maybe id (alterLast . appendSuffix) suffix $ citationList else maybe mempty (<> " ") prefix - <> (toListOfCites $ map toInTextMode citationList) + <> toListOfCites (map toInTextMode citationList) <> maybe mempty (", " <>) suffix where toCite :: [Citation] -> Inlines @@ -249,7 +250,7 @@ berkeleyCite = try $ do alterFirst, alterLast :: (a -> a) -> [a] -> [a] alterFirst _ [] = [] - alterFirst f (c:cs) = (f c):cs + alterFirst f (c:cs) = f c : cs alterLast f = reverse . alterFirst f . reverse prependPrefix, appendSuffix :: Inlines -> Citation -> Citation @@ -270,7 +271,7 @@ berkeleyCitationList = try $ do skipSpaces commonPrefix <- optionMaybe (try $ citationListPart <* char ';') citations <- citeList - commonSuffix <- optionMaybe (try $ citationListPart) + commonSuffix <- optionMaybe (try citationListPart) char ']' return (BerkeleyCitationList parens <$> sequence commonPrefix @@ -338,8 +339,15 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. orgRefCiteKey :: PandocMonad m => OrgParser m String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) +orgRefCiteKey = + let citeKeySpecialChars = "-_:\\./," :: String + isCiteKeySpecialChar c = c `elem` citeKeySpecialChars + isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c + endOfCitation = try $ do + many $ satisfy isCiteKeySpecialChar + satisfy $ not . isCiteKeyChar + in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + -- | Supported citation types. Only a small subset of org-ref types is -- supported for now. TODO: rewrite this, use LaTeX reader as template. @@ -365,15 +373,16 @@ citation = try $ do return $ do x <- pref y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } + return Citation + { citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } where prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) @@ -395,7 +404,7 @@ inlineNote = try $ do ref <- many alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - when (not $ null ref) $ + unless (null ref) $ addToNotesTable ("fn:" ++ ref, note) return $ B.note <$> note @@ -405,7 +414,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" ++ ref ++ "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -429,7 +438,7 @@ explicitOrImageLink = try $ do src <- srcF case cleanLinkString title of Just imgSrc | isImageFilename imgSrc -> - pure $ B.link src "" $ B.image imgSrc mempty mempty + pure . B.link src "" $ B.image imgSrc mempty mempty _ -> linkToInlinesF src =<< title' @@ -686,13 +695,13 @@ mathEnd c = try $ do return res -enclosedInlines :: PandocMonad m => OrgParser m a +enclosedInlines :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m (F Inlines) enclosedInlines start end = try $ trimInlinesF . mconcat <$> enclosed start end inline -enclosedRaw :: PandocMonad m => OrgParser m a +enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m String enclosedRaw start end = try $ @@ -771,7 +780,7 @@ notAfterForbiddenBorderChar = do -- | Read a sub- or superscript expression subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ - choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") + choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") , simpleSubOrSuperString ] >>= parseFromString (mconcat <$> many inline) @@ -809,7 +818,7 @@ inlineLaTeX = try $ do enableExtension Ext_raw_tex (readerExtensions def) } } texMathToPandoc :: String -> Maybe [Inline] - texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline + texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just @@ -839,26 +848,49 @@ exportSnippet = try $ do snippet <- manyTill anyChar (try $ string "@@") returnF $ B.rawInline format snippet +macro :: PandocMonad m => OrgParser m (F Inlines) +macro = try $ do + recursionDepth <- orgStateMacroDepth <$> getState + guard $ recursionDepth < 15 + string "{{{" + name <- many alphaNum + args <- ([] <$ string "}}}") + <|> char '(' *> argument `sepBy` char ',' <* eoa + expander <- lookupMacro name <$> getState + case expander of + Nothing -> mzero + Just fn -> do + updateState $ \s -> s { orgStateMacroDepth = recursionDepth + 1 } + res <- parseFromString (mconcat <$> many inline) $ fn args + updateState $ \s -> s { orgStateMacroDepth = recursionDepth } + return res + where + argument = many $ notFollowedBy eoa *> noneOf "," + eoa = string ")}}}" + smart :: PandocMonad m => OrgParser m (F Inlines) -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) +smart = choice [doubleQuoted, singleQuoted, orgApostrophe, orgDash, orgEllipses] where orgDash = do - guard =<< getExportSetting exportSpecialStrings - dash <* updatePositions '-' + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> dash <* updatePositions '-' orgEllipses = do - guard =<< getExportSetting exportSpecialStrings - ellipses <* updatePositions '.' - orgApostrophe = - (char '\'' <|> char '\8217') <* updateLastPreCharPos - <* updateLastForbiddenCharPos - *> return (B.str "\x2019") + guardOrSmartEnabled =<< getExportSetting exportSpecialStrings + pure <$> ellipses <* updatePositions '.' + orgApostrophe = do + guardEnabled Ext_smart + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + returnF (B.str "\x2019") + +guardOrSmartEnabled :: PandocMonad m => Bool -> OrgParser m () +guardOrSmartEnabled b = do + smartExtension <- extensionEnabled Ext_smart <$> getOption readerExtensions + guard (b || smartExtension) singleQuoted :: PandocMonad m => OrgParser m (F Inlines) singleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes singleQuoteStart updatePositions '\'' withQuoteContext InSingleQuote $ @@ -870,10 +902,13 @@ singleQuoted = try $ do -- in the same paragraph. doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) doubleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes + guardOrSmartEnabled =<< getExportSetting exportSmartQuotes doubleQuoteStart updatePositions '"' contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) + let doubleQuotedContent = withQuoteContext InDoubleQuote $ do + doubleQuoteEnd + updateLastForbiddenCharPos + return . fmap B.doubleQuoted . trimInlinesF $ contents + let leftQuoteAndContent = return $ pure (B.str "\8220") <> contents + doubleQuotedContent <|> leftQuoteAndContent diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 7938fc6c6..d22902eae 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -44,7 +44,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition -import Control.Monad (mzero, void) +import Control.Monad (mzero, void, when) import Data.Char (toLower) import Data.List (intersperse) import qualified Data.Map as M @@ -75,14 +75,16 @@ declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do key <- map toLower <$> metaKey (key', value) <- metaValue key - updateState $ \st -> st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } + let addMetaValue st = + st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } + when (key' /= "results") $ updateState addMetaValue metaKey :: Monad m => OrgParser m String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, (F MetaValue)) +metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -109,7 +111,7 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',') + itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' newline items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList @@ -151,6 +153,7 @@ optionLine = try $ do "todo" -> todoSequence >>= updateState . registerTodoSequence "seq_todo" -> todoSequence >>= updateState . registerTodoSequence "typ_todo" -> todoSequence >>= updateState . registerTodoSequence + "macro" -> macroDefinition >>= updateState . registerMacro _ -> mzero addLinkFormat :: Monad m => String @@ -160,7 +163,7 @@ addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m ((String, String -> String)) +parseLinkFormat :: Monad m => OrgParser m (String, String -> String) parseLinkFormat = try $ do linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat @@ -169,8 +172,7 @@ parseLinkFormat = try $ do -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. parseFormat :: Monad m => OrgParser m (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend +parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares replacePlain = try $ (\x -> concat . flip intersperse x) @@ -218,3 +220,27 @@ todoSequence = try $ do let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers + +macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition = try $ do + macroName <- many1 nonspaceChar <* skipSpaces + firstPart <- expansionPart + (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) + let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder + return (macroName, expander) + where + placeholder :: Monad m => OrgParser m Int + placeholder = try . fmap read $ char '$' *> many1 digit + + expansionPart :: Monad m => OrgParser m String + expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + + alternate :: [a] -> [a] -> [a] + alternate [] ys = ys + alternate xs [] = xs + alternate (x:xs) (y:ys) = x : y : alternate xs ys + + reorder :: [Int] -> [String] -> [String] + reorder perm xs = + let element n = take 1 $ drop (n - 1) xs + in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index bdd1dc951..92f868516 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,8 +1,7 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,8 +19,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.ParserState + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -39,6 +38,9 @@ module Text.Pandoc.Readers.Org.ParserState , TodoState (..) , activeTodoMarkers , registerTodoSequence + , MacroExpander + , lookupMacro + , registerMacro , F , askF , asksF @@ -58,14 +60,14 @@ import qualified Data.Set as Set import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Definition (Meta (..), nullMeta) -import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Logging -import Text.Pandoc.Parsing (HasHeaderMap (..), HasIdentifierList (..), - HasLogMessages (..), - HasLastStrPosition (..), HasQuoteContext (..), +import Text.Pandoc.Options (ReaderOptions (..)) +import Text.Pandoc.Parsing (Future, HasHeaderMap (..), HasIdentifierList (..), + HasIncludeFiles (..), HasLastStrPosition (..), + HasLogMessages (..), HasQuoteContext (..), HasReaderOptions (..), ParserContext (..), - QuoteContext (..), SourcePos, Future, - askF, asksF, returnF, runF, trimInlinesF) + QuoteContext (..), SourcePos, askF, asksF, returnF, + runF, trimInlinesF) -- | This is used to delay evaluation until all relevant information has been -- parsed and made available in the parser state. @@ -78,6 +80,8 @@ type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. type OrgLinkFormatters = M.Map String (String -> String) +-- | Macro expander function +type MacroExpander = [String] -> String -- | The states in which a todo item can be data TodoState = Todo | Done @@ -101,10 +105,13 @@ data OrgParserState = OrgParserState , orgStateExportSettings :: ExportSettings , orgStateHeaderMap :: M.Map Inlines String , orgStateIdentifiers :: Set.Set String + , orgStateIncludeFiles :: [String] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters + , orgStateMacros :: M.Map String MacroExpander + , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable , orgStateOptions :: ReaderOptions @@ -141,6 +148,12 @@ instance HasLogMessages OrgParserState where addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st } getLogMessages st = reverse $ orgLogMessages st +instance HasIncludeFiles OrgParserState where + getIncludeFiles = orgStateIncludeFiles + addIncludeFile f st = st { orgStateIncludeFiles = f : orgStateIncludeFiles st } + dropLatestIncludeFile st = + st { orgStateIncludeFiles = drop 1 $ orgStateIncludeFiles st } + instance Default OrgParserState where def = defaultOrgParserState @@ -152,10 +165,13 @@ defaultOrgParserState = OrgParserState , orgStateExportSettings = def , orgStateHeaderMap = M.empty , orgStateIdentifiers = Set.empty + , orgStateIncludeFiles = [] , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateLinkFormatters = M.empty + , orgStateMacros = M.empty + , orgStateMacroDepth = 0 , orgStateMeta = return nullMeta , orgStateNotes' = [] , orgStateOptions = def @@ -185,6 +201,15 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences +lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro macroName = M.lookup macroName . orgStateMacros + +registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro (name, expander) st = + let curMacros = orgStateMacros st + in st{ orgStateMacros = M.insert name expander curMacros } + + -- -- Export Settings @@ -213,6 +238,7 @@ data ExportSettings = ExportSettings , exportWithAuthor :: Bool -- ^ Include author in final meta-data , exportWithCreator :: Bool -- ^ Include creator in final meta-data , exportWithEmail :: Bool -- ^ Include email in final meta-data + , exportWithTags :: Bool -- ^ Keep tags as part of headlines , exportWithTodoKeywords :: Bool -- ^ Keep TODO keywords in headers } @@ -225,11 +251,12 @@ defaultExportSettings = ExportSettings , exportDrawers = Left ["LOGBOOK"] , exportEmphasizedText = True , exportHeadlineLevels = 3 - , exportSmartQuotes = True + , exportSmartQuotes = False , exportSpecialStrings = True , exportSubSuperscripts = True , exportWithAuthor = True , exportWithCreator = True , exportWithEmail = True + , exportWithTags = True , exportWithTodoKeywords = True } diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 464ef9ca6..3273c92e4 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Parsing + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -31,6 +31,8 @@ functions are adapted to Org-mode specific functionality. module Text.Pandoc.Readers.Org.Parsing ( OrgParser , anyLine + , anyLineNewline + , indentWith , blanklines , newline , parseFromString @@ -70,6 +72,8 @@ module Text.Pandoc.Readers.Org.Parsing , dash , ellipses , citeKey + , gridTableWith + , insertIncludedFileF -- * Re-exports from Text.Pandoc.Parsec , runParser , runParserT diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index f89ce6732..952082ec1 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Shared + Copyright : Copyright (C) 2014-2017 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -56,7 +56,7 @@ cleanLinkString s = '.':'/':_ -> Just s -- relative path '.':'.':'/':_ -> Just s -- relative path -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s' + 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' _ | isUrl s -> Just s -- URL _ -> Nothing where |