aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2016-07-01 21:14:04 +0200
committerAlbert Krewinkel <albert@zeitkraut.de>2016-07-01 23:05:32 +0200
commit17484ed01a7659beddd93114d2ff542005df2465 (patch)
tree3ee3b9886054d00d69eb134c40c88d390e0aaee7 /src
parent2f8d6755f4a799544fba2dc364004f5035b45c90 (diff)
downloadpandoc-17484ed01a7659beddd93114d2ff542005df2465.tar.gz
Org reader: parse as headlines, convert to blocks
Emacs org-mode is based on outline-mode, which treats documents as trees with headlines are nodes. The reader is refactored to parse into a similar tree structure. This simplifies transformations acting on document (sub-)trees.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs133
1 files changed, 86 insertions, 47 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 5423b1b83..9ebb22d13 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
{-
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -46,7 +47,7 @@ import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Options
import Text.Pandoc.Shared ( compactify', compactify'DL )
-import Control.Monad ( foldM, guard, mzero )
+import Control.Monad ( foldM, guard, mzero, void )
import Data.Char ( isSpace, toLower, toUpper)
import Data.List ( foldl', intersperse, isPrefixOf )
import qualified Data.Map as M
@@ -82,6 +83,82 @@ toPropertyValue = PropertyValue
-- | Key/value pairs from a PROPERTIES drawer
type Properties = [(PropertyKey, PropertyValue)]
+-- | Org mode headline (i.e. a document subtree).
+data Headline = Headline
+ { headlineLevel :: Int
+ , headlineText :: Inlines
+ , headlineTags :: [Tag]
+ , headlineProperties :: Properties
+ , headlineContents :: Blocks
+ , headlineChildren :: [Headline]
+ }
+
+--
+-- Parsing headlines and subtrees
+--
+
+-- | Read an Org mode headline and its contents (i.e. a document subtree).
+-- @lvl@ gives the minimum acceptable level of the tree.
+headline :: Int -> OrgParser (F Headline)
+headline lvl = try $ do
+ level <- headerStart
+ guard (lvl <= level)
+ title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle
+ tags <- option [] headerTags
+ newline
+ properties <- option mempty propertiesDrawer
+ contents <- blocks
+ children <- many (headline (lvl + 1))
+ return $ do
+ title' <- title
+ contents' <- contents
+ children' <- sequence children
+ return $ Headline
+ { headlineLevel = level
+ , headlineText = title'
+ , headlineTags = tags
+ , headlineProperties = properties
+ , headlineContents = contents'
+ , headlineChildren = children'
+ }
+ where
+ endOfTitle :: OrgParser ()
+ endOfTitle = void . lookAhead $ optional headerTags *> newline
+
+ headerTags :: OrgParser [Tag]
+ headerTags = try $
+ let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
+ in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
+
+-- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks
+headlineToBlocks :: Headline -> OrgParser Blocks
+headlineToBlocks (Headline {..}) = do
+ let text = tagTitle headlineText headlineTags
+ let propAttr = propertiesToAttr headlineProperties
+ attr <- registerHeader propAttr headlineText
+ let header = B.headerWith attr headlineLevel text
+ childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren)
+ return $ header <> headlineContents <> childrenBlocks
+
+propertiesToAttr :: Properties -> Attr
+propertiesToAttr properties =
+ let
+ toStringPair prop = (fromKey (fst prop), fromValue (snd prop))
+ customIdKey = toPropertyKey "custom_id"
+ classKey = toPropertyKey "class"
+ id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties
+ cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties
+ kvs' = map toStringPair . filter ((`notElem` [customIdKey, classKey]) . fst)
+ $ properties
+ in
+ (id', words cls, kvs')
+
+tagTitle :: Inlines -> [Tag] -> Inlines
+tagTitle title tags = title <> (mconcat $ map tagToInline tags)
+
+tagToInline :: Tag -> Inlines
+tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty
+
--
-- parsing blocks
@@ -90,9 +167,11 @@ type Properties = [(PropertyKey, PropertyValue)]
-- | Get a list of blocks.
blockList :: OrgParser [Block]
blockList = do
- blocks' <- blocks
- st <- getState
- return . B.toList $ runF blocks' st
+ initialBlocks <- blocks
+ headlines <- sequence <$> manyTill (headline 1) eof
+ st <- getState
+ headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st
+ return . B.toList $ (runF initialBlocks st) <> headlineBlocks
-- | Get the meta information safed in the state.
meta :: OrgParser Meta
@@ -101,7 +180,7 @@ meta = do
return $ runF (orgStateMeta st) st
blocks :: OrgParser (F Blocks)
-blocks = mconcat <$> manyTill block eof
+blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
block :: OrgParser (F Blocks)
block = choice [ mempty <$ blanklines
@@ -111,7 +190,6 @@ block = choice [ mempty <$ blanklines
, example
, genericDrawer
, specialLine
- , header
, horizontalRule
, list
, latexFragment
@@ -633,47 +711,6 @@ parseFormat = try $ do
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
---
--- Headers
---
-
--- | Headers
-header :: OrgParser (F Blocks)
-header = try $ do
- level <- headerStart
- title <- manyTill inline (lookAhead $ optional headerTags <* newline)
- tags <- option [] headerTags
- newline
- let text = tagTitle title tags
- propAttr <- option nullAttr (propertiesToAttr <$> propertiesDrawer)
- attr <- registerHeader propAttr (runF text def)
- return (B.headerWith attr level <$> text)
- where
- tagTitle :: [F Inlines] -> [Tag] -> F Inlines
- tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags
-
- tagToInlineF :: Tag -> F Inlines
- tagToInlineF t =
- return $ B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty
-
- headerTags :: OrgParser [Tag]
- headerTags = try $
- let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':'
- in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces)
-
-propertiesToAttr :: Properties -> Attr
-propertiesToAttr properties =
- let
- toStringPair prop = (fromKey (fst prop), fromValue (snd prop))
- customIdKey = toPropertyKey "custom_id"
- classKey = toPropertyKey "class"
- id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties
- cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` [customIdKey, classKey]) . fst)
- $ properties
- in
- (id', words cls, kvs')
-
--
-- Tables
@@ -838,6 +875,8 @@ noteBlock = try $ do
-- Paragraphs or Plain text
paraOrPlain :: OrgParser (F Blocks)
paraOrPlain = try $ do
+ -- Make sure we are not looking at a headline
+ notFollowedBy' (char '*' *> (oneOf " *"))
ils <- inlines
nl <- option False (newline *> return True)
-- Read block as paragraph, except if we are in a list context and the block