aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2016-05-11 12:26:54 +0200
committerAlbert Krewinkel <albert@zeitkraut.de>2016-05-11 19:13:42 +0200
commit7a0729ea093cbf78188f9ef4e5b7c0f9a6b82c9b (patch)
treebf80e5250817256f28731e5bc39a8b38593d4f89 /src
parentfd9ec835ec1447fb6040ee63a1fa8ee13b19d555 (diff)
downloadpandoc-7a0729ea093cbf78188f9ef4e5b7c0f9a6b82c9b.tar.gz
Org reader: move parser state into separate module
The org reader code has become large and confusing. Extracting smaller parts into submodules should help to clean things up.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Org.hs215
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs175
2 files changed, 232 insertions, 158 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5a50a8f34..610397d58 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,6 +1,5 @@
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
{-
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -31,8 +30,7 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
module Text.Pandoc.Readers.Org ( readOrg ) where
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
- trimInlines )
+import Text.Pandoc.Builder ( Inlines, Blocks )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
@@ -43,153 +41,33 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
, parseFromString, blanklines
)
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
+import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Arrow (first)
-import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
-import Control.Monad.Reader (Reader, runReader, ask, asks, local)
+import Control.Monad (foldM, guard, mplus, mzero, when)
+import Control.Monad.Reader ( Reader, runReader )
import Data.Char (isAlphaNum, isSpace, toLower)
-import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
-import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isJust)
import Network.HTTP (urlEncode)
+
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Either PandocError Pandoc
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
-data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
-
+-- | The parser used to read org files.
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
-instance HasIdentifierList OrgParserState where
- extractIdentifierList = orgStateIdentifiers
- updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
-
-instance HasHeaderMap OrgParserState where
- extractHeaderMap = orgStateHeaderMap
- updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
-
-parseOrg :: OrgParser Pandoc
-parseOrg = do
- blocks' <- parseBlocks
- st <- getState
- let meta = runF (orgStateMeta' st) st
- let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
- return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
-
--- | Drop COMMENT headers and the document tree below those headers.
-dropCommentTrees :: [Block] -> [Block]
-dropCommentTrees [] = []
-dropCommentTrees (b:bs) =
- maybe (b:dropCommentTrees bs)
- (dropCommentTrees . flip dropUntilHeaderAboveLevel bs)
- (commentHeaderLevel b)
-
--- | Return the level of a header starting a comment or :noexport: tree and
--- Nothing otherwise.
-commentHeaderLevel :: Block -> Maybe Int
-commentHeaderLevel blk =
- case blk of
- (Header level _ ((Str "COMMENT"):_)) -> Just level
- (Header level _ title) | hasNoExportTag title -> Just level
- _ -> Nothing
- where
- hasNoExportTag :: [Inline] -> Bool
- hasNoExportTag = any isNoExportTag
-
- isNoExportTag :: Inline -> Bool
- isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True
- isNoExportTag _ = False
-
--- | Drop blocks until a header on or above the given level is seen
-dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
-dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
-
-isHeaderLevelLowerEq :: Int -> Block -> Bool
-isHeaderLevelLowerEq n blk =
- case blk of
- (Header level _ _) -> n >= level
- _ -> False
-
--
--- Parser State for Org
+-- Functions acting on the parser state
--
-
-type OrgNoteRecord = (String, F Blocks)
-type OrgNoteTable = [OrgNoteRecord]
-
-type OrgBlockAttributes = M.Map String String
-
-type OrgLinkFormatters = M.Map String (String -> String)
-
--- | Org-mode parser state
-data OrgParserState = OrgParserState
- { orgStateOptions :: ReaderOptions
- , orgStateAnchorIds :: [String]
- , orgStateBlockAttributes :: OrgBlockAttributes
- , orgStateEmphasisCharStack :: [Char]
- , orgStateEmphasisNewlines :: Maybe Int
- , orgStateLastForbiddenCharPos :: Maybe SourcePos
- , orgStateLastPreCharPos :: Maybe SourcePos
- , orgStateLastStrPos :: Maybe SourcePos
- , orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMeta :: Meta
- , orgStateMeta' :: F Meta
- , orgStateNotes' :: OrgNoteTable
- , orgStateParserContext :: ParserContext
- , orgStateIdentifiers :: Set.Set String
- , orgStateHeaderMap :: M.Map Inlines String
- }
-
-instance Default OrgParserLocal where
- def = OrgParserLocal NoQuote
-
-instance HasReaderOptions OrgParserState where
- extractReaderOptions = orgStateOptions
-
-instance HasMeta OrgParserState where
- setMeta field val st =
- st{ orgStateMeta = setMeta field val $ orgStateMeta st }
- deleteMeta field st =
- st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
-
-instance HasLastStrPosition OrgParserState where
- getLastStrPos = orgStateLastStrPos
- setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
-
-instance HasQuoteContext st (Reader OrgParserLocal) where
- getQuoteContext = asks orgLocalQuoteContext
- withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
-
-instance Default OrgParserState where
- def = defaultOrgParserState
-
-defaultOrgParserState :: OrgParserState
-defaultOrgParserState = OrgParserState
- { orgStateOptions = def
- , orgStateAnchorIds = []
- , orgStateBlockAttributes = M.empty
- , orgStateEmphasisCharStack = []
- , orgStateEmphasisNewlines = Nothing
- , orgStateLastForbiddenCharPos = Nothing
- , orgStateLastPreCharPos = Nothing
- , orgStateLastStrPos = Nothing
- , orgStateLinkFormatters = M.empty
- , orgStateMeta = nullMeta
- , orgStateMeta' = return nullMeta
- , orgStateNotes' = []
- , orgStateParserContext = NullState
- , orgStateIdentifiers = Set.empty
- , orgStateHeaderMap = M.empty
- }
-
recordAnchorId :: String -> OrgParser ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
@@ -243,44 +121,65 @@ addToNotesTable note = do
oldnotes <- orgStateNotes' <$> getState
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
-parseFromString :: OrgParser a -> String -> OrgParser a
-parseFromString parser str' = do
- oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
- updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
- result <- P.parseFromString parser str'
- updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
- return result
-
-
--
--- Adaptions and specializations of parsing utilities
+-- Parser
--
+parseOrg :: OrgParser Pandoc
+parseOrg = do
+ blocks' <- parseBlocks
+ st <- getState
+ let meta = runF (orgStateMeta' st) st
+ let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
+ return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
-newtype F a = F { unF :: Reader OrgParserState a
- } deriving (Monad, Applicative, Functor)
+-- | Drop COMMENT headers and the document tree below those headers.
+dropCommentTrees :: [Block] -> [Block]
+dropCommentTrees [] = []
+dropCommentTrees (b:bs) =
+ maybe (b:dropCommentTrees bs)
+ (dropCommentTrees . flip dropUntilHeaderAboveLevel bs)
+ (commentHeaderLevel b)
-runF :: F a -> OrgParserState -> a
-runF = runReader . unF
+-- | Return the level of a header starting a comment or :noexport: tree and
+-- Nothing otherwise.
+commentHeaderLevel :: Block -> Maybe Int
+commentHeaderLevel blk =
+ case blk of
+ (Header level _ ((Str "COMMENT"):_)) -> Just level
+ (Header level _ title) | hasNoExportTag title -> Just level
+ _ -> Nothing
+ where
+ hasNoExportTag :: [Inline] -> Bool
+ hasNoExportTag = any isNoExportTag
-askF :: F OrgParserState
-askF = F ask
+ isNoExportTag :: Inline -> Bool
+ isNoExportTag (Span ("", ["tag"], [("data-tag-name", "noexport")]) []) = True
+ isNoExportTag _ = False
-asksF :: (OrgParserState -> a) -> F a
-asksF f = F $ asks f
+-- | Drop blocks until a header on or above the given level is seen
+dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
+dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
-instance Monoid a => Monoid (F a) where
- mempty = return mempty
- mappend = liftM2 mappend
- mconcat = fmap mconcat . sequence
+isHeaderLevelLowerEq :: Int -> Block -> Bool
+isHeaderLevelLowerEq n blk =
+ case blk of
+ (Header level _ _) -> n >= level
+ _ -> False
-trimInlinesF :: F Inlines -> F Inlines
-trimInlinesF = liftM trimInlines
-returnF :: a -> OrgParser (F a)
-returnF = return . return
+--
+-- Adaptions and specializations of parsing utilities
+--
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+ oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+ updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+ result <- P.parseFromString parser str'
+ updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+ return result
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
new file mode 100644
index 000000000..680c469f3
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -0,0 +1,175 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Define the Org-mode parser state.
+-}
+module Text.Pandoc.Readers.Org.ParserState
+ ( OrgParserState(..)
+ , OrgParserLocal(..)
+ , OrgNoteRecord
+ , F(..)
+ , askF
+ , asksF
+ , trimInlinesF
+ , runF
+ , returnF
+ ) where
+
+import Control.Monad (liftM, liftM2)
+import Control.Monad.Reader (Reader, runReader, ask, asks, local)
+
+import Data.Default (Default(..))
+import qualified Data.Map as M
+import qualified Data.Set as Set
+
+import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
+ trimInlines )
+import Text.Pandoc.Definition ( Meta(..), nullMeta )
+import Text.Pandoc.Options ( ReaderOptions(..) )
+import Text.Pandoc.Parsing ( HasHeaderMap(..)
+ , HasIdentifierList(..)
+ , HasLastStrPosition(..)
+ , HasQuoteContext(..)
+ , HasReaderOptions(..)
+ , ParserContext(..)
+ , QuoteContext(..)
+ , SourcePos )
+
+-- | An inline note / footnote containing the note key and its (inline) value.
+type OrgNoteRecord = (String, F Blocks)
+-- | Table of footnotes
+type OrgNoteTable = [OrgNoteRecord]
+-- | Map of org block attributes (e.g. LABEL, CAPTION, NAME, etc)
+type OrgBlockAttributes = M.Map String String
+-- | Map of functions for link transformations. The map key is refers to the
+-- link-type, the corresponding function transforms the given link string.
+type OrgLinkFormatters = M.Map String (String -> String)
+
+-- | Org-mode parser state
+data OrgParserState = OrgParserState
+ { orgStateOptions :: ReaderOptions
+ , orgStateAnchorIds :: [String]
+ , orgStateBlockAttributes :: OrgBlockAttributes
+ , orgStateEmphasisCharStack :: [Char]
+ , orgStateEmphasisNewlines :: Maybe Int
+ , orgStateLastForbiddenCharPos :: Maybe SourcePos
+ , orgStateLastPreCharPos :: Maybe SourcePos
+ , orgStateLastStrPos :: Maybe SourcePos
+ , orgStateLinkFormatters :: OrgLinkFormatters
+ , orgStateMeta :: Meta
+ , orgStateMeta' :: F Meta
+ , orgStateNotes' :: OrgNoteTable
+ , orgStateParserContext :: ParserContext
+ , orgStateIdentifiers :: Set.Set String
+ , orgStateHeaderMap :: M.Map Inlines String
+ }
+
+data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
+
+instance Default OrgParserLocal where
+ def = OrgParserLocal NoQuote
+
+instance HasReaderOptions OrgParserState where
+ extractReaderOptions = orgStateOptions
+
+instance HasMeta OrgParserState where
+ setMeta field val st =
+ st{ orgStateMeta = setMeta field val $ orgStateMeta st }
+ deleteMeta field st =
+ st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
+
+instance HasLastStrPosition OrgParserState where
+ getLastStrPos = orgStateLastStrPos
+ setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+
+instance HasQuoteContext st (Reader OrgParserLocal) where
+ getQuoteContext = asks orgLocalQuoteContext
+ withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
+
+instance HasIdentifierList OrgParserState where
+ extractIdentifierList = orgStateIdentifiers
+ updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+ extractHeaderMap = orgStateHeaderMap
+ updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
+
+instance Default OrgParserState where
+ def = defaultOrgParserState
+
+defaultOrgParserState :: OrgParserState
+defaultOrgParserState = OrgParserState
+ { orgStateOptions = def
+ , orgStateAnchorIds = []
+ , orgStateBlockAttributes = M.empty
+ , orgStateEmphasisCharStack = []
+ , orgStateEmphasisNewlines = Nothing
+ , orgStateLastForbiddenCharPos = Nothing
+ , orgStateLastPreCharPos = Nothing
+ , orgStateLastStrPos = Nothing
+ , orgStateLinkFormatters = M.empty
+ , orgStateMeta = nullMeta
+ , orgStateMeta' = return nullMeta
+ , orgStateNotes' = []
+ , orgStateParserContext = NullState
+ , orgStateIdentifiers = Set.empty
+ , orgStateHeaderMap = M.empty
+ }
+
+
+--
+-- Parser state reader
+--
+
+-- | Reader monad wrapping the parser state. This is used to delay evaluation
+-- until all relevant information has been parsed and made available in the
+-- parser state. See also the newtype of the same name in
+-- Text.Pandoc.Parsing.
+newtype F a = F { unF :: Reader OrgParserState a
+ } deriving (Functor, Applicative, Monad)
+
+instance Monoid a => Monoid (F a) where
+ mempty = return mempty
+ mappend = liftM2 mappend
+ mconcat = fmap mconcat . sequence
+
+runF :: F a -> OrgParserState -> a
+runF = runReader . unF
+
+askF :: F OrgParserState
+askF = F ask
+
+asksF :: (OrgParserState -> a) -> F a
+asksF f = F $ asks f
+
+trimInlinesF :: F Inlines -> F Inlines
+trimInlinesF = liftM trimInlines
+
+returnF :: Monad m => a -> m (F a)
+returnF = return . return