diff options
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 86 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 7 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 182 | 
3 files changed, 197 insertions, 78 deletions
| diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 621e7107f..fd811c078 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.  -}  module Text.Pandoc.Readers.Org ( readOrg ) where +import           Text.Pandoc.Readers.Org.ParserState +import           Text.Pandoc.Readers.Org.Parsing +  import qualified Text.Pandoc.Builder as B  import           Text.Pandoc.Builder ( Inlines, Blocks )  import           Text.Pandoc.Definition  import           Text.Pandoc.Compat.Monoid ((<>))  import           Text.Pandoc.Error  import           Text.Pandoc.Options -import qualified Text.Pandoc.Parsing as P -import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF -                                            , anyLine, blanklines, newline -                                            , orderedListMarker -                                            , parseFromString -                                            )  import           Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) -import           Text.Pandoc.Readers.Org.ParserState  import           Text.Pandoc.Shared (compactify', compactify'DL)  import           Text.TeXMath (readTeX, writePandoc, DisplayType(..))  import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap  import           Control.Arrow (first)  import           Control.Monad (foldM, guard, mplus, mzero, when) -import           Control.Monad.Reader ( Reader, runReader ) +import           Control.Monad.Reader ( runReader )  import           Data.Char (isAlphaNum, isSpace, toLower, toUpper)  import           Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )  import qualified Data.Map as M @@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options          -> Either PandocError Pandoc  readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") --- | The parser used to read org files. -type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) -  --  -- Functions acting on the parser state  -- @@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser ()  recordAnchorId i = updateState $ \s ->    s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } -updateLastForbiddenCharPos :: OrgParser () -updateLastForbiddenCharPos = getPosition >>= \p -> -  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} - -updateLastPreCharPos :: OrgParser () -updateLastPreCharPos = getPosition >>= \p -> -  updateState $ \s -> s{ orgStateLastPreCharPos = Just p} -  pushToInlineCharStack :: Char -> OrgParser ()  pushToInlineCharStack c = updateState $ \s ->    s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } @@ -259,57 +244,6 @@ isHeaderLevelLowerEq n blk =  -- --- Adaptions and specializations of parsing utilities --- - --- The version Text.Pandoc.Parsing cannot be used, as we need additional parts --- of the state saved and restored. -parseFromString :: OrgParser a -> String -> OrgParser a -parseFromString parser str' = do -  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState -  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } -  result <- P.parseFromString parser str' -  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } -  return result - --- | Like @Text.Parsec.Char.newline@, but causes additional state changes. -newline :: OrgParser Char -newline = -  P.newline -       <* updateLastPreCharPos -       <* updateLastForbiddenCharPos - --- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: OrgParser [Char] -blanklines = -  P.blanklines -       <* updateLastPreCharPos -       <* updateLastForbiddenCharPos - -anyLine :: OrgParser String -anyLine = -  P.anyLine -    <* updateLastPreCharPos -    <* updateLastForbiddenCharPos - --- | Succeeds when we're in list context. -inList :: OrgParser () -inList = do -  ctx <- orgStateParserContext <$> getState -  guard (ctx == ListItemState) - --- | Parse in different context -withContext :: ParserContext -- ^ New parser context -            -> OrgParser a   -- ^ Parser to run in that context -            -> OrgParser a -withContext context parser = do -  oldContext <- orgStateParserContext <$> getState -  updateState $ \s -> s{ orgStateParserContext = context } -  result <- parser -  updateState $ \s -> s{ orgStateParserContext = oldContext } -  return result - ---  -- parsing blocks  -- @@ -398,7 +332,7 @@ keyValues = try $     endOfValue :: OrgParser ()     endOfValue =       lookAhead $ (() <$ try (many1 spaceChar <* key)) -              <|> () <$ P.newline +              <|> () <$ newline  -- @@ -675,7 +609,7 @@ propertiesDrawer = try $ do     key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')     value :: OrgParser String -   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline) +   value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)  keyValuesToAttr :: [(String, String)] -> Attr  keyValuesToAttr kvs = @@ -696,7 +630,7 @@ keyValuesToAttr kvs =  figure :: OrgParser (F Blocks)  figure = try $ do    figAttrs <- blockAttributes -  src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline +  src <- skipSpaces *> selfTarget <* skipSpaces <* newline    guard . not . isNothing . blockAttrCaption $ figAttrs    guard (isImageFilename src)    let figName    = fromMaybe mempty $ blockAttrName figAttrs @@ -785,7 +719,7 @@ parseFormat = try $ do  header :: OrgParser (F Blocks)  header = try $ do    level    <- headerStart -  title    <- manyTill inline (lookAhead $ optional headerTags <* P.newline) +  title    <- manyTill inline (lookAhead $ optional headerTags <* newline)    tags     <- option [] headerTags    newline    propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer) @@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do    return $ (,) <$> term' <*> fmap (:[]) contents'   where     definitionMarker = -     spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline) +     spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)  -- parse raw text for one list item, excluding start marker and continuations @@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $     nMoreLines k        cs = try $ (final k cs <|> rest k cs)                                    >>= uncurry nMoreLines     final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine -   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline) +   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)     finalLine = try $ manyTill p end     minus1 k = k - 1     oneOrMore cs = guard (not $ null cs) *> return cs diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 6a902cd46..e648a883e 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  Define the Org-mode parser state.  -}  module Text.Pandoc.Readers.Org.ParserState -  ( OrgParserState(..) -  , OrgParserLocal(..) +  ( OrgParserState (..) +  , OrgParserLocal (..)    , OrgNoteRecord +  , HasReaderOptions (..) +  , HasQuoteContext (..)    , F(..)    , askF    , asksF @@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse  modifyExportSettings setter val state =    state { orgStateExportSettings = setter val . orgStateExportSettings $ state } +  --  -- Parser state reader  -- diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs new file mode 100644 index 000000000..efe2ae25f --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -0,0 +1,182 @@ +{- +Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA +-} + +{- | +   Module      : Text.Pandoc.Readers.Org.Options +   Copyright   : Copyright (C) 2014-2016 Albert Krewinkel +   License     : GNU GPL, version 2 or above + +   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + +Org-mode parsing utilities. + +Most functions are simply re-exports from @Text.Pandoc.Parsing@, some +functions are adapted to Org-mode specific functionality. +-} +module Text.Pandoc.Readers.Org.Parsing +  ( OrgParser +  , anyLine +  , blanklines +  , newline +  , parseFromString +  , inList +  , withContext +  , updateLastForbiddenCharPos +  , updateLastPreCharPos +  -- * Re-exports from Text.Pandoc.Parser +  , ParserContext (..) +  , many1Till +  , notFollowedBy' +  , spaceChar +  , nonspaceChar +  , skipSpaces +  , blankline +  , enclosed +  , stringAnyCase +  , charsInBalanced +  , uri +  , withRaw +  , readWithM +  , guardEnabled +  , updateLastStrPos +  , notAfterString +  , ParserState (..) +  , registerHeader +  , QuoteContext (..) +  , singleQuoteStart +  , singleQuoteEnd +  , doubleQuoteStart +  , doubleQuoteEnd +  , dash +  , ellipses +  , citeKey +  -- * Re-exports from Text.Pandoc.Parsec +  , runParser +  , getInput +  , char +  , letter +  , digit +  , alphaNum +  , skipMany1 +  , spaces +  , anyChar +  , string +  , count +  , eof +  , noneOf +  , oneOf +  , lookAhead +  , notFollowedBy +  , many +  , many1 +  , manyTill +  , (<|>) +  , (<?>) +  , choice +  , try +  , sepBy +  , sepBy1 +  , option +  , optional +  , optionMaybe +  , getState +  , updateState +  , SourcePos +  , getPosition +  ) where + +import           Text.Pandoc.Readers.Org.ParserState + +import qualified Text.Pandoc.Parsing as P +import           Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline +                                            , parseFromString ) + +import           Control.Monad ( guard ) +import           Control.Monad.Reader ( Reader ) + +-- | The parser used to read org files. +type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) + +-- +-- Adaptions and specializations of parsing utilities +-- + +-- | Parse any line of text +anyLine :: OrgParser String +anyLine = +  P.anyLine +    <* updateLastPreCharPos +    <* updateLastForbiddenCharPos + +-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts +-- of the state saved and restored. +parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString parser str' = do +  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState +  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } +  result <- P.parseFromString parser str' +  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } +  return result + +-- | Like @Text.Parsec.Char.newline@, but causes additional state changes. +newline :: OrgParser Char +newline = +  P.newline +       <* updateLastPreCharPos +       <* updateLastForbiddenCharPos + +-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. +blanklines :: OrgParser [Char] +blanklines = +  P.blanklines +       <* updateLastPreCharPos +       <* updateLastForbiddenCharPos + +-- | Succeeds when we're in list context. +inList :: OrgParser () +inList = do +  ctx <- orgStateParserContext <$> getState +  guard (ctx == ListItemState) + +-- | Parse in different context +withContext :: ParserContext -- ^ New parser context +            -> OrgParser a   -- ^ Parser to run in that context +            -> OrgParser a +withContext context parser = do +  oldContext <- orgStateParserContext <$> getState +  updateState $ \s -> s{ orgStateParserContext = context } +  result <- parser +  updateState $ \s -> s{ orgStateParserContext = oldContext } +  return result + +-- +-- Parser state update functions +-- + +-- | Set the current position as the last position at which a forbidden char +-- was found (i.e. a character which is not allowed at the inner border of +-- markup). +updateLastForbiddenCharPos :: OrgParser () +updateLastForbiddenCharPos = getPosition >>= \p -> +  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} + +-- | Set the current parser position as the position at which a character was +-- seen which allows inline markup to follow. +updateLastPreCharPos :: OrgParser () +updateLastPreCharPos = getPosition >>= \p -> +  updateState $ \s -> s{ orgStateLastPreCharPos = Just p} | 
