src/Text/Pandoc/Readers/Org/Parsing.hs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218

{- |
   Module      : Text.Pandoc.Readers.Org.Parsing
   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>

Org-mode parsing utilities.

Most functions are simply re-exports from @Text.Pandoc.Parsing@; some
are adapted to Org-mode specific functionality.
-}
module Text.Pandoc.Readers.Org.Parsing
  ( OrgParser
  , anyLine
  , anyLineNewline
  , indentWith
  , blanklines
  , newline
  , parseFromString
  , skipSpaces1
  , inList
  , withContext
  , getExportSetting
  , updateLastForbiddenCharPos
  , updateLastPreCharPos
  , orgArgKey
  , orgArgWord
  , orgArgWordChar
  , orgTagWord
  , orgTagWordChar
  -- * Re-exports from Text.Pandoc.Parsing
  , ParserContext (..)
  , textStr
  , countChar
  , manyChar
  , many1Char
  , manyTillChar
  , many1Till
  , many1TillChar
  , notFollowedBy'
  , spaceChar
  , nonspaceChar
  , skipSpaces
  , blankline
  , enclosed
  , stringAnyCase
  , charsInBalanced
  , uri
  , withRaw
  , readWithM
  , guardEnabled
  , updateLastStrPos
  , notAfterString
  , ParserState (..)
  , registerHeader
  , QuoteContext (..)
  , singleQuoteStart
  , singleQuoteEnd
  , doubleQuoteStart
  , doubleQuoteEnd
  , dash
  , ellipses
  , citeKey
  , gridTableWith
  , insertIncludedFileF
  -- * Re-exports from Text.Pandoc.Parsec
  , runParser
  , runParserT
  , getInput
  , char
  , letter
  , digit
  , alphaNum
  , skipMany1
  , spaces
  , anyChar
  , satisfy
  , string
  , count
  , eof
  , noneOf
  , oneOf
  , lookAhead
  , notFollowedBy
  , many
  , many1
  , manyTill
  , (<|>)
  , (<?>)
  , choice
  , try
  , sepBy
  , sepBy1
  , sepEndBy1
  , endBy1
  , option
  , optional
  , optionMaybe
  , getState
  , updateState
  , SourcePos
  , getPosition
  ) where

import Data.Text (Text)
import Text.Pandoc.Readers.Org.ParserState

import Text.Pandoc.Parsing hiding (anyLine, blanklines, newline,
                                   parseFromString)
import qualified Text.Pandoc.Parsing as P

import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)

-- | The parser used to read org files: a 'ParserT' instantiated with
-- 'Sources', 'OrgParserState', and a 'ReaderT' layer carrying
-- 'OrgParserLocal' on top of the caller-chosen monad @m@.
type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)

--
-- Adaptions and specializations of parsing utilities
--

-- | Parse any line of text. Once the line has been consumed, the current
-- position is recorded both as the last pre-char position and as the last
-- forbidden-char position.
anyLine :: Monad m => OrgParser m Text
anyLine = do
  line <- P.anyLine
  updateLastPreCharPos
  updateLastForbiddenCharPos
  return line

-- | Like @parseFromString@ from "Text.Pandoc.Parsing", but resets the
-- position of the last character allowed before emphasised text. The reset
-- happens both before and after the given parser is run on the text.
parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a
parseFromString parser str' =
  clearLastPreCharPos *> P.parseFromString parser str' <* clearLastPreCharPos
 where
  -- Forget any previously recorded pre-char position.
  clearLastPreCharPos =
    updateState $ \st -> st{ orgStateLastPreCharPos = Nothing }

-- | Skip at least one tab or space character, followed by any further
-- tabs or spaces.
skipSpaces1 :: Monad m => OrgParser m ()
skipSpaces1 = spaceChar *> skipSpaces

-- | Like the @newline@ parser from "Text.Pandoc.Parsing", but additionally
-- records the position after the newline as the last pre-char position and
-- the last forbidden-char position.
newline :: Monad m => OrgParser m Char
newline = do
  nl <- P.newline
  updateLastPreCharPos
  updateLastForbiddenCharPos
  return nl

-- | Like the @blanklines@ parser from "Text.Pandoc.Parsing", but
-- additionally records the position after the blank lines as the last
-- pre-char position and the last forbidden-char position.
blanklines :: Monad m => OrgParser m Text
blanklines = do
  blanks <- P.blanklines
  updateLastPreCharPos
  updateLastForbiddenCharPos
  return blanks

-- | Succeeds only if the parser context stored in the state is
-- 'ListItemState'; fails otherwise. Consumes no input.
inList :: Monad m => OrgParser m ()
inList =
  getState >>= guard . (== ListItemState) . orgStateParserContext

-- | Run a parser with the parser context temporarily replaced. The context
-- that was active beforehand is put back once the parser has succeeded.
withContext :: Monad m
            => ParserContext -- ^ Context to install while the parser runs
            -> OrgParser m a -- ^ Parser to run within that context
            -> OrgParser m a
withContext context parser = do
  previous <- orgStateParserContext <$> getState
  setContext context
  parser <* setContext previous
 where
  -- Overwrite the parser context stored in the state.
  setContext ctx = updateState $ \st -> st{ orgStateParserContext = ctx }

--
-- Parser state functions
--

-- | Look up an export setting by applying the given accessor to the
-- export settings held in the current parser state.
getExportSetting :: Monad m => (ExportSettings -> a) -> OrgParser m a
getExportSetting accessor = do
  st <- getState
  return (accessor (orgStateExportSettings st))

-- | Remember the current parser position as the last position at which a
-- forbidden char was found (i.e. a character which is not allowed at the
-- inner border of markup).
updateLastForbiddenCharPos :: Monad m => OrgParser m ()
updateLastForbiddenCharPos = do
  pos <- getPosition
  updateState $ \st -> st{ orgStateLastForbiddenCharPos = Just pos }

-- | Remember the current parser position as the position at which a
-- character was seen which allows inline markup to follow.
updateLastPreCharPos :: Monad m => OrgParser m ()
updateLastPreCharPos = do
  pos <- getPosition
  updateState $ \st -> st{ orgStateLastPreCharPos = Just pos }

--
-- Org key-value parsing
--

-- | Read the key of a plist style key-value list: optional leading
-- spaces, a colon, and then the key's word characters. The colon is not
-- part of the result. The whole parser is wrapped in 'try', so no input
-- is consumed on failure.
orgArgKey :: Monad m => OrgParser m Text
orgArgKey = try $ do
  skipSpaces
  _ <- char ':'
  many1Char orgArgWordChar

-- | Read the value of a plist style key-value list: a run of characters
-- accepted by 'orgArgWordChar', collected with 'many1Char'.
orgArgWord :: Monad m => OrgParser m Text
orgArgWord = many1Char orgArgWordChar

-- | Chars treated as part of a word in plists: any alphanumeric character,
-- a hyphen, or an underscore.
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"

-- | Read a tag word: a run of characters accepted by 'orgTagWordChar',
-- collected with 'many1Char'.
orgTagWord :: Monad m => OrgParser m Text
orgTagWord = many1Char orgTagWordChar

-- | Chars treated as part of a tag word: any alphanumeric character, an
-- at sign, a percent sign, a hash sign, or an underscore.
orgTagWordChar :: Monad m => OrgParser m Char
orgTagWordChar = alphaNum <|> oneOf "@%#_"