aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org/Parsing.hs
blob: 0b6b876d8db64b922da12a399e89cbb49a1e73d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
{-
Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Readers.Org.Options
   Copyright   : Copyright (C) 2014-2016 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>

Org-mode parsing utilities.

Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
functions are adapted to Org-mode specific functionality.
-}
module Text.Pandoc.Readers.Org.Parsing
  ( OrgParser
  , anyLine
  , blanklines
  , newline
  , parseFromString
  , skipSpaces1
  , inList
  , withContext
  , updateLastForbiddenCharPos
  , updateLastPreCharPos
  , orgArgKey
  , orgArgWord
  , orgArgWordChar
  -- * Re-exports from Text.Pandoc.Parser
  , ParserContext (..)
  , many1Till
  , notFollowedBy'
  , spaceChar
  , nonspaceChar
  , skipSpaces
  , blankline
  , enclosed
  , stringAnyCase
  , charsInBalanced
  , uri
  , withRaw
  , readWithM
  , guardEnabled
  , updateLastStrPos
  , notAfterString
  , ParserState (..)
  , registerHeader
  , QuoteContext (..)
  , singleQuoteStart
  , singleQuoteEnd
  , doubleQuoteStart
  , doubleQuoteEnd
  , dash
  , ellipses
  , citeKey
  -- * Re-exports from Text.Pandoc.Parsec
  , runParser
  , getInput
  , char
  , letter
  , digit
  , alphaNum
  , skipMany1
  , spaces
  , anyChar
  , satisfy
  , string
  , count
  , eof
  , noneOf
  , oneOf
  , lookAhead
  , notFollowedBy
  , many
  , many1
  , manyTill
  , (<|>)
  , (<?>)
  , choice
  , try
  , sepBy
  , sepBy1
  , option
  , optional
  , optionMaybe
  , getState
  , updateState
  , SourcePos
  , getPosition
  ) where

import           Text.Pandoc.Readers.Org.ParserState

import qualified Text.Pandoc.Parsing as P
import           Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
                                            , parseFromString )

import           Control.Monad ( guard )
import           Control.Monad.Reader ( Reader )

-- | The parser used to read org files.
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)

--
-- Adaptions and specializations of parsing utilities
--

-- | Parse any line of text
anyLine :: OrgParser String
anyLine =
  P.anyLine
    <* updateLastPreCharPos
    <* updateLastForbiddenCharPos

-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
parseFromString :: OrgParser a -> String -> OrgParser a
parseFromString parser str' = do
  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
  result <- P.parseFromString parser str'
  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
  return result

-- | Skip one or more tab or space characters.
skipSpaces1 :: OrgParser ()
skipSpaces1 = skipMany1 spaceChar

-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
newline =
  P.newline
       <* updateLastPreCharPos
       <* updateLastForbiddenCharPos

-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
blanklines :: OrgParser [Char]
blanklines =
  P.blanklines
       <* updateLastPreCharPos
       <* updateLastForbiddenCharPos

-- | Succeeds when we're in list context.
inList :: OrgParser ()
inList = do
  ctx <- orgStateParserContext <$> getState
  guard (ctx == ListItemState)

-- | Parse in different context
withContext :: ParserContext -- ^ New parser context
            -> OrgParser a   -- ^ Parser to run in that context
            -> OrgParser a
withContext context parser = do
  oldContext <- orgStateParserContext <$> getState
  updateState $ \s -> s{ orgStateParserContext = context }
  result <- parser
  updateState $ \s -> s{ orgStateParserContext = oldContext }
  return result

--
-- Parser state update functions
--

-- | Set the current position as the last position at which a forbidden char
-- was found (i.e. a character which is not allowed at the inner border of
-- markup).
updateLastForbiddenCharPos :: OrgParser ()
updateLastForbiddenCharPos = getPosition >>= \p ->
  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}

-- | Set the current parser position as the position at which a character was
-- seen which allows inline markup to follow.
updateLastPreCharPos :: OrgParser ()
updateLastPreCharPos = getPosition >>= \p ->
  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}

orgArgKey :: OrgParser String
orgArgKey = try $
  skipSpaces *> char ':'
             *> many1 orgArgWordChar

orgArgWord :: OrgParser String
orgArgWord = many1 orgArgWordChar

orgArgWordChar :: OrgParser Char
orgArgWordChar = alphaNum <|> oneOf "-_"