aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org/Meta.hs
blob: 1fea3e890b0ec5689481c262ae10c899602a80d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Readers.Org.Meta
   Copyright   : Copyright (C) 2014-2017 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>

Parsers for Org-mode meta declarations.
-}
module Text.Pandoc.Readers.Org.Meta
  ( metaExport
  , metaKey
  , metaLine
  ) where

import           Text.Pandoc.Readers.Org.BlockStarts
import           Text.Pandoc.Readers.Org.ExportSettings ( exportSettings )
import           Text.Pandoc.Readers.Org.Inlines
import           Text.Pandoc.Readers.Org.ParserState
import           Text.Pandoc.Readers.Org.Parsing

import qualified Text.Pandoc.Builder as B
import           Text.Pandoc.Builder ( Blocks, Inlines )
import           Text.Pandoc.Definition

import           Control.Monad ( mzero, void )
import           Data.Char ( toLower )
import           Data.List ( intersperse )
import qualified Data.Map as M
import           Data.Monoid ( (<>) )
import           Network.HTTP ( urlEncode )

-- | Returns the current meta, respecting export options.
metaExport :: OrgParser (F Meta)
metaExport = do
  st <- getState
  let settings = orgStateExportSettings st
  return $ (if exportWithAuthor  settings then id else removeMeta "author")
         . (if exportWithCreator settings then id else removeMeta "creator")
         . (if exportWithEmail   settings then id else removeMeta "email")
        <$> orgStateMeta st

removeMeta :: String -> Meta -> Meta
removeMeta key meta' =
  let metaMap = unMeta meta'
  in Meta $ M.delete key metaMap

-- | Parse and handle a single line containing meta information
-- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it
metaLine :: OrgParser Blocks
metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)

declarationLine :: OrgParser ()
declarationLine = try $ do
  key   <- map toLower <$> metaKey
  (key', value) <- metaValue key
  updateState $ \st ->
    let meta' = B.setMeta key' <$> value <*> pure nullMeta
    in st { orgStateMeta = meta' <> orgStateMeta st }

metaKey :: OrgParser String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
                      <*  char ':'
                      <*  skipSpaces

metaValue :: String -> OrgParser (String, (F MetaValue))
metaValue key =
  let inclKey = "header-includes"
  in case key of
    "author"          -> (key,) <$> metaInlinesCommaSeparated
    "title"           -> (key,) <$> metaInlines
    "date"            -> (key,) <$> metaInlines
    "header-includes" -> (key,) <$> accumulatingList key metaInlines
    "latex_header"    -> (inclKey,) <$>
                         accumulatingList inclKey (metaExportSnippet "latex")
    "latex_class"     -> ("documentclass",) <$> metaString
    -- Org-mode expects class options to contain the surrounding brackets,
    -- pandoc does not.
    "latex_class_options" -> ("classoption",) <$>
                             metaModifiedString (filter (`notElem` "[]"))
    "html_head"       -> (inclKey,) <$>
                         accumulatingList inclKey (metaExportSnippet "html")
    _                 -> (key,) <$> metaString

metaInlines :: OrgParser (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline

metaInlinesCommaSeparated :: OrgParser (F MetaValue)
metaInlinesCommaSeparated = do
  authStrs <- (many1 (noneOf ",\n")) `sepBy1` (char ',')
  newline
  authors <- mapM (parseFromString inlinesTillNewline . (++ "\n")) authStrs
  let toMetaInlines = MetaInlines . B.toList
  return $ MetaList . map toMetaInlines <$> sequence authors

metaString :: OrgParser (F MetaValue)
metaString = metaModifiedString id

metaModifiedString :: (String -> String) -> OrgParser (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine

-- | Read an format specific meta definition
metaExportSnippet :: String -> OrgParser (F MetaValue)
metaExportSnippet format =
  return . MetaInlines . B.toList . B.rawInline format <$> anyLine

-- | Accumulate the result of the @parser@ in a list under @key@.
accumulatingList :: String
                 -> OrgParser (F MetaValue)
                 -> OrgParser (F MetaValue)
accumulatingList key p = do
  value <- p
  meta' <- orgStateMeta <$> getState
  return $ (\m v -> MetaList (curList m ++ [v])) <$> meta' <*> value
 where curList m = case lookupMeta key m of
                     Just (MetaList ms) -> ms
                     Just x             -> [x]
                     _                  -> []

--
-- export options
--
optionLine :: OrgParser ()
optionLine = try $ do
  key <- metaKey
  case key of
    "link"     -> parseLinkFormat >>= uncurry addLinkFormat
    "options"  -> exportSettings
    "todo"     -> todoSequence >>= updateState . registerTodoSequence
    "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
    "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
    _         -> mzero

addLinkFormat :: String
              -> (String -> String)
              -> OrgParser ()
addLinkFormat key formatter = updateState $ \s ->
  let fs = orgStateLinkFormatters s
  in s{ orgStateLinkFormatters = M.insert key formatter fs }

parseLinkFormat :: OrgParser ((String, String -> String))
parseLinkFormat = try $ do
  linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
  linkSubst <- parseFormat
  return (linkType, linkSubst)

-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
parseFormat :: OrgParser (String -> String)
parseFormat = try $ do
  replacePlain <|> replaceUrl <|> justAppend
 where
   -- inefficient, but who cares
   replacePlain = try $ (\x -> concat . flip intersperse x)
                     <$> sequence [tillSpecifier 's', rest]
   replaceUrl   = try $ (\x -> concat . flip intersperse x . urlEncode)
                     <$> sequence [tillSpecifier 'h', rest]
   justAppend   = try $ (++) <$> rest

   rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
   tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))

inlinesTillNewline :: OrgParser (F Inlines)
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline

--
-- ToDo Sequences and Keywords
--
todoSequence :: OrgParser TodoSequence
todoSequence = try $ do
  todoKws <- todoKeywords
  doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
  newline
  -- There must be at least one DONE keyword. The last TODO keyword is taken if
  -- necessary.
  case doneKws of
    Just done  -> return $ keywordsToSequence todoKws done
    Nothing    -> case reverse todoKws of
                    []     -> mzero  -- no keywords present
                    (x:xs) -> return $ keywordsToSequence (reverse xs) [x]

 where
   todoKeywords :: OrgParser [String]
   todoKeywords = try $
     let keyword = many1 nonspaceChar <* skipSpaces
         endOfKeywords = todoDoneSep <|> void newline
     in manyTill keyword (lookAhead endOfKeywords)

   todoDoneSep :: OrgParser ()
   todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1

   keywordsToSequence :: [String] -> [String] -> TodoSequence
   keywordsToSequence todo done =
     let todoMarkers = map (TodoMarker Todo) todo
         doneMarkers = map (TodoMarker Done) done
     in todoMarkers ++ doneMarkers