aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2013-05-10 22:53:35 -0700
committerJohn MacFarlane <fiddlosopher@gmail.com>2013-06-24 20:29:41 -0700
commitf869f7e08dad315945d52be3fcacf6ff0c05c5c1 (patch)
tree4c426ebf5a30b51499859f9d41a890534b6a18a6 /src/Text/Pandoc/Readers
parente32a8f5981969bb6d0a11bd945188c35817e4d96 (diff)
downloadpandoc-f869f7e08dad315945d52be3fcacf6ff0c05c5c1.tar.gz
Use new flexible metadata type.
* Depend on pandoc 1.12. * Added yaml dependency. * `Text.Pandoc.XML`: Removed `stripTags`. (API change.) * `Text.Pandoc.Shared`: Added `metaToJSON`. This will be used in writers to create a JSON object for use in the templates from the pandoc metadata. * Revised readers and writers to use the new Meta type. * `Text.Pandoc.Options`: Added `Ext_yaml_title_block`. * Markdown reader: Added support for YAML metadata block. Note that it must come at the beginning of the document. * `Text.Pandoc.Parsing.ParserState`: Replace `stateTitle`, `stateAuthors`, `stateDate` with `stateMeta`. * RST reader: Improved metadata. Treat initial field list as metadata when standalone specified. Previously ALL fields "title", "author", "date" in field lists were treated as metadata, even if not at the beginning. Use `subtitle` metadata field for subtitle. * `Text.Pandoc.Templates`: Export `renderTemplate'` that takes a string instead of a compiled template.. * OPML template: Use 'for' loop for authors. * Org template: '#+TITLE:' is inserted before the title. Previously the writer did this.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs43
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs2
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs49
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs93
-rw-r--r--src/Text/Pandoc/Readers/Native.hs8
-rw-r--r--src/Text/Pandoc/Readers/RST.hs73
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
7 files changed, 154 insertions, 116 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 32ce46fba..f6657a4d1 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -39,7 +39,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
-import Text.Pandoc.Builder (text, toList)
+import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing
@@ -47,6 +47,7 @@ import Data.Maybe ( fromMaybe, isJust )
import Data.List ( intercalate )
import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero )
+import Control.Applicative ( (<$>), (<$) )
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -58,32 +59,26 @@ isSpace _ = False
readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
-readHtml opts inp = Pandoc meta blocks
- where blocks = case runParser parseBody def{ stateOptions = opts }
- "source" rest of
- Left err' -> error $ "\nError at " ++ show err'
- Right result -> result
- tags = canonicalizeTags $
+readHtml opts inp =
+ case runParser parseDoc def{ stateOptions = opts } "source" tags of
+ Left err' -> error $ "\nError at " ++ show err'
+ Right result -> result
+ where tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
- hasHeader = any (~== TagOpen "head" []) tags
- (meta, rest) = if hasHeader
- then parseHeader tags
- else (Meta [] [] [], tags)
+ parseDoc = do
+ blocks <- (fixPlains False . concat) <$> manyTill block eof
+ meta <- stateMeta <$> getState
+ return $ Pandoc meta blocks
type TagParser = Parser [Tag String] ParserState
--- TODO - fix this - not every header has a title tag
-parseHeader :: [Tag String] -> (Meta, [Tag String])
-parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest)
- where (tit,_) = break (~== TagClose "title") $ drop 1 $
- dropWhile (\t -> not $ t ~== TagOpen "title" []) tags
- tit' = concatMap fromTagText $ filter isTagText tit
- tit'' = normalizeSpaces $ toList $ text tit'
- rest = drop 1 $ dropWhile (\t -> not $ t ~== TagClose "head" ||
- t ~== TagOpen "body" []) tags
+pBody :: TagParser [Block]
+pBody = pInTags "body" block
-parseBody :: TagParser [Block]
-parseBody = liftM (fixPlains False . concat) $ manyTill block eof
+pHead :: TagParser [Block]
+pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag)
+ where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
+ setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
block :: TagParser [Block]
block = choice
@@ -94,6 +89,8 @@ block = choice
, pList
, pHrule
, pSimpleTable
+ , pHead
+ , pBody
, pPlain
, pRawHtmlBlock
]
@@ -366,7 +363,7 @@ pImage = do
let url = fromAttrib "src" tag
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
- return [Image (toList $ text alt) (escapeURI url, title)]
+ return [Image (B.toList $ B.text alt) (escapeURI url, title)]
pCode :: TagParser [Inline]
pCode = try $ do
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 081ec7b5e..0e74406ef 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -21,7 +21,7 @@ import Text.Pandoc.Readers.Haddock.Parse
readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
-> Pandoc
-readHaddock _ s = Pandoc (Meta [] [] []) blocks
+readHaddock _ s = Pandoc nullMeta blocks
where
blocks = case parseParas (tokenise s (0,0)) of
Left [] -> error "parse failure"
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f3f76ce5c..0d4afb2a7 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -65,13 +65,11 @@ parseLaTeX = do
bs <- blocks
eof
st <- getState
- let title' = stateTitle st
- let authors' = stateAuthors st
- let date' = stateDate st
+ let meta = stateMeta st
refs <- getOption readerReferences
mbsty <- getOption readerCitationStyle
- return $ processBiblio mbsty refs
- $ Pandoc (Meta title' authors' date') $ toList bs
+ let (Pandoc _ bs') = processBiblio mbsty refs $ doc bs
+ return $ Pandoc meta bs'
type LP = Parser [Char] ParserState
@@ -249,13 +247,13 @@ ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
- , ("title", mempty <$ (skipopts *> tok >>= addTitle))
- , ("subtitle", mempty <$ (skipopts *> tok >>= addSubtitle))
+ , ("title", mempty <$ (skipopts *> tok >>= addMeta "title"))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
, ("author", mempty <$ (skipopts *> authors))
-- -- in letter class, temp. store address & sig as title, author
- , ("address", mempty <$ (skipopts *> tok >>= addTitle))
+ , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
, ("signature", mempty <$ (skipopts *> authors))
- , ("date", mempty <$ (skipopts *> tok >>= addDate))
+ , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-- sectioning
, ("chapter", updateState (\s -> s{ stateHasChapters = True })
*> section nullAttr 0)
@@ -301,12 +299,8 @@ blockCommands = M.fromList $
, "hspace", "vspace"
]
-addTitle :: Inlines -> LP ()
-addTitle tit = updateState (\s -> s{ stateTitle = toList tit })
-
-addSubtitle :: Inlines -> LP ()
-addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++
- toList (str ":" <> linebreak <> tit) })
+addMeta :: ToMetaValue a => String -> a -> LP ()
+addMeta field val = updateState $ setMeta field val
authors :: LP ()
authors = try $ do
@@ -317,10 +311,7 @@ authors = try $ do
-- skip e.g. \vspace{10pt}
auths <- sepBy oneAuthor (controlSeq "and")
char '}'
- updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths })
-
-addDate :: Inlines -> LP ()
-addDate dat = updateState (\s -> s{ stateDate = toList dat })
+ addMeta "authors" (map trimInlines auths)
section :: Attr -> Int -> LP Blocks
section attr lvl = do
@@ -872,20 +863,24 @@ letter_contents = do
bs <- blocks
st <- getState
-- add signature (author) and address (title)
- let addr = case stateTitle st of
- [] -> mempty
- x -> para $ trimInlines $ fromList x
- updateState $ \s -> s{ stateAuthors = [], stateTitle = [] }
+ let addr = case lookupMeta "address" (stateMeta st) of
+ Just (MetaBlocks [Plain xs]) ->
+ para $ trimInlines $ fromList xs
+ _ -> mempty
return $ addr <> bs -- sig added by \closing
closing :: LP Blocks
closing = do
contents <- tok
st <- getState
- let sigs = case stateAuthors st of
- [] -> mempty
- xs -> para $ trimInlines $ fromList
- $ intercalate [LineBreak] xs
+ let extractInlines (MetaBlocks [Plain ys]) = ys
+ extractInlines (MetaBlocks [Para ys ]) = ys
+ extractInlines _ = []
+ let sigs = case lookupMeta "author" (stateMeta st) of
+ Just (MetaList xs) ->
+ para $ trimInlines $ fromList $
+ intercalate [LineBreak] $ map extractInlines xs
+ _ -> mempty
return $ para (trimInlines contents) <> sigs
item :: LP Blocks
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 35c236041..8c836614f 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -37,7 +37,13 @@ import Data.Ord ( comparing )
import Data.Char ( isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
+import qualified Data.Text as T
+import Data.Text (Text)
+import qualified Data.Yaml as Yaml
+import qualified Data.HashMap.Strict as H
import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.UTF8 as UTF8
+import qualified Data.Vector as V
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import Text.Pandoc.Options
import Text.Pandoc.Shared
@@ -196,12 +202,13 @@ dateLine = try $ do
skipSpaces
trimInlinesF . mconcat <$> manyTill inline newline
-titleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines)
+titleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
titleBlock = pandocTitleBlock
+ <|> yamlTitleBlock
<|> mmdTitleBlock
- <|> return (mempty, return [], mempty)
+ <|> return (return id)
-pandocTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines)
+pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
@@ -209,25 +216,78 @@ pandocTitleBlock = try $ do
author <- option (return []) authorsLine
date <- option mempty dateLine
optional blanklines
- return (title, author, date)
-
-mmdTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines)
+ return $ do
+ title' <- title
+ author' <- author
+ date' <- date
+ return $ B.setMeta "title" title'
+ . B.setMeta "author" author'
+ . B.setMeta "date" date'
+
+yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
+yamlTitleBlock = try $ do
+ guardEnabled Ext_yaml_title_block
+ string "---"
+ blankline
+ rawYaml <- unlines <$> manyTill anyLine stopLine
+ optional blanklines
+ opts <- stateOptions <$> getState
+ return $ return $
+ case Yaml.decode $ UTF8.fromString rawYaml of
+ Just (Yaml.Object hashmap) ->
+ H.foldrWithKey (\k v f ->
+ if ignorable k
+ then f
+ else B.setMeta (T.unpack k) (yamlToMeta opts v) . f)
+ id hashmap
+ _ -> fail "Could not parse yaml object"
+
+-- ignore fields starting with _
+ignorable :: Text -> Bool
+ignorable t = (T.pack "_") `T.isPrefixOf` t
+
+toMetaValue :: ReaderOptions -> Text -> MetaValue
+toMetaValue opts x =
+ case readMarkdown opts (T.unpack x) of
+ Pandoc _ [Plain xs] -> MetaInlines xs
+ Pandoc _ [Para xs]
+ | endsWithNewline x -> MetaBlocks [Para xs]
+ | otherwise -> MetaInlines xs
+ Pandoc _ bs -> MetaBlocks bs
+ where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t
+
+yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
+yamlToMeta opts (Yaml.String t) = toMetaValue opts t
+yamlToMeta _ (Yaml.Number n) = MetaString $ show n
+yamlToMeta _ (Yaml.Bool b) = MetaString $ map toLower $ show b
+yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
+ $ V.toList xs
+yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
+ if ignorable k
+ then m
+ else M.insert (T.unpack k)
+ (yamlToMeta opts v) m)
+ M.empty o
+yamlToMeta _ _ = MetaString ""
+
+stopLine :: MarkdownParser ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
+
+mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
kvPairs <- many1 kvPair
blanklines
- let title = maybe mempty return $ lookup "title" kvPairs
- let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs
- let date = maybe mempty return $ lookup "date" kvPairs
- return (title, author, date)
+ return $ return $ \(Pandoc m bs) ->
+ Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs
-kvPair :: MarkdownParser (String, Inlines)
+kvPair :: MarkdownParser (String, MetaValue)
kvPair = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
val <- manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
let key' = concat $ words $ map toLower key
- let val' = trimInlines $ B.text val
+ let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val
return (key',val')
parseMarkdown :: MarkdownParser Pandoc
@@ -236,16 +296,15 @@ parseMarkdown = do
updateState $ \state -> state { stateOptions =
let oldOpts = stateOptions state in
oldOpts{ readerParseRaw = True } }
- (title, authors, date) <- option (mempty,return [],mempty) titleBlock
+ titleTrans <- option (return id) titleBlock
blocks <- parseBlocks
st <- getState
mbsty <- getOption readerCitationStyle
refs <- getOption readerReferences
return $ processBiblio mbsty refs
- $ B.setTitle (runF title st)
- $ B.setAuthors (runF authors st)
- $ B.setDate (runF date st)
- $ B.doc $ runF blocks st
+ $ runF titleTrans st
+ $ B.doc
+ $ runF blocks st
addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
addWarning mbpos msg =
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index a0e5a0635..c5d4cb98a 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -33,12 +33,6 @@ module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
-nullMeta :: Meta
-nullMeta = Meta{ docTitle = []
- , docAuthors = []
- , docDate = []
- }
-
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
-- an inline list, or an inline. Thus, for example,
@@ -47,7 +41,7 @@ nullMeta = Meta{ docTitle = []
--
-- will be treated as if it were
--
--- > Pandoc (Meta [] [] []) [Plain [Str "hi"]]
+-- > Pandoc nullMeta [Plain [Str "hi"]]
--
readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 4e0c0a277..0829996a7 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,6 +31,7 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
+import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
@@ -39,7 +40,6 @@ import Data.List ( findIndex, intersperse, intercalate,
transpose, sort, deleteFirstsBy, isSuffixOf )
import qualified Data.Map as M
import Text.Printf ( printf )
-import Data.Maybe ( catMaybes )
import Control.Applicative ((<$>), (<$), (<*), (*>))
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
@@ -87,16 +87,30 @@ promoteHeaders _ [] = []
-- | If list of blocks starts with a header (or a header and subheader)
-- of level that are not found elsewhere, return it as a title and
--- promote all the other headers.
-titleTransform :: [Block] -- ^ list of blocks
- -> ([Block], [Inline]) -- ^ modified list of blocks, title
-titleTransform ((Header 1 _ head1):(Header 2 _ head2):rest) |
- not (any (isHeader 1) rest || any (isHeader 2) rest) = -- both title & subtitle
- (promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2)
-titleTransform ((Header 1 _ head1):rest) |
- not (any (isHeader 1) rest) = -- title, no subtitle
- (promoteHeaders 1 rest, head1)
-titleTransform blocks = (blocks, [])
+-- promote all the other headers. Also process a definition list right
+-- after the title block as metadata.
+titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata
+ -> ([Block], Meta) -- ^ modified list of blocks, metadata
+titleTransform (bs, meta) =
+ let (bs', meta') =
+ case bs of
+ ((Header 1 _ head1):(Header 2 _ head2):rest)
+ | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub
+ (promoteHeaders 2 rest, setMeta "title" (fromList head1) $
+ setMeta "subtitle" (fromList head2) meta)
+ ((Header 1 _ head1):rest)
+ | not (any (isHeader 1) rest) -> -- title only
+ (promoteHeaders 1 rest,
+ setMeta "title" (fromList head1) meta)
+ _ -> (bs, meta)
+ in case bs' of
+ (DefinitionList ds : rest) ->
+ (rest, metaFromDefList ds meta')
+ _ -> (bs', meta')
+
+metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta
+metaFromDefList ds meta = foldr f meta ds
+ where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v)
parseRST :: RSTParser Pandoc
parseRST = do
@@ -114,14 +128,12 @@ parseRST = do
-- now parse it for real...
blocks <- B.toList <$> parseBlocks
standalone <- getOption readerStandalone
- let (blocks', title) = if standalone
- then titleTransform blocks
- else (blocks, [])
state <- getState
- let authors = stateAuthors state
- let date = stateDate state
- let title' = if null title then stateTitle state else title
- return $ Pandoc (Meta title' authors date) blocks'
+ let meta = stateMeta state
+ let (blocks', meta') = if standalone
+ then titleTransform (blocks, meta)
+ else (blocks, meta)
+ return $ Pandoc meta' blocks'
--
-- parsing blocks
@@ -163,38 +175,19 @@ rawFieldListItem indent = try $ do
return (name, raw)
fieldListItem :: String
- -> RSTParser (Maybe (Inlines, [Blocks]))
+ -> RSTParser (Inlines, [Blocks])
fieldListItem indent = try $ do
(name, raw) <- rawFieldListItem indent
let term = B.str name
contents <- parseFromString parseBlocks raw
optional blanklines
- case (name, B.toList contents) of
- ("Author", x) -> do
- updateState $ \st ->
- st{ stateAuthors = stateAuthors st ++ [extractContents x] }
- return Nothing
- ("Authors", [BulletList auths]) -> do
- updateState $ \st -> st{ stateAuthors = map extractContents auths }
- return Nothing
- ("Date", x) -> do
- updateState $ \st -> st{ stateDate = extractContents x }
- return Nothing
- ("Title", x) -> do
- updateState $ \st -> st{ stateTitle = extractContents x }
- return Nothing
- _ -> return $ Just (term, [contents])
-
-extractContents :: [Block] -> [Inline]
-extractContents [Plain auth] = auth
-extractContents [Para auth] = auth
-extractContents _ = []
+ return (term, [contents])
fieldList :: RSTParser Blocks
fieldList = try $ do
indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
- case catMaybes items of
+ case items of
[] -> return mempty
items' -> return $ B.definitionList items'
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 22ec52362..a1687a691 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -93,7 +93,7 @@ parseTextile = do
updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
blocks <- parseBlocks
- return $ Pandoc (Meta [] [] []) blocks -- FIXME
+ return $ Pandoc nullMeta blocks -- FIXME
noteMarker :: Parser [Char] ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')