aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs43
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs2
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs49
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs93
-rw-r--r--src/Text/Pandoc/Readers/Native.hs8
-rw-r--r--src/Text/Pandoc/Readers/RST.hs73
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
7 files changed, 154 insertions, 116 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 32ce46fba..f6657a4d1 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -39,7 +39,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
-import Text.Pandoc.Builder (text, toList)
+import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing
@@ -47,6 +47,7 @@ import Data.Maybe ( fromMaybe, isJust )
import Data.List ( intercalate )
import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero )
+import Control.Applicative ( (<$>), (<$) )
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -58,32 +59,26 @@ isSpace _ = False
readHtml :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
-readHtml opts inp = Pandoc meta blocks
- where blocks = case runParser parseBody def{ stateOptions = opts }
- "source" rest of
- Left err' -> error $ "\nError at " ++ show err'
- Right result -> result
- tags = canonicalizeTags $
+readHtml opts inp =
+ case runParser parseDoc def{ stateOptions = opts } "source" tags of
+ Left err' -> error $ "\nError at " ++ show err'
+ Right result -> result
+ where tags = canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
- hasHeader = any (~== TagOpen "head" []) tags
- (meta, rest) = if hasHeader
- then parseHeader tags
- else (Meta [] [] [], tags)
+ parseDoc = do
+ blocks <- (fixPlains False . concat) <$> manyTill block eof
+ meta <- stateMeta <$> getState
+ return $ Pandoc meta blocks
type TagParser = Parser [Tag String] ParserState
--- TODO - fix this - not every header has a title tag
-parseHeader :: [Tag String] -> (Meta, [Tag String])
-parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest)
- where (tit,_) = break (~== TagClose "title") $ drop 1 $
- dropWhile (\t -> not $ t ~== TagOpen "title" []) tags
- tit' = concatMap fromTagText $ filter isTagText tit
- tit'' = normalizeSpaces $ toList $ text tit'
- rest = drop 1 $ dropWhile (\t -> not $ t ~== TagClose "head" ||
- t ~== TagOpen "body" []) tags
+pBody :: TagParser [Block]
+pBody = pInTags "body" block
-parseBody :: TagParser [Block]
-parseBody = liftM (fixPlains False . concat) $ manyTill block eof
+pHead :: TagParser [Block]
+pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag)
+ where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces
+ setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t))
block :: TagParser [Block]
block = choice
@@ -94,6 +89,8 @@ block = choice
, pList
, pHrule
, pSimpleTable
+ , pHead
+ , pBody
, pPlain
, pRawHtmlBlock
]
@@ -366,7 +363,7 @@ pImage = do
let url = fromAttrib "src" tag
let title = fromAttrib "title" tag
let alt = fromAttrib "alt" tag
- return [Image (toList $ text alt) (escapeURI url, title)]
+ return [Image (B.toList $ B.text alt) (escapeURI url, title)]
pCode :: TagParser [Inline]
pCode = try $ do
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 081ec7b5e..0e74406ef 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -21,7 +21,7 @@ import Text.Pandoc.Readers.Haddock.Parse
readHaddock :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse
-> Pandoc
-readHaddock _ s = Pandoc (Meta [] [] []) blocks
+readHaddock _ s = Pandoc nullMeta blocks
where
blocks = case parseParas (tokenise s (0,0)) of
Left [] -> error "parse failure"
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f3f76ce5c..0d4afb2a7 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -65,13 +65,11 @@ parseLaTeX = do
bs <- blocks
eof
st <- getState
- let title' = stateTitle st
- let authors' = stateAuthors st
- let date' = stateDate st
+ let meta = stateMeta st
refs <- getOption readerReferences
mbsty <- getOption readerCitationStyle
- return $ processBiblio mbsty refs
- $ Pandoc (Meta title' authors' date') $ toList bs
+ let (Pandoc _ bs') = processBiblio mbsty refs $ doc bs
+ return $ Pandoc meta bs'
type LP = Parser [Char] ParserState
@@ -249,13 +247,13 @@ ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
blockCommands :: M.Map String (LP Blocks)
blockCommands = M.fromList $
[ ("par", mempty <$ skipopts)
- , ("title", mempty <$ (skipopts *> tok >>= addTitle))
- , ("subtitle", mempty <$ (skipopts *> tok >>= addSubtitle))
+ , ("title", mempty <$ (skipopts *> tok >>= addMeta "title"))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle"))
, ("author", mempty <$ (skipopts *> authors))
-- -- in letter class, temp. store address & sig as title, author
- , ("address", mempty <$ (skipopts *> tok >>= addTitle))
+ , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
, ("signature", mempty <$ (skipopts *> authors))
- , ("date", mempty <$ (skipopts *> tok >>= addDate))
+ , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-- sectioning
, ("chapter", updateState (\s -> s{ stateHasChapters = True })
*> section nullAttr 0)
@@ -301,12 +299,8 @@ blockCommands = M.fromList $
, "hspace", "vspace"
]
-addTitle :: Inlines -> LP ()
-addTitle tit = updateState (\s -> s{ stateTitle = toList tit })
-
-addSubtitle :: Inlines -> LP ()
-addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++
- toList (str ":" <> linebreak <> tit) })
+addMeta :: ToMetaValue a => String -> a -> LP ()
+addMeta field val = updateState $ setMeta field val
authors :: LP ()
authors = try $ do
@@ -317,10 +311,7 @@ authors = try $ do
-- skip e.g. \vspace{10pt}
auths <- sepBy oneAuthor (controlSeq "and")
char '}'
- updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths })
-
-addDate :: Inlines -> LP ()
-addDate dat = updateState (\s -> s{ stateDate = toList dat })
+ addMeta "authors" (map trimInlines auths)
section :: Attr -> Int -> LP Blocks
section attr lvl = do
@@ -872,20 +863,24 @@ letter_contents = do
bs <- blocks
st <- getState
-- add signature (author) and address (title)
- let addr = case stateTitle st of
- [] -> mempty
- x -> para $ trimInlines $ fromList x
- updateState $ \s -> s{ stateAuthors = [], stateTitle = [] }
+ let addr = case lookupMeta "address" (stateMeta st) of
+ Just (MetaBlocks [Plain xs]) ->
+ para $ trimInlines $ fromList xs
+ _ -> mempty
return $ addr <> bs -- sig added by \closing
closing :: LP Blocks
closing = do
contents <- tok
st <- getState
- let sigs = case stateAuthors st of
- [] -> mempty
- xs -> para $ trimInlines $ fromList
- $ intercalate [LineBreak] xs
+ let extractInlines (MetaBlocks [Plain ys]) = ys
+ extractInlines (MetaBlocks [Para ys ]) = ys
+ extractInlines _ = []
+ let sigs = case lookupMeta "author" (stateMeta st) of
+ Just (MetaList xs) ->
+ para $ trimInlines $ fromList $
+ intercalate [LineBreak] $ map extractInlines xs
+ _ -> mempty
return $ para (trimInlines contents) <> sigs
item :: LP Blocks
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 35c236041..8c836614f 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -37,7 +37,13 @@ import Data.Ord ( comparing )
import Data.Char ( isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
+import qualified Data.Text as T
+import Data.Text (Text)
+import qualified Data.Yaml as Yaml
+import qualified Data.HashMap.Strict as H
import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.UTF8 as UTF8
+import qualified Data.Vector as V
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import Text.Pandoc.Options
import Text.Pandoc.Shared
@@ -196,12 +202,13 @@ dateLine = try $ do
skipSpaces
trimInlinesF . mconcat <$> manyTill inline newline
-titleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines)
+titleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
titleBlock = pandocTitleBlock
+ <|> yamlTitleBlock
<|> mmdTitleBlock
- <|> return (mempty, return [], mempty)
+ <|> return (return id)
-pandocTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines)
+pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
@@ -209,25 +216,78 @@ pandocTitleBlock = try $ do
author <- option (return []) authorsLine
date <- option mempty dateLine
optional blanklines
- return (title, author, date)
-
-mmdTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines)
+ return $ do
+ title' <- title
+ author' <- author
+ date' <- date
+ return $ B.setMeta "title" title'
+ . B.setMeta "author" author'
+ . B.setMeta "date" date'
+
+yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
+yamlTitleBlock = try $ do
+ guardEnabled Ext_yaml_title_block
+ string "---"
+ blankline
+ rawYaml <- unlines <$> manyTill anyLine stopLine
+ optional blanklines
+ opts <- stateOptions <$> getState
+ return $ return $
+ case Yaml.decode $ UTF8.fromString rawYaml of
+ Just (Yaml.Object hashmap) ->
+ H.foldrWithKey (\k v f ->
+ if ignorable k
+ then f
+ else B.setMeta (T.unpack k) (yamlToMeta opts v) . f)
+ id hashmap
+ _ -> fail "Could not parse yaml object"
+
+-- ignore fields starting with _
+ignorable :: Text -> Bool
+ignorable t = (T.pack "_") `T.isPrefixOf` t
+
+toMetaValue :: ReaderOptions -> Text -> MetaValue
+toMetaValue opts x =
+ case readMarkdown opts (T.unpack x) of
+ Pandoc _ [Plain xs] -> MetaInlines xs
+ Pandoc _ [Para xs]
+ | endsWithNewline x -> MetaBlocks [Para xs]
+ | otherwise -> MetaInlines xs
+ Pandoc _ bs -> MetaBlocks bs
+ where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t
+
+yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue
+yamlToMeta opts (Yaml.String t) = toMetaValue opts t
+yamlToMeta _ (Yaml.Number n) = MetaString $ show n
+yamlToMeta _ (Yaml.Bool b) = MetaString $ map toLower $ show b
+yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts)
+ $ V.toList xs
+yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m ->
+ if ignorable k
+ then m
+ else M.insert (T.unpack k)
+ (yamlToMeta opts v) m)
+ M.empty o
+yamlToMeta _ _ = MetaString ""
+
+stopLine :: MarkdownParser ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
+
+mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc))
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
kvPairs <- many1 kvPair
blanklines
- let title = maybe mempty return $ lookup "title" kvPairs
- let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs
- let date = maybe mempty return $ lookup "date" kvPairs
- return (title, author, date)
+ return $ return $ \(Pandoc m bs) ->
+ Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs
-kvPair :: MarkdownParser (String, Inlines)
+kvPair :: MarkdownParser (String, MetaValue)
kvPair = try $ do
key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
val <- manyTill anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
let key' = concat $ words $ map toLower key
- let val' = trimInlines $ B.text val
+ let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val
return (key',val')
parseMarkdown :: MarkdownParser Pandoc
@@ -236,16 +296,15 @@ parseMarkdown = do
updateState $ \state -> state { stateOptions =
let oldOpts = stateOptions state in
oldOpts{ readerParseRaw = True } }
- (title, authors, date) <- option (mempty,return [],mempty) titleBlock
+ titleTrans <- option (return id) titleBlock
blocks <- parseBlocks
st <- getState
mbsty <- getOption readerCitationStyle
refs <- getOption readerReferences
return $ processBiblio mbsty refs
- $ B.setTitle (runF title st)
- $ B.setAuthors (runF authors st)
- $ B.setDate (runF date st)
- $ B.doc $ runF blocks st
+ $ runF titleTrans st
+ $ B.doc
+ $ runF blocks st
addWarning :: Maybe SourcePos -> String -> MarkdownParser ()
addWarning mbpos msg =
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index a0e5a0635..c5d4cb98a 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -33,12 +33,6 @@ module Text.Pandoc.Readers.Native ( readNative ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
-nullMeta :: Meta
-nullMeta = Meta{ docTitle = []
- , docAuthors = []
- , docDate = []
- }
-
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
-- an inline list, or an inline. Thus, for example,
@@ -47,7 +41,7 @@ nullMeta = Meta{ docTitle = []
--
-- will be treated as if it were
--
--- > Pandoc (Meta [] [] []) [Plain [Str "hi"]]
+-- > Pandoc nullMeta [Plain [Str "hi"]]
--
readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 4e0c0a277..0829996a7 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,6 +31,7 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
+import Text.Pandoc.Builder (setMeta, fromList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Options
@@ -39,7 +40,6 @@ import Data.List ( findIndex, intersperse, intercalate,
transpose, sort, deleteFirstsBy, isSuffixOf )
import qualified Data.Map as M
import Text.Printf ( printf )
-import Data.Maybe ( catMaybes )
import Control.Applicative ((<$>), (<$), (<*), (*>))
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
@@ -87,16 +87,30 @@ promoteHeaders _ [] = []
-- | If list of blocks starts with a header (or a header and subheader)
-- of level that are not found elsewhere, return it as a title and
--- promote all the other headers.
-titleTransform :: [Block] -- ^ list of blocks
- -> ([Block], [Inline]) -- ^ modified list of blocks, title
-titleTransform ((Header 1 _ head1):(Header 2 _ head2):rest) |
- not (any (isHeader 1) rest || any (isHeader 2) rest) = -- both title & subtitle
- (promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2)
-titleTransform ((Header 1 _ head1):rest) |
- not (any (isHeader 1) rest) = -- title, no subtitle
- (promoteHeaders 1 rest, head1)
-titleTransform blocks = (blocks, [])
+-- promote all the other headers. Also process a definition list right
+-- after the title block as metadata.
+titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata
+ -> ([Block], Meta) -- ^ modified list of blocks, metadata
+titleTransform (bs, meta) =
+ let (bs', meta') =
+ case bs of
+ ((Header 1 _ head1):(Header 2 _ head2):rest)
+ | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub
+ (promoteHeaders 2 rest, setMeta "title" (fromList head1) $
+ setMeta "subtitle" (fromList head2) meta)
+ ((Header 1 _ head1):rest)
+ | not (any (isHeader 1) rest) -> -- title only
+ (promoteHeaders 1 rest,
+ setMeta "title" (fromList head1) meta)
+ _ -> (bs, meta)
+ in case bs' of
+ (DefinitionList ds : rest) ->
+ (rest, metaFromDefList ds meta')
+ _ -> (bs', meta')
+
+metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta
+metaFromDefList ds meta = foldr f meta ds
+ where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v)
parseRST :: RSTParser Pandoc
parseRST = do
@@ -114,14 +128,12 @@ parseRST = do
-- now parse it for real...
blocks <- B.toList <$> parseBlocks
standalone <- getOption readerStandalone
- let (blocks', title) = if standalone
- then titleTransform blocks
- else (blocks, [])
state <- getState
- let authors = stateAuthors state
- let date = stateDate state
- let title' = if null title then stateTitle state else title
- return $ Pandoc (Meta title' authors date) blocks'
+ let meta = stateMeta state
+ let (blocks', meta') = if standalone
+ then titleTransform (blocks, meta)
+ else (blocks, meta)
+ return $ Pandoc meta' blocks'
--
-- parsing blocks
@@ -163,38 +175,19 @@ rawFieldListItem indent = try $ do
return (name, raw)
fieldListItem :: String
- -> RSTParser (Maybe (Inlines, [Blocks]))
+ -> RSTParser (Inlines, [Blocks])
fieldListItem indent = try $ do
(name, raw) <- rawFieldListItem indent
let term = B.str name
contents <- parseFromString parseBlocks raw
optional blanklines
- case (name, B.toList contents) of
- ("Author", x) -> do
- updateState $ \st ->
- st{ stateAuthors = stateAuthors st ++ [extractContents x] }
- return Nothing
- ("Authors", [BulletList auths]) -> do
- updateState $ \st -> st{ stateAuthors = map extractContents auths }
- return Nothing
- ("Date", x) -> do
- updateState $ \st -> st{ stateDate = extractContents x }
- return Nothing
- ("Title", x) -> do
- updateState $ \st -> st{ stateTitle = extractContents x }
- return Nothing
- _ -> return $ Just (term, [contents])
-
-extractContents :: [Block] -> [Inline]
-extractContents [Plain auth] = auth
-extractContents [Para auth] = auth
-extractContents _ = []
+ return (term, [contents])
fieldList :: RSTParser Blocks
fieldList = try $ do
indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
- case catMaybes items of
+ case items of
[] -> return mempty
items' -> return $ B.definitionList items'
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 22ec52362..a1687a691 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -93,7 +93,7 @@ parseTextile = do
updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
blocks <- parseBlocks
- return $ Pandoc (Meta [] [] []) blocks -- FIXME
+ return $ Pandoc nullMeta blocks -- FIXME
noteMarker :: Parser [Char] ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')