diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2013-05-10 22:53:35 -0700 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2013-06-24 20:29:41 -0700 |
commit | f869f7e08dad315945d52be3fcacf6ff0c05c5c1 (patch) | |
tree | 4c426ebf5a30b51499859f9d41a890534b6a18a6 /src/Text/Pandoc/Readers | |
parent | e32a8f5981969bb6d0a11bd945188c35817e4d96 (diff) | |
download | pandoc-f869f7e08dad315945d52be3fcacf6ff0c05c5c1.tar.gz |
Use new flexible metadata type.
* Depend on pandoc 1.12.
* Added yaml dependency.
* `Text.Pandoc.XML`: Removed `stripTags`. (API change.)
* `Text.Pandoc.Shared`: Added `metaToJSON`.
This will be used in writers to create a JSON object for use
in the templates from the pandoc metadata.
* Revised readers and writers to use the new Meta type.
* `Text.Pandoc.Options`: Added `Ext_yaml_title_block`.
* Markdown reader: Added support for YAML metadata block.
Note that it must come at the beginning of the document.
* `Text.Pandoc.Parsing.ParserState`: Replace `stateTitle`,
`stateAuthors`, `stateDate` with `stateMeta`.
* RST reader: Improved metadata.
Treat initial field list as metadata when standalone specified.
Previously ALL fields "title", "author", "date" in field lists
were treated as metadata, even if not at the beginning.
Use `subtitle` metadata field for subtitle.
* `Text.Pandoc.Templates`: Export `renderTemplate'` that takes a string
instead of a compiled template..
* OPML template: Use 'for' loop for authors.
* Org template: '#+TITLE:' is inserted before the title.
Previously the writer did this.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 43 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Haddock.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 49 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 93 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Native.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 73 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 2 |
7 files changed, 154 insertions, 116 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 32ce46fba..f6657a4d1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -39,7 +39,7 @@ module Text.Pandoc.Readers.HTML ( readHtml import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition -import Text.Pandoc.Builder (text, toList) +import qualified Text.Pandoc.Builder as B import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -47,6 +47,7 @@ import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) +import Control.Applicative ( (<$>), (<$) ) isSpace :: Char -> Bool isSpace ' ' = True @@ -58,32 +59,26 @@ isSpace _ = False readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readHtml opts inp = Pandoc meta blocks - where blocks = case runParser parseBody def{ stateOptions = opts } - "source" rest of - Left err' -> error $ "\nError at " ++ show err' - Right result -> result - tags = canonicalizeTags $ +readHtml opts inp = + case runParser parseDoc def{ stateOptions = opts } "source" tags of + Left err' -> error $ "\nError at " ++ show err' + Right result -> result + where tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp - hasHeader = any (~== TagOpen "head" []) tags - (meta, rest) = if hasHeader - then parseHeader tags - else (Meta [] [] [], tags) + parseDoc = do + blocks <- (fixPlains False . concat) <$> manyTill block eof + meta <- stateMeta <$> getState + return $ Pandoc meta blocks type TagParser = Parser [Tag String] ParserState --- TODO - fix this - not every header has a title tag -parseHeader :: [Tag String] -> (Meta, [Tag String]) -parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest) - where (tit,_) = break (~== TagClose "title") $ drop 1 $ - dropWhile (\t -> not $ t ~== TagOpen "title" []) tags - tit' = concatMap fromTagText $ filter isTagText tit - tit'' = normalizeSpaces $ toList $ text tit' - rest = drop 1 $ dropWhile (\t -> not $ t ~== TagClose "head" || - t ~== TagOpen "body" []) tags +pBody :: TagParser [Block] +pBody = pInTags "body" block -parseBody :: TagParser [Block] -parseBody = liftM (fixPlains False . concat) $ manyTill block eof +pHead :: TagParser [Block] +pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag) + where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces + setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) block :: TagParser [Block] block = choice @@ -94,6 +89,8 @@ block = choice , pList , pHrule , pSimpleTable + , pHead + , pBody , pPlain , pRawHtmlBlock ] @@ -366,7 +363,7 @@ pImage = do let url = fromAttrib "src" tag let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - return [Image (toList $ text alt) (escapeURI url, title)] + return [Image (B.toList $ B.text alt) (escapeURI url, title)] pCode :: TagParser [Inline] pCode = try $ do diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 081ec7b5e..0e74406ef 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -21,7 +21,7 @@ import Text.Pandoc.Readers.Haddock.Parse readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse -> Pandoc -readHaddock _ s = Pandoc (Meta [] [] []) blocks +readHaddock _ s = Pandoc nullMeta blocks where blocks = case parseParas (tokenise s (0,0)) of Left [] -> error "parse failure" diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f3f76ce5c..0d4afb2a7 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -65,13 +65,11 @@ parseLaTeX = do bs <- blocks eof st <- getState - let title' = stateTitle st - let authors' = stateAuthors st - let date' = stateDate st + let meta = stateMeta st refs <- getOption readerReferences mbsty <- getOption readerCitationStyle - return $ processBiblio mbsty refs - $ Pandoc (Meta title' authors' date') $ toList bs + let (Pandoc _ bs') = processBiblio mbsty refs $ doc bs + return $ Pandoc meta bs' type LP = Parser [Char] ParserState @@ -249,13 +247,13 @@ ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) - , ("title", mempty <$ (skipopts *> tok >>= addTitle)) - , ("subtitle", mempty <$ (skipopts *> tok >>= addSubtitle)) + , ("title", mempty <$ (skipopts *> tok >>= addMeta "title")) + , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) , ("author", mempty <$ (skipopts *> authors)) -- -- in letter class, temp. store address & sig as title, author - , ("address", mempty <$ (skipopts *> tok >>= addTitle)) + , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) , ("signature", mempty <$ (skipopts *> authors)) - , ("date", mempty <$ (skipopts *> tok >>= addDate)) + , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) -- sectioning , ("chapter", updateState (\s -> s{ stateHasChapters = True }) *> section nullAttr 0) @@ -301,12 +299,8 @@ blockCommands = M.fromList $ , "hspace", "vspace" ] -addTitle :: Inlines -> LP () -addTitle tit = updateState (\s -> s{ stateTitle = toList tit }) - -addSubtitle :: Inlines -> LP () -addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++ - toList (str ":" <> linebreak <> tit) }) +addMeta :: ToMetaValue a => String -> a -> LP () +addMeta field val = updateState $ setMeta field val authors :: LP () authors = try $ do @@ -317,10 +311,7 @@ authors = try $ do -- skip e.g. \vspace{10pt} auths <- sepBy oneAuthor (controlSeq "and") char '}' - updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths }) - -addDate :: Inlines -> LP () -addDate dat = updateState (\s -> s{ stateDate = toList dat }) + addMeta "authors" (map trimInlines auths) section :: Attr -> Int -> LP Blocks section attr lvl = do @@ -872,20 +863,24 @@ letter_contents = do bs <- blocks st <- getState -- add signature (author) and address (title) - let addr = case stateTitle st of - [] -> mempty - x -> para $ trimInlines $ fromList x - updateState $ \s -> s{ stateAuthors = [], stateTitle = [] } + let addr = case lookupMeta "address" (stateMeta st) of + Just (MetaBlocks [Plain xs]) -> + para $ trimInlines $ fromList xs + _ -> mempty return $ addr <> bs -- sig added by \closing closing :: LP Blocks closing = do contents <- tok st <- getState - let sigs = case stateAuthors st of - [] -> mempty - xs -> para $ trimInlines $ fromList - $ intercalate [LineBreak] xs + let extractInlines (MetaBlocks [Plain ys]) = ys + extractInlines (MetaBlocks [Para ys ]) = ys + extractInlines _ = [] + let sigs = case lookupMeta "author" (stateMeta st) of + Just (MetaList xs) -> + para $ trimInlines $ fromList $ + intercalate [LineBreak] $ map extractInlines xs + _ -> mempty return $ para (trimInlines contents) <> sigs item :: LP Blocks diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 35c236041..8c836614f 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -37,7 +37,13 @@ import Data.Ord ( comparing ) import Data.Char ( isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import qualified Data.Text as T +import Data.Text (Text) +import qualified Data.Yaml as Yaml +import qualified Data.HashMap.Strict as H import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Data.Vector as V import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import Text.Pandoc.Options import Text.Pandoc.Shared @@ -196,12 +202,13 @@ dateLine = try $ do skipSpaces trimInlinesF . mconcat <$> manyTill inline newline -titleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) +titleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) titleBlock = pandocTitleBlock + <|> yamlTitleBlock <|> mmdTitleBlock - <|> return (mempty, return [], mempty) + <|> return (return id) -pandocTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) +pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') @@ -209,25 +216,78 @@ pandocTitleBlock = try $ do author <- option (return []) authorsLine date <- option mempty dateLine optional blanklines - return (title, author, date) - -mmdTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) + return $ do + title' <- title + author' <- author + date' <- date + return $ B.setMeta "title" title' + . B.setMeta "author" author' + . B.setMeta "date" date' + +yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) +yamlTitleBlock = try $ do + guardEnabled Ext_yaml_title_block + string "---" + blankline + rawYaml <- unlines <$> manyTill anyLine stopLine + optional blanklines + opts <- stateOptions <$> getState + return $ return $ + case Yaml.decode $ UTF8.fromString rawYaml of + Just (Yaml.Object hashmap) -> + H.foldrWithKey (\k v f -> + if ignorable k + then f + else B.setMeta (T.unpack k) (yamlToMeta opts v) . f) + id hashmap + _ -> fail "Could not parse yaml object" + +-- ignore fields starting with _ +ignorable :: Text -> Bool +ignorable t = (T.pack "_") `T.isPrefixOf` t + +toMetaValue :: ReaderOptions -> Text -> MetaValue +toMetaValue opts x = + case readMarkdown opts (T.unpack x) of + Pandoc _ [Plain xs] -> MetaInlines xs + Pandoc _ [Para xs] + | endsWithNewline x -> MetaBlocks [Para xs] + | otherwise -> MetaInlines xs + Pandoc _ bs -> MetaBlocks bs + where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t + +yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue +yamlToMeta opts (Yaml.String t) = toMetaValue opts t +yamlToMeta _ (Yaml.Number n) = MetaString $ show n +yamlToMeta _ (Yaml.Bool b) = MetaString $ map toLower $ show b +yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) + $ V.toList xs +yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> + if ignorable k + then m + else M.insert (T.unpack k) + (yamlToMeta opts v) m) + M.empty o +yamlToMeta _ _ = MetaString "" + +stopLine :: MarkdownParser () +stopLine = try $ (string "---" <|> string "...") >> blankline >> return () + +mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - let title = maybe mempty return $ lookup "title" kvPairs - let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs - let date = maybe mempty return $ lookup "date" kvPairs - return (title, author, date) + return $ return $ \(Pandoc m bs) -> + Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs -kvPair :: MarkdownParser (String, Inlines) +kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') val <- manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) let key' = concat $ words $ map toLower key - let val' = trimInlines $ B.text val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val return (key',val') parseMarkdown :: MarkdownParser Pandoc @@ -236,16 +296,15 @@ parseMarkdown = do updateState $ \state -> state { stateOptions = let oldOpts = stateOptions state in oldOpts{ readerParseRaw = True } } - (title, authors, date) <- option (mempty,return [],mempty) titleBlock + titleTrans <- option (return id) titleBlock blocks <- parseBlocks st <- getState mbsty <- getOption readerCitationStyle refs <- getOption readerReferences return $ processBiblio mbsty refs - $ B.setTitle (runF title st) - $ B.setAuthors (runF authors st) - $ B.setDate (runF date st) - $ B.doc $ runF blocks st + $ runF titleTrans st + $ B.doc + $ runF blocks st addWarning :: Maybe SourcePos -> String -> MarkdownParser () addWarning mbpos msg = diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index a0e5a0635..c5d4cb98a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -33,12 +33,6 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) -nullMeta :: Meta -nullMeta = Meta{ docTitle = [] - , docAuthors = [] - , docDate = [] - } - -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, -- an inline list, or an inline. Thus, for example, @@ -47,7 +41,7 @@ nullMeta = Meta{ docTitle = [] -- -- will be treated as if it were -- --- > Pandoc (Meta [] [] []) [Plain [Str "hi"]] +-- > Pandoc nullMeta [Plain [Str "hi"]] -- readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4e0c0a277..0829996a7 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,6 +31,7 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition +import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options @@ -39,7 +40,6 @@ import Data.List ( findIndex, intersperse, intercalate, transpose, sort, deleteFirstsBy, isSuffixOf ) import qualified Data.Map as M import Text.Printf ( printf ) -import Data.Maybe ( catMaybes ) import Control.Applicative ((<$>), (<$), (<*), (*>)) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B @@ -87,16 +87,30 @@ promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) -- of level that are not found elsewhere, return it as a title and --- promote all the other headers. -titleTransform :: [Block] -- ^ list of blocks - -> ([Block], [Inline]) -- ^ modified list of blocks, title -titleTransform ((Header 1 _ head1):(Header 2 _ head2):rest) | - not (any (isHeader 1) rest || any (isHeader 2) rest) = -- both title & subtitle - (promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2) -titleTransform ((Header 1 _ head1):rest) | - not (any (isHeader 1) rest) = -- title, no subtitle - (promoteHeaders 1 rest, head1) -titleTransform blocks = (blocks, []) +-- promote all the other headers. Also process a definition list right +-- after the title block as metadata. +titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata + -> ([Block], Meta) -- ^ modified list of blocks, metadata +titleTransform (bs, meta) = + let (bs', meta') = + case bs of + ((Header 1 _ head1):(Header 2 _ head2):rest) + | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub + (promoteHeaders 2 rest, setMeta "title" (fromList head1) $ + setMeta "subtitle" (fromList head2) meta) + ((Header 1 _ head1):rest) + | not (any (isHeader 1) rest) -> -- title only + (promoteHeaders 1 rest, + setMeta "title" (fromList head1) meta) + _ -> (bs, meta) + in case bs' of + (DefinitionList ds : rest) -> + (rest, metaFromDefList ds meta') + _ -> (bs', meta') + +metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta +metaFromDefList ds meta = foldr f meta ds + where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) parseRST :: RSTParser Pandoc parseRST = do @@ -114,14 +128,12 @@ parseRST = do -- now parse it for real... blocks <- B.toList <$> parseBlocks standalone <- getOption readerStandalone - let (blocks', title) = if standalone - then titleTransform blocks - else (blocks, []) state <- getState - let authors = stateAuthors state - let date = stateDate state - let title' = if null title then stateTitle state else title - return $ Pandoc (Meta title' authors date) blocks' + let meta = stateMeta state + let (blocks', meta') = if standalone + then titleTransform (blocks, meta) + else (blocks, meta) + return $ Pandoc meta' blocks' -- -- parsing blocks @@ -163,38 +175,19 @@ rawFieldListItem indent = try $ do return (name, raw) fieldListItem :: String - -> RSTParser (Maybe (Inlines, [Blocks])) + -> RSTParser (Inlines, [Blocks]) fieldListItem indent = try $ do (name, raw) <- rawFieldListItem indent let term = B.str name contents <- parseFromString parseBlocks raw optional blanklines - case (name, B.toList contents) of - ("Author", x) -> do - updateState $ \st -> - st{ stateAuthors = stateAuthors st ++ [extractContents x] } - return Nothing - ("Authors", [BulletList auths]) -> do - updateState $ \st -> st{ stateAuthors = map extractContents auths } - return Nothing - ("Date", x) -> do - updateState $ \st -> st{ stateDate = extractContents x } - return Nothing - ("Title", x) -> do - updateState $ \st -> st{ stateTitle = extractContents x } - return Nothing - _ -> return $ Just (term, [contents]) - -extractContents :: [Block] -> [Inline] -extractContents [Plain auth] = auth -extractContents [Para auth] = auth -extractContents _ = [] + return (term, [contents]) fieldList :: RSTParser Blocks fieldList = try $ do indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent - case catMaybes items of + case items of [] -> return mempty items' -> return $ B.definitionList items' diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 22ec52362..a1687a691 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -93,7 +93,7 @@ parseTextile = do updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... blocks <- parseBlocks - return $ Pandoc (Meta [] [] []) blocks -- FIXME + return $ Pandoc nullMeta blocks -- FIXME noteMarker :: Parser [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') |