diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 43 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Haddock.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 49 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 93 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Native.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 73 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 2 |
7 files changed, 154 insertions, 116 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 32ce46fba..f6657a4d1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -39,7 +39,7 @@ module Text.Pandoc.Readers.HTML ( readHtml import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition -import Text.Pandoc.Builder (text, toList) +import qualified Text.Pandoc.Builder as B import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -47,6 +47,7 @@ import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) +import Control.Applicative ( (<$>), (<$) ) isSpace :: Char -> Bool isSpace ' ' = True @@ -58,32 +59,26 @@ isSpace _ = False readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readHtml opts inp = Pandoc meta blocks - where blocks = case runParser parseBody def{ stateOptions = opts } - "source" rest of - Left err' -> error $ "\nError at " ++ show err' - Right result -> result - tags = canonicalizeTags $ +readHtml opts inp = + case runParser parseDoc def{ stateOptions = opts } "source" tags of + Left err' -> error $ "\nError at " ++ show err' + Right result -> result + where tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp - hasHeader = any (~== TagOpen "head" []) tags - (meta, rest) = if hasHeader - then parseHeader tags - else (Meta [] [] [], tags) + parseDoc = do + blocks <- (fixPlains False . concat) <$> manyTill block eof + meta <- stateMeta <$> getState + return $ Pandoc meta blocks type TagParser = Parser [Tag String] ParserState --- TODO - fix this - not every header has a title tag -parseHeader :: [Tag String] -> (Meta, [Tag String]) -parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest) - where (tit,_) = break (~== TagClose "title") $ drop 1 $ - dropWhile (\t -> not $ t ~== TagOpen "title" []) tags - tit' = concatMap fromTagText $ filter isTagText tit - tit'' = normalizeSpaces $ toList $ text tit' - rest = drop 1 $ dropWhile (\t -> not $ t ~== TagClose "head" || - t ~== TagOpen "body" []) tags +pBody :: TagParser [Block] +pBody = pInTags "body" block -parseBody :: TagParser [Block] -parseBody = liftM (fixPlains False . concat) $ manyTill block eof +pHead :: TagParser [Block] +pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag) + where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces + setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) block :: TagParser [Block] block = choice @@ -94,6 +89,8 @@ block = choice , pList , pHrule , pSimpleTable + , pHead + , pBody , pPlain , pRawHtmlBlock ] @@ -366,7 +363,7 @@ pImage = do let url = fromAttrib "src" tag let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - return [Image (toList $ text alt) (escapeURI url, title)] + return [Image (B.toList $ B.text alt) (escapeURI url, title)] pCode :: TagParser [Inline] pCode = try $ do diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 081ec7b5e..0e74406ef 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -21,7 +21,7 @@ import Text.Pandoc.Readers.Haddock.Parse readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse -> Pandoc -readHaddock _ s = Pandoc (Meta [] [] []) blocks +readHaddock _ s = Pandoc nullMeta blocks where blocks = case parseParas (tokenise s (0,0)) of Left [] -> error "parse failure" diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f3f76ce5c..0d4afb2a7 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -65,13 +65,11 @@ parseLaTeX = do bs <- blocks eof st <- getState - let title' = stateTitle st - let authors' = stateAuthors st - let date' = stateDate st + let meta = stateMeta st refs <- getOption readerReferences mbsty <- getOption readerCitationStyle - return $ processBiblio mbsty refs - $ Pandoc (Meta title' authors' date') $ toList bs + let (Pandoc _ bs') = processBiblio mbsty refs $ doc bs + return $ Pandoc meta bs' type LP = Parser [Char] ParserState @@ -249,13 +247,13 @@ ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ [ ("par", mempty <$ skipopts) - , ("title", mempty <$ (skipopts *> tok >>= addTitle)) - , ("subtitle", mempty <$ (skipopts *> tok >>= addSubtitle)) + , ("title", mempty <$ (skipopts *> tok >>= addMeta "title")) + , ("subtitle", mempty <$ (skipopts *> tok >>= addMeta "subtitle")) , ("author", mempty <$ (skipopts *> authors)) -- -- in letter class, temp. store address & sig as title, author - , ("address", mempty <$ (skipopts *> tok >>= addTitle)) + , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) , ("signature", mempty <$ (skipopts *> authors)) - , ("date", mempty <$ (skipopts *> tok >>= addDate)) + , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) -- sectioning , ("chapter", updateState (\s -> s{ stateHasChapters = True }) *> section nullAttr 0) @@ -301,12 +299,8 @@ blockCommands = M.fromList $ , "hspace", "vspace" ] -addTitle :: Inlines -> LP () -addTitle tit = updateState (\s -> s{ stateTitle = toList tit }) - -addSubtitle :: Inlines -> LP () -addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++ - toList (str ":" <> linebreak <> tit) }) +addMeta :: ToMetaValue a => String -> a -> LP () +addMeta field val = updateState $ setMeta field val authors :: LP () authors = try $ do @@ -317,10 +311,7 @@ authors = try $ do -- skip e.g. \vspace{10pt} auths <- sepBy oneAuthor (controlSeq "and") char '}' - updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths }) - -addDate :: Inlines -> LP () -addDate dat = updateState (\s -> s{ stateDate = toList dat }) + addMeta "authors" (map trimInlines auths) section :: Attr -> Int -> LP Blocks section attr lvl = do @@ -872,20 +863,24 @@ letter_contents = do bs <- blocks st <- getState -- add signature (author) and address (title) - let addr = case stateTitle st of - [] -> mempty - x -> para $ trimInlines $ fromList x - updateState $ \s -> s{ stateAuthors = [], stateTitle = [] } + let addr = case lookupMeta "address" (stateMeta st) of + Just (MetaBlocks [Plain xs]) -> + para $ trimInlines $ fromList xs + _ -> mempty return $ addr <> bs -- sig added by \closing closing :: LP Blocks closing = do contents <- tok st <- getState - let sigs = case stateAuthors st of - [] -> mempty - xs -> para $ trimInlines $ fromList - $ intercalate [LineBreak] xs + let extractInlines (MetaBlocks [Plain ys]) = ys + extractInlines (MetaBlocks [Para ys ]) = ys + extractInlines _ = [] + let sigs = case lookupMeta "author" (stateMeta st) of + Just (MetaList xs) -> + para $ trimInlines $ fromList $ + intercalate [LineBreak] $ map extractInlines xs + _ -> mempty return $ para (trimInlines contents) <> sigs item :: LP Blocks diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 35c236041..8c836614f 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -37,7 +37,13 @@ import Data.Ord ( comparing ) import Data.Char ( isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition +import qualified Data.Text as T +import Data.Text (Text) +import qualified Data.Yaml as Yaml +import qualified Data.HashMap.Strict as H import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Data.Vector as V import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import Text.Pandoc.Options import Text.Pandoc.Shared @@ -196,12 +202,13 @@ dateLine = try $ do skipSpaces trimInlinesF . mconcat <$> manyTill inline newline -titleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) +titleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) titleBlock = pandocTitleBlock + <|> yamlTitleBlock <|> mmdTitleBlock - <|> return (mempty, return [], mempty) + <|> return (return id) -pandocTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) +pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') @@ -209,25 +216,78 @@ pandocTitleBlock = try $ do author <- option (return []) authorsLine date <- option mempty dateLine optional blanklines - return (title, author, date) - -mmdTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) + return $ do + title' <- title + author' <- author + date' <- date + return $ B.setMeta "title" title' + . B.setMeta "author" author' + . B.setMeta "date" date' + +yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) +yamlTitleBlock = try $ do + guardEnabled Ext_yaml_title_block + string "---" + blankline + rawYaml <- unlines <$> manyTill anyLine stopLine + optional blanklines + opts <- stateOptions <$> getState + return $ return $ + case Yaml.decode $ UTF8.fromString rawYaml of + Just (Yaml.Object hashmap) -> + H.foldrWithKey (\k v f -> + if ignorable k + then f + else B.setMeta (T.unpack k) (yamlToMeta opts v) . f) + id hashmap + _ -> fail "Could not parse yaml object" + +-- ignore fields starting with _ +ignorable :: Text -> Bool +ignorable t = (T.pack "_") `T.isPrefixOf` t + +toMetaValue :: ReaderOptions -> Text -> MetaValue +toMetaValue opts x = + case readMarkdown opts (T.unpack x) of + Pandoc _ [Plain xs] -> MetaInlines xs + Pandoc _ [Para xs] + | endsWithNewline x -> MetaBlocks [Para xs] + | otherwise -> MetaInlines xs + Pandoc _ bs -> MetaBlocks bs + where endsWithNewline t = (T.pack "\n") `T.isSuffixOf` t + +yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue +yamlToMeta opts (Yaml.String t) = toMetaValue opts t +yamlToMeta _ (Yaml.Number n) = MetaString $ show n +yamlToMeta _ (Yaml.Bool b) = MetaString $ map toLower $ show b +yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) + $ V.toList xs +yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> + if ignorable k + then m + else M.insert (T.unpack k) + (yamlToMeta opts v) m) + M.empty o +yamlToMeta _ _ = MetaString "" + +stopLine :: MarkdownParser () +stopLine = try $ (string "---" <|> string "...") >> blankline >> return () + +mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - let title = maybe mempty return $ lookup "title" kvPairs - let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs - let date = maybe mempty return $ lookup "date" kvPairs - return (title, author, date) + return $ return $ \(Pandoc m bs) -> + Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs -kvPair :: MarkdownParser (String, Inlines) +kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') val <- manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) let key' = concat $ words $ map toLower key - let val' = trimInlines $ B.text val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val return (key',val') parseMarkdown :: MarkdownParser Pandoc @@ -236,16 +296,15 @@ parseMarkdown = do updateState $ \state -> state { stateOptions = let oldOpts = stateOptions state in oldOpts{ readerParseRaw = True } } - (title, authors, date) <- option (mempty,return [],mempty) titleBlock + titleTrans <- option (return id) titleBlock blocks <- parseBlocks st <- getState mbsty <- getOption readerCitationStyle refs <- getOption readerReferences return $ processBiblio mbsty refs - $ B.setTitle (runF title st) - $ B.setAuthors (runF authors st) - $ B.setDate (runF date st) - $ B.doc $ runF blocks st + $ runF titleTrans st + $ B.doc + $ runF blocks st addWarning :: Maybe SourcePos -> String -> MarkdownParser () addWarning mbpos msg = diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index a0e5a0635..c5d4cb98a 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -33,12 +33,6 @@ module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) -nullMeta :: Meta -nullMeta = Meta{ docTitle = [] - , docAuthors = [] - , docDate = [] - } - -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, -- an inline list, or an inline. Thus, for example, @@ -47,7 +41,7 @@ nullMeta = Meta{ docTitle = [] -- -- will be treated as if it were -- --- > Pandoc (Meta [] [] []) [Plain [Str "hi"]] +-- > Pandoc nullMeta [Plain [Str "hi"]] -- readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4e0c0a277..0829996a7 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,6 +31,7 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition +import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options @@ -39,7 +40,6 @@ import Data.List ( findIndex, intersperse, intercalate, transpose, sort, deleteFirstsBy, isSuffixOf ) import qualified Data.Map as M import Text.Printf ( printf ) -import Data.Maybe ( catMaybes ) import Control.Applicative ((<$>), (<$), (<*), (*>)) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B @@ -87,16 +87,30 @@ promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) -- of level that are not found elsewhere, return it as a title and --- promote all the other headers. -titleTransform :: [Block] -- ^ list of blocks - -> ([Block], [Inline]) -- ^ modified list of blocks, title -titleTransform ((Header 1 _ head1):(Header 2 _ head2):rest) | - not (any (isHeader 1) rest || any (isHeader 2) rest) = -- both title & subtitle - (promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2) -titleTransform ((Header 1 _ head1):rest) | - not (any (isHeader 1) rest) = -- title, no subtitle - (promoteHeaders 1 rest, head1) -titleTransform blocks = (blocks, []) +-- promote all the other headers. Also process a definition list right +-- after the title block as metadata. +titleTransform :: ([Block], Meta) -- ^ list of blocks, metadata + -> ([Block], Meta) -- ^ modified list of blocks, metadata +titleTransform (bs, meta) = + let (bs', meta') = + case bs of + ((Header 1 _ head1):(Header 2 _ head2):rest) + | not (any (isHeader 1) rest || any (isHeader 2) rest) -> -- tit/sub + (promoteHeaders 2 rest, setMeta "title" (fromList head1) $ + setMeta "subtitle" (fromList head2) meta) + ((Header 1 _ head1):rest) + | not (any (isHeader 1) rest) -> -- title only + (promoteHeaders 1 rest, + setMeta "title" (fromList head1) meta) + _ -> (bs, meta) + in case bs' of + (DefinitionList ds : rest) -> + (rest, metaFromDefList ds meta') + _ -> (bs', meta') + +metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta +metaFromDefList ds meta = foldr f meta ds + where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) parseRST :: RSTParser Pandoc parseRST = do @@ -114,14 +128,12 @@ parseRST = do -- now parse it for real... blocks <- B.toList <$> parseBlocks standalone <- getOption readerStandalone - let (blocks', title) = if standalone - then titleTransform blocks - else (blocks, []) state <- getState - let authors = stateAuthors state - let date = stateDate state - let title' = if null title then stateTitle state else title - return $ Pandoc (Meta title' authors date) blocks' + let meta = stateMeta state + let (blocks', meta') = if standalone + then titleTransform (blocks, meta) + else (blocks, meta) + return $ Pandoc meta' blocks' -- -- parsing blocks @@ -163,38 +175,19 @@ rawFieldListItem indent = try $ do return (name, raw) fieldListItem :: String - -> RSTParser (Maybe (Inlines, [Blocks])) + -> RSTParser (Inlines, [Blocks]) fieldListItem indent = try $ do (name, raw) <- rawFieldListItem indent let term = B.str name contents <- parseFromString parseBlocks raw optional blanklines - case (name, B.toList contents) of - ("Author", x) -> do - updateState $ \st -> - st{ stateAuthors = stateAuthors st ++ [extractContents x] } - return Nothing - ("Authors", [BulletList auths]) -> do - updateState $ \st -> st{ stateAuthors = map extractContents auths } - return Nothing - ("Date", x) -> do - updateState $ \st -> st{ stateDate = extractContents x } - return Nothing - ("Title", x) -> do - updateState $ \st -> st{ stateTitle = extractContents x } - return Nothing - _ -> return $ Just (term, [contents]) - -extractContents :: [Block] -> [Inline] -extractContents [Plain auth] = auth -extractContents [Para auth] = auth -extractContents _ = [] + return (term, [contents]) fieldList :: RSTParser Blocks fieldList = try $ do indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent - case catMaybes items of + case items of [] -> return mempty items' -> return $ B.definitionList items' diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 22ec52362..a1687a691 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -93,7 +93,7 @@ parseTextile = do updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... blocks <- parseBlocks - return $ Pandoc (Meta [] [] []) blocks -- FIXME + return $ Pandoc nullMeta blocks -- FIXME noteMarker :: Parser [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') |