diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 99 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Custom.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 21 |
4 files changed, 92 insertions, 44 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 56cb16b20..d58f8b3c5 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -45,7 +45,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] audioobject - A wrapper for audio data and its associated meta-information [x] author - The name of an individual author [ ] authorblurb - A short description or note about an author -[ ] authorgroup - Wrapper for author information when a document has +[x] authorgroup - Wrapper for author information when a document has multiple authors or collabarators [x] authorinitials - The initials or other short identifier for an author [o] beginpage - The location of a page break in a print version of the document @@ -341,7 +341,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] refsectioninfo - Meta-information for a refsection [ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page [ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv -[ ] releaseinfo - Information about a particular release of a document +[x] releaseinfo - Information about a particular release of a document [ ] remark - A remark (or comment) intended for presentation in a draft manuscript [ ] replaceable - Content that may or must be replaced by the user @@ -499,24 +499,19 @@ type DB = State DBState data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType - , dbDocTitle :: Inlines - , dbDocAuthors :: [Inlines] - , dbDocDate :: Inlines + , dbMeta :: Meta + , dbAcceptsMeta :: Bool , dbBook :: Bool , dbFigureTitle :: Inlines } deriving Show readDocBook :: ReaderOptions -> String -> Pandoc -readDocBook _ inp = setTitle (dbDocTitle st') - $ setAuthors (dbDocAuthors st') - $ setDate (dbDocDate st') - $ doc $ mconcat bs +readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) DBState{ dbSectionLevel = 0 , dbQuoteType = DoubleQuote - , dbDocTitle = mempty - , dbDocAuthors = [] - , dbDocDate = mempty + , dbMeta = mempty + , dbAcceptsMeta = False , dbBook = False , dbFigureTitle = mempty } @@ -560,6 +555,30 @@ attrValue attr elt = named :: String -> Element -> Bool named s e = qName (elName e) == s +-- + +acceptingMetadata :: DB a -> DB a +acceptingMetadata p = do + modify (\s -> s { dbAcceptsMeta = True } ) + res <- p + modify (\s -> s { dbAcceptsMeta = False }) + return res + +checkInMeta :: Monoid a => DB () -> DB a +checkInMeta p = do + accepts <- dbAcceptsMeta <$> get + when accepts p + return mempty + + + +addMeta :: ToMetaValue a => String -> a -> DB () +addMeta field val = modify (setMeta field val) + +instance HasMeta DBState where + setMeta field v s = s {dbMeta = setMeta field v (dbMeta s)} + deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)} + isBlockElement :: Content -> Bool isBlockElement (Elem e) = qName (elName e) `elem` blocktags where blocktags = ["toc","index","para","formalpara","simpara", @@ -606,6 +625,7 @@ getImage e = do getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) + parseBlock :: Content -> DB Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s @@ -619,10 +639,10 @@ parseBlock (Elem e) = "para" -> parseMixed para (elContent e) "formalpara" -> do tit <- case filterChild (named "title") e of - Just t -> (<> str "." <> linebreak) <$> emph - <$> getInlines t + Just t -> (para . strong . (<> str ".")) <$> + getInlines t Nothing -> return mempty - addToStart tit <$> parseMixed para (elContent e) + (tit <>) <$> parseMixed para (elContent e) "simpara" -> parseMixed para (elContent e) "ackno" -> parseMixed para (elContent e) "epigraph" -> parseBlockquote @@ -630,7 +650,11 @@ parseBlock (Elem e) = "attribution" -> return mempty "titleabbrev" -> return mempty "authorinitials" -> return mempty - "title" -> return mempty -- handled by getTitle or sect or figure + "title" -> checkInMeta getTitle + "author" -> checkInMeta getAuthor + "authorgroup" -> checkInMeta getAuthorGroup + "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release") + "date" -> checkInMeta getDate "bibliography" -> sect 0 "bibliodiv" -> sect 1 "biblioentry" -> parseMixed para (elContent e) @@ -693,8 +717,8 @@ parseBlock (Elem e) = "figure" -> getFigure e "mediaobject" -> para <$> getImage e "caption" -> return mempty - "info" -> getTitle >> getAuthors >> getDate >> return mempty - "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty + "info" -> metaBlock + "articleinfo" -> metaBlock "sectioninfo" -> return mempty -- keywords & other metadata "refsectioninfo" -> return mempty -- keywords & other metadata "refsect1info" -> return mempty -- keywords & other metadata @@ -708,10 +732,10 @@ parseBlock (Elem e) = "chapterinfo" -> return mempty -- keywords & other metadata "glossaryinfo" -> return mempty -- keywords & other metadata "appendixinfo" -> return mempty -- keywords & other metadata - "bookinfo" -> getTitle >> getAuthors >> getDate >> return mempty + "bookinfo" -> metaBlock "article" -> modify (\st -> st{ dbBook = False }) >> - getTitle >> getBlocks e - "book" -> modify (\st -> st{ dbBook = True }) >> getTitle >> getBlocks e + getBlocks e + "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e "table" -> parseTable "informaltable" -> parseTable "literallayout" -> codeBlockWithLang @@ -757,24 +781,19 @@ parseBlock (Elem e) = terms' <- mapM getInlines terms items' <- mapM getBlocks items return (mconcat $ intersperse (str "; ") terms', items') - getTitle = case filterChild (named "title") e of - Just t -> do - tit <- getInlines t - subtit <- case filterChild (named "subtitle") e of - Just s -> (text ": " <>) <$> - getInlines s - Nothing -> return mempty - modify $ \st -> st{dbDocTitle = tit <> subtit} - Nothing -> return () - getAuthors = do - auths <- mapM getInlines - $ filterChildren (named "author") e - modify $ \st -> st{dbDocAuthors = auths} - getDate = case filterChild (named "date") e of - Just t -> do - dat <- getInlines t - modify $ \st -> st{dbDocDate = dat} - Nothing -> return () + getTitle = do + tit <- getInlines e + subtit <- case filterChild (named "subtitle") e of + Just s -> (text ": " <>) <$> + getInlines s + Nothing -> return mempty + addMeta "title" (tit <> subtit) + + getAuthor = (:[]) <$> getInlines e >>= addMeta "author" + getAuthorGroup = do + let terms = filterChildren (named "author") e + mapM getInlines terms >>= addMeta "author" + getDate = getInlines e >>= addMeta "date" parseTable = do let isCaption x = named "title" x || named "caption" x caption <- case filterChild isCaption e of @@ -836,6 +855,7 @@ parseBlock (Elem e) = b <- getBlocks e modify $ \st -> st{ dbSectionLevel = n - 1 } return $ header n' headerText <> b + metaBlock = acceptingMetadata (getBlocks e) >> return mempty getInlines :: Element -> DB Inlines getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') @@ -899,6 +919,7 @@ parseInline (Elem e) = _ -> emph <$> innerInlines "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) "title" -> return mempty + "affiliation" -> return mempty _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 714402e42..3835629db 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,5 +1,5 @@ {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts #-} + FlexibleContexts, ScopedTypeVariables #-} {- Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu> @@ -120,7 +120,9 @@ import Paths_pandoc (getDataFileName) #ifdef HTTP_CONDUIT import Data.ByteString.Lazy (toChunks) import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, - responseBody, responseHeaders) + responseBody, responseHeaders, addProxy, + Request(port,host)) +import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType) import Network (withSocketsDo) #else @@ -648,7 +650,13 @@ openURL u #ifdef HTTP_CONDUIT | otherwise = withSocketsDo $ E.try $ do req <- parseUrl u - resp <- withManager $ httpLbs req + (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy" + let req' = case proxy of + Left _ -> req + Right pr -> case parseUrl pr of + Just r -> addProxy (host r) (port r) req + Nothing -> req + resp <- withManager $ httpLbs req' return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 0234e1e35..0b30287f5 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -131,7 +131,7 @@ instance StackValue MetaValue where -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String writeCustom luaFile opts doc = do - luaScript <- readFile luaFile + luaScript <- C8.unpack `fmap` C8.readFile luaFile lua <- Lua.newstate Lua.openlibs lua Lua.loadstring lua luaScript "custom" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 07be6e9af..6cf7ed730 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -500,9 +500,28 @@ tableRowToLaTeX header aligns widths cols = do cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" +-- For simple latex tables (without minipages or parboxes), +-- we need to go to some lengths to get line breaks working: +-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. +fixLineBreaks :: Block -> Block +fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils +fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils +fixLineBreaks x = x + +fixLineBreaks' :: [Inline] -> [Inline] +fixLineBreaks' ils = case splitBy (== LineBreak) ils of + [] -> [] + [xs] -> xs + chunks -> RawInline "tex" "\\vtop{" : + concatMap tohbox chunks ++ + [RawInline "tex" "}"] + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ + [RawInline "tex" "}"] + tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) -> State WriterState Doc -tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX blocks +tableCellToLaTeX _ (0, _, blocks) = + blockListToLaTeX $ walk fixLineBreaks blocks tableCellToLaTeX header (width, align, blocks) = do modify $ \st -> st{ stInMinipage = True, stNotes = [] } cellContents <- blockListToLaTeX blocks |