diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/PDF.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 39 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 37 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 82 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 5 |
8 files changed, 122 insertions, 63 deletions
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 0a09d3222..1711c0f36 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -198,6 +198,8 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] when (verbose && runNumber == 1) $ do + putStrLn $ "[makePDF] temp dir:" + putStrLn tmpDir' putStrLn $ "[makePDF] Command line:" putStrLn $ program ++ " " ++ unwords (map show programArgs) putStr "\n" diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d8beb1810..82e7e2c33 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -903,7 +903,8 @@ data ParserState = ParserState stateAllowLinks :: Bool, -- ^ Allow parsing of links stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks) + stateKeys :: KeyTable, -- ^ List of reference keys + stateHeaderKeys :: KeyTable, -- ^ List of implicit header ref keys stateSubstitutions :: SubstTable, -- ^ List of substitution references stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) @@ -1001,6 +1002,7 @@ defaultParserState = stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, + stateHeaderKeys = M.empty, stateSubstitutions = M.empty, stateNotes = [], stateNotes' = [], @@ -1206,7 +1208,7 @@ citeKey = try $ do guard =<< notAfterString suppress_author <- option False (char '-' *> return True) char '@' - firstChar <- alphaNum <|> char '_' + firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5b3c907aa..c766bb4ee 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -51,7 +51,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import Data.Maybe ( fromMaybe, isJust) -import Data.List ( intercalate, isInfixOf, isPrefixOf ) +import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero, void, unless ) import Control.Arrow ((***)) @@ -62,7 +62,7 @@ import Debug.Trace (trace) import Text.TeXMath (readMathML, writeTeX) import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) - +import Network.URI (isURI) import Text.Pandoc.Error import Text.Parsec.Error @@ -74,7 +74,8 @@ readHtml :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc (HTMLState def{ stateOptions = opts } []) "source" tags + runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) + "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -98,7 +99,8 @@ replaceNotes' x = return x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)] + noteTable :: [(String, Blocks)], + baseHref :: Maybe String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -120,7 +122,7 @@ pBody :: TagParser Blocks pBody = pInTags "body" block pHead :: TagParser Blocks -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . trimInlines setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do @@ -132,6 +134,17 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) let content = fromAttrib "content" mt updateState $ B.setMeta name (B.text content) return mempty + pBaseTag = do + bt <- pSatisfy (~== TagOpen "base" []) + let baseH = fromAttrib "href" bt + if null baseH + then return mempty + else do + let baseH' = case reverse baseH of + '/':_ -> baseH + _ -> baseH ++ "/" + updateState $ \st -> st{ baseHref = Just baseH' } + return mempty block :: TagParser Blocks block = do @@ -566,7 +579,11 @@ pAnchor = try $ do pRelLink :: TagParser Inlines pRelLink = try $ do tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) - let url = fromAttrib "href" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "href" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let uid = fromAttrib "id" tag let spanC = case uid of @@ -578,7 +595,11 @@ pRelLink = try $ do pImage :: TagParser Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - let url = fromAttrib "src" tag + mbBaseHref <- baseHref <$> getState + let url' = fromAttrib "src" tag + let url = case (isURI url', mbBaseHref) of + (False, Just h) -> h ++ url' + _ -> url' let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag return $ B.image (escapeURI url) title (B.text alt) @@ -874,7 +895,7 @@ htmlInBalanced :: (Monad m) -> ParserT String st m String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f - guard $ '/' `notElem` tag -- not a self-closing tag + guard $ not $ "/>" `isSuffixOf` tag -- not a self-closing tag let stopper = htmlTag (~== TagClose t) let anytag = snd <$> htmlTag (const True) contents <- many $ notFollowedBy' stopper >> @@ -945,7 +966,7 @@ instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState instance Default HTMLState where - def = HTMLState def [] + def = HTMLState def [] Nothing instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index cc5521a62..3b5ae0978 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -177,9 +177,10 @@ charsInBalancedBrackets openBrackets = (char '[' >> charsInBalancedBrackets (openBrackets + 1)) <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1)) <|> (( (() <$ code) - <|> (() <$ escapedChar') + <|> (() <$ (escapedChar')) <|> (newline >> notFollowedBy blankline) <|> skipMany1 (noneOf "[]`\n\\") + <|> (() <$ count 1 (oneOf "`\\")) ) >> charsInBalancedBrackets openBrackets) -- @@ -508,9 +509,12 @@ atxHeader = try $ do notFollowedBy $ guardEnabled Ext_fancy_lists >> (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) + (text, raw) <- withRaw $ + trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- registerHeader attr (runF text defaultParserState) + attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw ident return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -543,15 +547,24 @@ setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + (text, raw) <- withRaw $ + trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- registerHeader attr (runF text defaultParserState) + attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState) + guardDisabled Ext_implicit_header_references + <|> registerImplicitHeader raw ident return $ B.headerWith attr' level <$> text +registerImplicitHeader :: String -> String -> MarkdownParser () +registerImplicitHeader raw ident = do + let key = toKey $ "[" ++ raw ++ "]" + updateState (\s -> s { stateHeaderKeys = + M.insert key ('#':ident,"") (stateHeaderKeys s) }) + -- -- hrule block -- @@ -1699,7 +1712,7 @@ referenceLink :: (String -> String -> Inlines -> Inlines) -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False - (ref,raw') <- option (mempty, "") $ + (_,raw') <- option (mempty, "") $ lookAhead (try (spnl >> normalCite >> return (mempty, ""))) <|> try (spnl >> reference) @@ -1719,13 +1732,13 @@ referenceLink constructor (lab, raw) = do return $ do keys <- asksF stateKeys case M.lookup key keys of - Nothing -> do - headers <- asksF stateHeaders - ref' <- if labIsRef then lab else ref + Nothing -> if implicitHeaderRefs - then case M.lookup ref' headers of - Just ident -> constructor ('#':ident) "" <$> lab - Nothing -> makeFallback + then do + headerKeys <- asksF stateHeaderKeys + case M.lookup key headerKeys of + Just (src, tit) -> constructor src tit <$> lab + Nothing -> makeFallback else makeFallback Just (src,tit) -> constructor src tit <$> lab diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 08a83c85e..f3b99e141 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -154,6 +154,14 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty +-- Add ids to paragraphs in divs with ids - this is needed for +-- pandoc-citeproc to get link anchors in bibliographies: +blockToDocbook opts (Div (ident,_,_) [Para lst]) = + let attribs = [("id", ident) | not (null ident)] in + if hasLineBreaks lst + then flush $ nowrap $ inTags False "literallayout" attribs + $ inlinesToDocbook opts lst + else inTags True "para" attribs $ inlinesToDocbook opts lst blockToDocbook opts (Div _ bs) = blocksToDocbook opts $ map plainToPara bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 37c285dc2..4ce7857ac 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to EPUB. module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef ( IORef, newIORef, readIORef, modifyIORef ) import qualified Data.Map as M -import Data.Maybe ( fromMaybe ) +import Data.Maybe ( fromMaybe, catMaybes ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) @@ -60,14 +60,14 @@ import Text.Pandoc.Walk (walk, walkM) import Data.Default import Text.Pandoc.Writers.Markdown (writePlain) import Control.Monad.State (modify, get, execState, State, put, evalState) -import Control.Monad (foldM, mplus, liftM, when) +import Control.Monad (mplus, liftM, when) import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs , strContent, lookupAttr, Node(..), QName(..), parseXML , onlyElems, node, ppElement) import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) @@ -378,17 +378,7 @@ writeEPUB opts doc@(Pandoc meta _) = do mediaRef <- newIORef [] Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= walkM (transformBlock opts' mediaRef) - pics <- readIORef mediaRef - let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem' (writerMediaBag opts') - (writerSourceURL opts') oldsrc - case res of - Left _ -> do - warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." - return entries - Right (img,_) -> return $ - (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries - picEntries <- foldM readPicEntry [] pics + picEntries <- (catMaybes . map (snd . snd)) <$> readIORef mediaRef -- handle fonts let matchingGlob f = do @@ -425,10 +415,14 @@ writeEPUB opts doc@(Pandoc meta _) = do let blocks'' = replaceRefs reftable blocks' let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel + isChapterHeader (Div ("",["references"],[]) (Header n _ _:_)) = + n <= chapterHeaderLevel isChapterHeader _ = False let toChapters :: [Block] -> State [Int] [Chapter] toChapters [] = return [] + toChapters (Div ("",["references"],[]) bs@(Header 1 _ _:_) : rest) = + toChapters (bs ++ rest) toChapters (Header n attr@(_,classes,_) ils : bs) = do nums <- get mbnum <- if "unnumbered" `elem` classes @@ -794,59 +788,75 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media +transformTag :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Tag String -> IO (Tag String) -transformTag mediaRef tag@(TagOpen name attr) +transformTag opts mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef mediaRef src - newposter <- modifyMediaRef mediaRef poster + newsrc <- modifyMediaRef opts mediaRef src + newposter <- modifyMediaRef opts mediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ tag = return tag - -modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath -modifyMediaRef _ "" = return "" -modifyMediaRef mediaRef oldsrc = do +transformTag _ _ tag = return tag + +modifyMediaRef :: WriterOptions + -> IORef [(FilePath, (FilePath, Maybe Entry))] + -> FilePath + -> IO FilePath +modifyMediaRef _ _ "" = return "" +modifyMediaRef opts mediaRef oldsrc = do media <- readIORef mediaRef case lookup oldsrc media of - Just n -> return n - Nothing -> do - let new = "media/file" ++ show (length media) ++ - takeExtension (takeWhile (/='?') oldsrc) -- remove query - modifyIORef mediaRef ( (oldsrc, new): ) + Just (n,_) -> return n + Nothing -> do + res <- fetchItem' (writerMediaBag opts) + (writerSourceURL opts) oldsrc + (new, mbEntry) <- + case res of + Left _ -> do + warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." + return (oldsrc, Nothing) + Right (img,mbMime) -> do + let new = "media/file" ++ show (length media) ++ + fromMaybe (takeExtension (takeWhile (/='?') oldsrc)) + (('.':) <$> (mbMime >>= extensionFromMimeType)) + epochtime <- floor `fmap` getPOSIXTime + let entry = toEntry new epochtime $ B.fromChunks . (:[]) $ img + return (new, Just entry) + modifyIORef mediaRef ( (oldsrc, (new, mbEntry)): ) return new transformBlock :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath, entry) media -> Block -> IO Block -transformBlock _ mediaRef (RawBlock fmt raw) +transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b transformInline :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media + -> IORef [(FilePath, (FilePath, Maybe Entry))] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline _ mediaRef (Image lab (src,tit)) = do - newsrc <- modifyMediaRef mediaRef src +transformInline opts mediaRef (Image lab (src,tit)) = do + newsrc <- modifyMediaRef opts mediaRef src return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline _ mediaRef (RawInline fmt raw) +transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag mediaRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3a89b226b..022a0e17f 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -46,7 +46,7 @@ import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes, fromMaybe ) +import Data.Maybe ( catMaybes, fromMaybe, isJust ) import Control.Monad.State import Text.Blaze.Html hiding(contents) #if MIN_VERSION_blaze_markup(0,6,3) @@ -825,7 +825,9 @@ inlineToHtml opts inline = writerIdentifierPrefix opts ++ "fn" ++ ref) ! A.class_ "footnoteRef" ! prefixedId opts ("fnref" ++ ref) - $ H.sup + $ (if isJust (writerEpubVersion opts) + then id + else H.sup) $ toHtml ref return $ case writerEpubVersion opts of Just EPUB3 -> link ! customAttribute "epub:type" "noteref" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 70280aaec..a785e1edc 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -274,10 +274,11 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = not $ null $ query hasCodeBlock elts ++ + let fragile = "fragile" `elem` classes || + not (null $ query hasCodeBlock elts ++ if writerListings opts then query hasCode elts - else [] + else []) let allowframebreaks = "allowframebreaks" `elem` classes let optionslist = ["fragile" | fragile] ++ ["allowframebreaks" | allowframebreaks] |