diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 40 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Lists.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 11 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 68 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 36 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 45 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 39 |
12 files changed, 214 insertions, 76 deletions
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 5921b56cf..a55d5417e 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -51,7 +51,7 @@ import System.IO (stderr) -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map String (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map [String] (MimeType, BL.ByteString)) deriving (Monoid) instance Show MediaBag where @@ -65,7 +65,7 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert fp (mime, contents) mediamap) + MediaBag (M.insert (splitPath fp) (mime, contents) mediamap) where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp @@ -75,14 +75,14 @@ insertMedia fp mbMime contents (MediaBag mediamap) = lookupMedia :: FilePath -> MediaBag -> Maybe (MimeType, BL.ByteString) -lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap +lookupMedia fp (MediaBag mediamap) = M.lookup (splitPath fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldWithKey (\fp (mime,contents) -> - ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + (((joinPath fp), mime, fromIntegral $ BL.length contents):)) [] mediamap -- | Extract contents of MediaBag to a given directory. Print informational -- messages if 'verbose' is true. @@ -93,7 +93,7 @@ extractMediaBag :: Bool extractMediaBag verbose dir (MediaBag mediamap) = do sequence_ $ M.foldWithKey (\fp (_ ,contents) -> - ((writeMedia verbose dir (fp, contents)):)) [] mediamap + ((writeMedia verbose dir (joinPath fp, contents)):)) [] mediamap writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () writeMedia verbose dir (subpath, bs) = do diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 84ccbbdc9..ebfd8f8a9 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -251,6 +251,7 @@ data HTMLMathMethod = PlainMath | WebTeX String -- url of TeX->image script. | MathML (Maybe String) -- url of MathMLinHTML.js | MathJax String -- url of MathJax.js + | KaTeX String String -- url of stylesheet and katex.js deriving (Show, Read, Eq) data CiteMethod = Citeproc -- use citeproc to render them diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8ebe59569..4b5fbfdfc 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -85,7 +85,7 @@ import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Data.Maybe (isJust) -import Data.List (delete, stripPrefix, (\\), intersect) +import Data.List (delete, stripPrefix, (\\), intersect, isPrefixOf) import Data.Monoid import Text.TeXMath (writeTeX) import Data.Default (Default) @@ -203,6 +203,13 @@ blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"] codeDivs :: [String] codeDivs = ["SourceCode"] + +-- For the moment, we have English, Danish, German, and French. This +-- is fairly ad-hoc, and there might be a more systematic way to do +-- it, but it's better than nothing. +headerPrefixes :: [String] +headerPrefixes = ["Heading", "Overskrift", "berschrift", "Titre"] + runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s runElemToInlines (LnBrk) = linebreak @@ -461,12 +468,11 @@ bodyPartToBlocks (Paragraph pPr parparts) $ codeBlock $ concatMap parPartToString parparts | (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr - , Just n <- isHeaderClass c = do + , Just (prefix, n) <- isHeaderClass c = do ils <- local (\s-> s{docxInHeaderBlock=True}) $ (concatReduce <$> mapM parPartToInlines parparts) - makeHeaderAnchor $ - headerWith ("", delete ("Heading" ++ show n) cs, []) n ils + headerWith ("", delete (prefix ++ show n) cs, []) n ils | otherwise = do ils <- concatReduce <$> mapM parPartToInlines parparts >>= (return . fromList . trimLineBreaks . normalizeSpaces . toList) @@ -535,23 +541,18 @@ rewriteLink' l@(Link ils ('#':target, title)) = do Nothing -> l rewriteLink' il = return il -rewriteLink :: Blocks -> DocxContext Blocks -rewriteLink ils = case viewl $ unMany ils of - (x :< xs) -> do - x' <- walkM rewriteLink' x - xs' <- rewriteLink $ Many xs - return $ (singleton x') <> xs' - EmptyL -> return ils +rewriteLinks :: [Block] -> DocxContext [Block] +rewriteLinks = mapM (walkM rewriteLink') bodyToOutput :: Body -> DocxContext (Meta, [Block], MediaBag) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps blks <- concatReduce <$> mapM bodyPartToBlocks blkbps - blks' <- rewriteLink blks + blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks mediaBag <- gets docxMediaBag return $ (meta, - blocksToDefinitions $ blocksToBullets $ toList blks', + blks', mediaBag) docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) @@ -559,10 +560,11 @@ docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def -isHeaderClass :: String -> Maybe Int -isHeaderClass s | Just s' <- stripPrefix "Heading" s = - case reads s' :: [(Int, String)] of - [] -> Nothing - ((n, "") : []) -> Just n - _ -> Nothing +isHeaderClass :: String -> Maybe (String, Int) +isHeaderClass s | (pref:_) <- filter (\h -> isPrefixOf h s) headerPrefixes + , Just s' <- stripPrefix pref s = + case reads s' :: [(Int, String)] of + [] -> Nothing + ((n, "") : []) -> Just (pref, n) + _ -> Nothing isHeaderClass _ = Nothing diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index ea195c14a..c265ad074 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -160,8 +160,14 @@ flatToBullets' num xs@(b : elems) flatToBullets :: [Block] -> [Block] flatToBullets elems = flatToBullets' (-1) elems +singleItemHeaderToHeader :: Block -> Block +singleItemHeaderToHeader (OrderedList _ [[h@(Header _ _ _)]]) = h +singleItemHeaderToHeader blk = blk + + blocksToBullets :: [Block] -> [Block] blocksToBullets blks = + map singleItemHeaderToHeader $ bottomUp removeListDivs $ flatToBullets $ (handleListParagraphs blks) @@ -221,7 +227,3 @@ removeListDivs = concatMap removeListDivs' blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] - - - - diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4ea5f41d5..4e0bb375a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -440,7 +440,7 @@ pCodeBlock :: TagParser Blocks pCodeBlock = try $ do TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) contents <- manyTill pAnyTag (pCloses "pre" <|> eof) - let rawText = concatMap fromTagText $ filter isTagText contents + let rawText = concatMap tagToString contents -- drop leading newline if any let result' = case rawText of '\n':xs -> xs @@ -451,6 +451,11 @@ pCodeBlock = try $ do _ -> result' return $ B.codeBlockWith (mkAttr attr) result +tagToString :: Tag String -> String +tagToString (TagText s) = s +tagToString (TagOpen "br" _) = "\n" +tagToString _ = "" + inline :: TagParser Inlines inline = choice [ eNoteref diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 6b7f1a8fb..02a787670 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -927,6 +927,12 @@ para = try $ do <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) <|> (guardEnabled Ext_lists_without_preceding_blankline >> () <$ lookAhead listStart) + <|> do guardEnabled Ext_native_divs + inHtmlBlock <- stateInHtmlBlock <$> getState + case inHtmlBlock of + Just "div" -> () <$ + lookAhead (htmlTag (~== TagClose "div")) + _ -> mzero return $ do result' <- result case B.toList result' of @@ -1611,6 +1617,7 @@ endline = try $ do guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) + notFollowedByHtmlCloser (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 62421d2fb..5c00a1b27 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,7 +37,7 @@ import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker - , parseFromString + , parseFromString, blanklines ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) @@ -242,6 +242,13 @@ newline = <* updateLastPreCharPos <* updateLastForbiddenCharPos +-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. +blanklines :: OrgParser [Char] +blanklines = + P.blanklines + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + -- -- parsing blocks -- @@ -856,7 +863,7 @@ definitionListItem parseMarkerGetLength = try $ do line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) - term' <- parseFromString inline term + term' <- parseFromString parseInlines term contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 54d252d43..6e1f84335 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts, ScopedTypeVariables, PatternGuards #-} + FlexibleContexts, ScopedTypeVariables, PatternGuards, + ViewPatterns #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -106,7 +107,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI ) import qualified Data.Set as Set import System.Directory -import System.FilePath (joinPath, splitDirectories) +import System.FilePath (joinPath, splitDirectories, pathSeparator, isPathSeparator) import Text.Pandoc.MIME (MimeType, getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) @@ -126,6 +127,7 @@ import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) import qualified Data.Text as T (toUpper, pack, unpack) +import Data.ByteString.Lazy (toChunks) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -133,7 +135,6 @@ import Text.Pandoc.Data (dataFiles) import Paths_pandoc (getDataFileName) #endif #ifdef HTTP_CLIENT -import Data.ByteString.Lazy (toChunks) import Network.HTTP.Client (httpLbs, parseUrl, withManager, responseBody, responseHeaders, Request(port,host)) @@ -871,11 +872,14 @@ collapseFilePath = joinPath . reverse . foldl go [] . splitDirectories go rs "." = rs go r@(p:rs) ".." = case p of ".." -> ("..":r) - "/" -> ("..":r) + (checkPathSeperator -> Just True) -> ("..":r) _ -> rs - go _ "/" = ["/"] + go _ (checkPathSeperator -> Just True) = [[pathSeparator]] go rs x = x:rs - + isSingleton [] = Nothing + isSingleton [x] = Just x + isSingleton _ = Nothing + checkPathSeperator = fmap isPathSeparator . isSingleton -- -- Safe read diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 09321d1cc..5320a2816 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -52,7 +52,7 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () -import Text.XML.Light +import Text.XML.Light as XML import Text.TeXMath import Control.Monad.State import Text.Highlighting.Kate @@ -143,6 +143,31 @@ renderXml :: Element -> BL.ByteString renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> UTF8.fromStringLazy (showElement elt) +renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap _ [] = M.empty +renumIdMap n (e:es) + | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = + M.insert oldId ("rId" ++ (show n)) (renumIdMap (n+1) es) + | otherwise = renumIdMap n es + +replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr _ _ [] = [] +replaceAttr f val (a:as) | f (attrKey a) = + (XML.Attr (attrKey a) val) : (replaceAttr f val as) + | otherwise = a : (replaceAttr f val as) + +renumId :: (QName -> Bool) -> (M.Map String String) -> Element -> Element +renumId f renumMap e + | Just oldId <- findAttrBy f e + , Just newId <- M.lookup oldId renumMap = + let attrs' = replaceAttr f newId (elAttribs e) + in + e { elAttribs = attrs' } + | otherwise = e + +renumIds :: (QName -> Bool) -> (M.Map String String) -> [Element] -> [Element] +renumIds f renumMap = map (renumId f renumMap) + -- | Produce an Docx file from a Pandoc document. writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert @@ -168,12 +193,8 @@ writeDocx opts doc@(Pandoc meta _) = do let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs - -- adjust contents to add sectPr from reference.docx - parsedDoc <- parseXml refArchive distArchive "word/document.xml" - let wname f qn = qPrefix qn == Just "w" && f (qName qn) - let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc - let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr + let stdAttributes = [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") @@ -186,9 +207,6 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let contents' = contents ++ [sectpr] - let docContents = mknode "w:document" stdAttributes - $ mknode "w:body" [] contents' parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" @@ -255,7 +273,7 @@ writeDocx opts doc@(Pandoc meta _) = do [("Type",url') ,("Id",id') ,("Target",target')] () - let baserels = map toBaseRel + let baserels' = map toBaseRel [("http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering", "rId1", "numbering.xml") @@ -277,8 +295,12 @@ writeDocx opts doc@(Pandoc meta _) = do ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", "footnotes.xml") - ] ++ - headers ++ footers + ] + + let idMap = renumIdMap (length baserels' + 1) (headers ++ footers) + let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers + let renumFooters = renumIds (\q -> qName q == "Id") idMap footers + let baserels = baserels' ++ renumHeaders ++ renumFooters let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -288,6 +310,28 @@ writeDocx opts doc@(Pandoc meta _) = do $ renderXml reldoc + -- adjust contents to add sectPr from reference.docx + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + let sectpr = case mbsectpr of + Just sectpr' -> let cs = renumIds + (\q -> qName q == "id" && qPrefix q == Just "r") + idMap + (elChildren sectpr') + in + add_attrs (elAttribs sectpr') $ mknode "w:sectPr" [] cs + Nothing -> (mknode "w:sectPr" [] ()) + + + + -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' + let contents' = contents ++ [sectpr] + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] contents' + + + -- word/document.xml let contentEntry = toEntry "word/document.xml" epochtime $ renderXml docContents diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ffd5bf101..32256cb42 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -406,7 +406,7 @@ writeEPUB opts doc@(Pandoc meta _) = do $ case blocks of (Header 1 _ _ : _) -> blocks _ -> Header 1 ("",["unnumbered"],[]) - (docTitle meta) : blocks + (docTitle' meta) : blocks let chapterHeaderLevel = writerEpubChapterLevel opts -- internal reference IDs change when we chunk the file, @@ -484,7 +484,7 @@ writeEPUB opts doc@(Pandoc meta _) = do [("id", toId $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () - let plainTitle = case docTitle meta of + let plainTitle = case docTitle' meta of [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x @@ -524,13 +524,12 @@ writeEPUB opts doc@(Pandoc meta _) = do Just _ -> [ unode "itemref" ! [("idref", "cover_xhtml"),("linear","no")] $ () ] ++ ((unode "itemref" ! [("idref", "title_page_xhtml") - ,("linear", if null (docTitle meta) - then "no" - else "yes")] $ ()) : - (unode "itemref" ! [("idref", "nav") - ,("linear", if writerTableOfContents opts - then "yes" - else "no")] $ ()) : + ,("linear", + case lookupMeta "title" meta of + Just _ -> "yes" + Nothing -> "no")] $ ()) : + [unode "itemref" ! [("idref", "nav")] $ () + | writerTableOfContents opts ] ++ map chapterRefNode chapterEntries) , unode "guide" $ [ unode "reference" ! @@ -578,7 +577,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (stringify $ docTitle meta) + [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] let tocData = UTF8.fromStringLazy $ ppTopElement $ @@ -731,8 +730,8 @@ metadataElement version md currentTime = toTitleNode id' title | version == EPUB2 = [dcNode "title" ! (("id",id') : - maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++ - maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $ + -- note: EPUB2 doesn't accept opf:title-type + maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title)) $ titleText title] | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] ++ @@ -1192,3 +1191,16 @@ relatorMap = ,("writer of added text", "wat") ] +docTitle' :: Meta -> [Inline] +docTitle' meta = fromMaybe [] $ go <$> lookupMeta "title" meta + where go (MetaString s) = [Str s] + go (MetaInlines xs) = xs + go (MetaBlocks [Para xs]) = xs + go (MetaBlocks [Plain xs]) = xs + go (MetaMap m) = + case M.lookup "type" m of + Just x | stringify x == "main" -> + maybe [] go $ M.lookup "text" m + _ -> [] + go (MetaList xs) = concatMap go xs + go _ = [] diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9ead604d7..1a00c7660 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings, CPP #-} +{-# LANGUAGE OverloadedStrings, CPP, ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -60,6 +60,8 @@ import qualified Text.Blaze.XHtml1.Transitional.Attributes as A import Text.Blaze.Renderer.String (renderHtml) import Text.TeXMath import Text.XML.Light.Output +import Text.XML.Light (unode, elChildren, add_attr, unqual) +import qualified Text.XML.Light as XML import System.FilePath (takeExtension) import Data.Monoid import Data.Aeson (Value) @@ -155,6 +157,10 @@ pandocToHtml opts (Pandoc meta blocks) = do H.script ! A.src (toValue url) ! A.type_ "text/javascript" $ mempty + KaTeX js css -> + (H.script ! A.src (toValue js) $ mempty) <> + (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <> + (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX) _ -> case lookup "mathml-script" (writerVariables opts) of Just s | not (writerHtml5 opts) -> H.script ! A.type_ "text/javascript" @@ -342,10 +348,10 @@ parseMailto s = do _ -> fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. -obfuscateLink :: WriterOptions -> String -> String -> Html +obfuscateLink :: WriterOptions -> Html -> String -> Html obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = - H.a ! A.href (toValue s) $ toHtml txt -obfuscateLink opts txt s = + H.a ! A.href (toValue s) $ txt +obfuscateLink opts (renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s in case parseMailto s' of @@ -615,6 +621,18 @@ inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . mconcat +-- | Annotates a MathML expression with the tex source +annotateMML :: XML.Element -> String -> XML.Element +annotateMML e tex = math (unode "semantics" [cs, unode "annotation" (annotAttrs, tex)]) + where + cs = case elChildren e of + [] -> unode "mrow" () + [x] -> x + xs -> unode "mrow" xs + math = add_attr (XML.Attr (unqual "xmlns") "http://www.w3.org/1998/Math/MathML") . unode "math" + annotAttrs = [XML.Attr (unqual "encoding") "application/x-tex"] + + -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = @@ -706,7 +724,7 @@ inlineToHtml opts inline = defaultConfigPP case writeMathML dt <$> readTeX str of Right r -> return $ preEscapedString $ - ppcElement conf r + ppcElement conf (annotateMML r str) Left _ -> inlineListToHtml opts (texMathToInlines t str) >>= return . (H.span ! A.class_ "math") @@ -714,6 +732,10 @@ inlineToHtml opts inline = case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" + KaTeX _ _ -> return $ H.span ! A.class_ "math" $ + toHtml (case t of + InlineMath -> str + DisplayMath -> "\\displaystyle " ++ str) PlainMath -> do x <- inlineListToHtml opts (texMathToInlines t str) let m = H.span ! A.class_ "math" $ x @@ -731,7 +753,7 @@ inlineToHtml opts inline = | otherwise -> return mempty (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (renderHtml linkText) s + return $ obfuscateLink opts linkText s (Link txt (s,tit)) -> do linkText <- inlineListToHtml opts txt let s' = case s of @@ -815,3 +837,14 @@ blockListToNote opts ref blocks = Just EPUB3 -> noteItem ! customAttribute "epub:type" "footnote" _ -> noteItem return $ nl opts >> noteItem' + +-- Javascript snippet to render all KaTeX elements +renderKaTeX :: String +renderKaTeX = unlines [ + "window.onload = function(){var mathElements = document.getElementsByClassName(\"math\");" + , "for (var i=0; i < mathElements.length; i++)" + , "{" + , " var texText = mathElements[i].firstChild" + , " katex.render(texText.data, mathElements[i])" + , "}}" + ] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index acbe8a48d..ae2f4e907 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -54,6 +54,7 @@ data WriterState = WriterState { stInNote :: Bool -- true if we're in a note , stInQuote :: Bool -- true if in a blockquote , stInMinipage :: Bool -- true if in minipage + , stInHeading :: Bool -- true if in a section heading , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter @@ -76,9 +77,9 @@ writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ WriterState { stInNote = False, stInQuote = False, - stInMinipage = False, stNotes = [], - stOLLevel = 1, stOptions = options, - stVerbInNote = False, + stInMinipage = False, stInHeading = False, + stNotes = [], stOLLevel = 1, + stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, stUrl = False, stGraphics = False, stLHS = False, stBook = writerChapters options, @@ -179,7 +180,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc elementToLaTeX _ (Blk block) = blockToLaTeX block elementToLaTeX opts (Sec level _ (id',classes,_) title' elements) = do + modify $ \s -> s{stInHeading = True} header' <- sectionHeader ("unnumbered" `elem` classes) id' level title' + modify $ \s -> s{stInHeading = False} innerContents <- mapM (elementToLaTeX opts) elements return $ vsep (header' : innerContents) @@ -466,8 +469,11 @@ blockToLaTeX (DefinitionList lst) = do "\\end{description}" blockToLaTeX HorizontalRule = return $ "\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}" -blockToLaTeX (Header level (id',classes,_) lst) = - sectionHeader ("unnumbered" `elem` classes) id' level lst +blockToLaTeX (Header level (id',classes,_) lst) = do + modify $ \s -> s{stInHeading = True} + hdr <- sectionHeader ("unnumbered" `elem` classes) id' level lst + modify $ \s -> s{stInHeading = False} + return hdr blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty @@ -572,7 +578,13 @@ tableCellToLaTeX header (width, align, blocks) = do $ reverse ns) listItemToLaTeX :: [Block] -> State WriterState Doc -listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . +listItemToLaTeX lst + -- we need to put some text before a header if it's the first + -- element in an item. This will look ugly in LaTeX regardless, but + -- this will keep the typesetter from throwing an error. + | ((Header _ _ _) :_) <- lst = + blockListToLaTeX lst >>= return . (text "\\item ~" $$) . (nest 2) + | otherwise = blockListToLaTeX lst >>= return . (text "\\item" $$) . (nest 2) defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc @@ -586,7 +598,11 @@ defListItemToLaTeX (term, defs) = do then braces term' else term' def' <- liftM vsep $ mapM blockListToLaTeX defs - return $ "\\item" <> brackets term'' $$ def' + return $ case defs of + (((Header _ _ _) : _) : _) -> + "\\item" <> brackets term'' <> " ~ " $$ def' + _ -> + "\\item" <> brackets term'' $$ def' -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Bool -- True for unnumbered @@ -721,7 +737,9 @@ inlineToLaTeX (Code (_,classes,_) str) = do where listingsCode = do inNote <- gets stInNote when inNote $ modify $ \s -> s{ stVerbInNote = True } - let chr = ((enumFromTo '!' '~') \\ str) !! 0 + let chr = case "!\"&'()*,-./:;?@_" \\ str of + (c:_) -> c + [] -> '!' return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] highlightCode = do case highlight formatLaTeXInline ("",classes,[]) str of @@ -791,7 +809,10 @@ inlineToLaTeX (Image _ (source, _)) = do then source else unEscapeString source source'' <- stringToLaTeX URLString source' - return $ "\\includegraphics" <> braces (text source'') + inHeading <- gets stInHeading + return $ + (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") + <> braces (text source'') inlineToLaTeX (Note contents) = do inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) |