diff options
Diffstat (limited to 'src/Text/Pandoc/Writers')
-rw-r--r-- | src/Text/Pandoc/Writers/AsciiDoc.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ConTeXt.hs | 41 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/FB2.hs | 616 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 34 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 132 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/MediaWiki.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Org.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 57 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Texinfo.hs | 5 |
13 files changed, 840 insertions, 132 deletions
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 1913eb92b..1ccfab6e6 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -40,8 +40,7 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where import Text.Pandoc.Definition import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared -import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Pandoc.Parsing hiding (blankline, space) import Data.List ( isPrefixOf, intersperse, intercalate ) import Text.Pandoc.Pretty import Control.Monad.State @@ -93,7 +92,7 @@ escapeString = escapeStringUsing escs where escs = backslashEscapes "{" -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parser [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index dfdf7a140..964320eb2 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -77,6 +77,8 @@ pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do , ("title", titletext) , ("date", datetext) ] ++ [ ("number-sections", "yes") | writerNumberSections options ] ++ + [ ("mainlang", maybe "" (reverse . takeWhile (/=',') . reverse) + (lookup "lang" $ writerVariables options)) ] ++ [ ("author", a) | a <- authorstext ] return $ if writerStandalone options then renderTemplate context $ writerTemplate options @@ -84,34 +86,30 @@ pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do -- escape things as needed for ConTeXt -escapeCharForConTeXt :: Char -> String -escapeCharForConTeXt ch = +escapeCharForConTeXt :: WriterOptions -> Char -> String +escapeCharForConTeXt opts ch = + let ligatures = writerTeXLigatures opts in case ch of - '{' -> "\\letteropenbrace{}" - '}' -> "\\letterclosebrace{}" + '{' -> "\\{" + '}' -> "\\}" '\\' -> "\\letterbackslash{}" '$' -> "\\$" '|' -> "\\letterbar{}" - '^' -> "\\letterhat{}" - '%' -> "\\%" + '%' -> "\\letterpercent{}" '~' -> "\\lettertilde{}" - '&' -> "\\&" '#' -> "\\#" - '<' -> "\\letterless{}" - '>' -> "\\lettermore{}" '[' -> "{[}" ']' -> "{]}" - '_' -> "\\letterunderscore{}" '\160' -> "~" - '\x2014' -> "---" - '\x2013' -> "--" - '\x2019' -> "'" + '\x2014' | ligatures -> "---" + '\x2013' | ligatures -> "--" + '\x2019' | ligatures -> "'" '\x2026' -> "\\ldots{}" x -> [x] -- | Escape string for ConTeXt -stringToConTeXt :: String -> String -stringToConTeXt = concatMap escapeCharForConTeXt +stringToConTeXt :: WriterOptions -> String -> String +stringToConTeXt opts = concatMap (escapeCharForConTeXt opts) -- | Convert Elements to ConTeXt elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc @@ -252,8 +250,9 @@ inlineToConTeXt (SmallCaps lst) = do return $ braces $ "\\sc " <> contents inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) = return $ "\\type" <> braces (text str) -inlineToConTeXt (Code _ str) = - return $ "\\mono" <> braces (text $ stringToConTeXt str) +inlineToConTeXt (Code _ str) = do + opts <- gets stOptions + return $ "\\mono" <> braces (text $ stringToConTeXt opts str) inlineToConTeXt (Quoted SingleQuote lst) = do contents <- inlineListToConTeXt lst return $ "\\quote" <> braces contents @@ -261,11 +260,13 @@ inlineToConTeXt (Quoted DoubleQuote lst) = do contents <- inlineListToConTeXt lst return $ "\\quotation" <> braces contents inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst -inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str +inlineToConTeXt (Str str) = do + opts <- gets stOptions + return $ text $ stringToConTeXt opts str inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToConTeXt (Math DisplayMath str) = - return $ text "\\startformula " <> text str <> text " \\stopformula" + return $ text "\\startformula " <> text str <> text " \\stopformula" <> space inlineToConTeXt (RawInline "context" str) = return $ text str inlineToConTeXt (RawInline "tex" str) = return $ text str inlineToConTeXt (RawInline _ _) = return empty @@ -296,7 +297,7 @@ inlineToConTeXt (Link txt (src, _)) = do label <- inlineListToConTeXt txt return $ "\\useURL" <> brackets (text ref) - <> brackets (text $ escapeStringUsing [('#',"\\#")] src) + <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) <> brackets empty <> brackets label <> "\\from" diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a2995b705..396e7a482 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -146,7 +146,8 @@ writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths date) _) = do let styledoc = case findEntryByPath stylepath refArchive >>= parseXMLDoc . toString . fromEntry of Just d -> d - Nothing -> error $ stylepath ++ "missing in reference docx" + Nothing -> error $ "Unable to parse " ++ stylepath ++ + " from reference.docx" let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc' -- construct word/numbering.xml @@ -261,13 +262,13 @@ mkLvl marker lvl = ,show n) step = 720 hang = 480 - bulletFor 0 = "\8226" - bulletFor 1 = "\9702" - bulletFor 2 = "\8227" - bulletFor 3 = "\8259" - bulletFor 4 = "\8226" - bulletFor 5 = "\9702" - bulletFor _ = "\8227" + bulletFor 0 = "\x2022" -- filled circle + bulletFor 1 = "\x2013" -- en dash + bulletFor 2 = "\x2022" -- hyphen bullet + bulletFor 3 = "\x2013" + bulletFor 4 = "\x2022" + bulletFor 5 = "\x2013" + bulletFor _ = "\x2022" styleFor UpperAlpha _ = "upperLetter" styleFor LowerAlpha _ = "lowerLetter" styleFor UpperRoman _ = "upperRoman" @@ -488,7 +489,10 @@ getParaProps :: WS [Element] getParaProps = do props <- gets stParaProperties listLevel <- gets stListLevel - numid <- getNumId + listMarker <- gets stListMarker + numid <- case listMarker of + NoMarker -> return 1 + _ -> getNumId let listPr = if listLevel >= 0 then [ mknode "w:numPr" [] [ mknode "w:numId" [("w:val",show numid)] () diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 67048348e..b423f136f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -48,6 +48,8 @@ import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) +import Prelude hiding (catch) +import Control.Exception (catch, SomeException) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line @@ -126,8 +128,9 @@ writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do let chapterEntries = zipWith chapterToEntry [1..] chapters -- contents.opf - localeLang <- catch (liftM (takeWhile (/='.')) $ getEnv "LANG") - (\_ -> return "en-US") + localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) . + takeWhile (/='.')) $ getEnv "LANG") + (\e -> let _ = (e :: SomeException) in return "en-US") let lang = case lookup "lang" (writerVariables opts') of Just x -> x Nothing -> localeLang diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs new file mode 100644 index 000000000..0fbfb3968 --- /dev/null +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -0,0 +1,616 @@ +{- +Copyright (c) 2011-2012, Sergey Astanin +All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. + +FictionBook is an XML-based e-book format. For more information see: +<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1> + +-} +module Text.Pandoc.Writers.FB2 (writeFB2) where + +import Control.Monad.State (StateT, evalStateT, get, modify) +import Control.Monad.State (liftM, liftM2, liftIO) +import Data.ByteString.Base64 (encode) +import Data.Char (toUpper, toLower, isSpace, isAscii, isControl) +import Data.List (intersperse, intercalate, isPrefixOf) +import Data.Either (lefts, rights) +import Network.Browser (browse, request, setAllowRedirects, setOutHandler) +import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) +import Network.HTTP (lookupHeader, HeaderName(..), urlEncode) +import Network.URI (isURI, unEscapeString) +import System.FilePath (takeExtension) +import Text.XML.Light +import qualified Control.Exception as E +import qualified Data.ByteString as B +import qualified Text.XML.Light as X +import qualified Text.XML.Light.Cursor as XC + +import Text.Pandoc.Definition +import Text.Pandoc.Shared (WriterOptions(..), HTMLMathMethod(..)) +import Text.Pandoc.Shared (orderedListMarkers, defaultWriterOptions) +import Text.Pandoc.Generic (bottomUp) + +-- | Data to be written at the end of the document: +-- (foot)notes, URLs, references, images. +data FbRenderState = FbRenderState + { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text + , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path + , parentListMarker :: String -- ^ list marker of the parent ordered list + , parentBulletLevel :: Int -- ^ nesting level of the unordered list + , writerOptions :: WriterOptions + } deriving (Show) + +-- | FictionBook building monad. +type FBM = StateT FbRenderState IO + +newFB :: FbRenderState +newFB = FbRenderState { footnotes = [], imagesToFetch = [] + , parentListMarker = "", parentBulletLevel = 0 + , writerOptions = defaultWriterOptions } + +data ImageMode = NormalImage | InlineImage deriving (Eq) +instance Show ImageMode where + show NormalImage = "imageType" + show InlineImage = "inlineImageType" + +-- | Produce an FB2 document from a 'Pandoc' document. +writeFB2 :: WriterOptions -- ^ conversion options + -> Pandoc -- ^ document to convert + -> IO String -- ^ FictionBook2 document (not encoded yet) +writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do + modify (\s -> s { writerOptions = opts { writerStandalone = True } }) + desc <- description meta + fp <- frontpage meta + secs <- renderSections 1 blocks + let body = el "body" $ fp ++ secs + notes <- renderFootnotes + (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) + let body' = replaceImagesWithAlt missing body + let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) + return $ xml_head ++ (showContent fb2_xml) + where + xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + fb2_attrs = + let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0" + xlink = "http://www.w3.org/1999/xlink" + in [ uattr "xmlns" xmlns + , attr ("xmlns", "l") xlink ] + -- + frontpage :: Meta -> FBM [Content] + frontpage meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ + [ el "title" (el "p" t) + , el "annotation" (map (el "p" . cMap plain) + (docAuthors meta' ++ [docDate meta'])) + ] + description :: Meta -> FBM Content + description meta' = do + bt <- booktitle meta' + let as = authors meta' + dd <- docdate meta' + return $ el "description" + [ el "title-info" (bt ++ as ++ dd) + , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version + ] + booktitle :: Meta -> FBM [Content] + booktitle meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ if null t + then [] + else [ el "book-title" t ] + authors :: Meta -> [Content] + authors meta' = cMap author (docAuthors meta') + author :: [Inline] -> [Content] + author ss = + let ws = words . cMap plain $ ss + email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) + ws' = filter ('@' `notElem`) ws + names = case ws' of + (nickname:[]) -> [ el "nickname" nickname ] + (fname:lname:[]) -> [ el "first-name" fname + , el "last-name" lname ] + (fname:rest) -> [ el "first-name" fname + , el "middle-name" (concat . init $ rest) + , el "last-name" (last rest) ] + ([]) -> [] + in list $ el "author" (names ++ email) + docdate :: Meta -> FBM [Content] + docdate meta' = do + let ss = docDate meta' + d <- cMapM toXml ss + return $ if null d + then [] + else [el "date" d] + +-- | Divide the stream of blocks into sections and convert to XML +-- representation. +renderSections :: Int -> [Block] -> FBM [Content] +renderSections level blocks = do + let secs = splitSections level blocks + mapM (renderSection level) secs + +renderSection :: Int -> ([Inline], [Block]) -> FBM Content +renderSection level (ttl, body) = do + title <- if null ttl + then return [] + else return . list . el "title" . formatTitle $ ttl + content <- if (hasSubsections body) + then renderSections (level + 1) body + else cMapM blockToXml body + return $ el "section" (title ++ content) + where + hasSubsections = any isHeader + isHeader (Header _ _) = True + isHeader _ = False + +-- | Only <p> and <empty-line> are allowed within <title> in FB2. +formatTitle :: [Inline] -> [Content] +formatTitle inlines = + let lns = split isLineBreak inlines + lns' = map (el "p" . cMap plain) lns + in intersperse (el "empty-line" ()) lns' + +split :: (a -> Bool) -> [a] -> [[a]] +split _ [] = [] +split cond xs = let (b,a) = break cond xs + in (b:split cond (drop 1 a)) + +isLineBreak :: Inline -> Bool +isLineBreak LineBreak = True +isLineBreak _ = False + +-- | Divide the stream of block elements into sections: [(title, blocks)]. +splitSections :: Int -> [Block] -> [([Inline], [Block])] +splitSections level blocks = reverse $ revSplit (reverse blocks) + where + revSplit [] = [] + revSplit rblocks = + let (lastsec, before) = break sameLevel rblocks + (header, prevblocks) = + case before of + ((Header n title):prevblocks') -> + if n == level + then (title, prevblocks') + else ([], before) + _ -> ([], before) + in (header, reverse lastsec) : revSplit prevblocks + sameLevel (Header n _) = n == level + sameLevel _ = False + +-- | Make another FictionBook body with footnotes. +renderFootnotes :: FBM [Content] +renderFootnotes = do + fns <- footnotes `liftM` get + if null fns + then return [] -- no footnotes + else return . list $ + el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) + where + renderFN (n, idstr, cs) = + let fn_texts = (el "title" (el "p" (show n))) : cs + in el "section" ([uattr "id" idstr], fn_texts) + +-- | Fetch images and encode them for the FictionBook XML. +-- Return image data and a list of hrefs of the missing images. +fetchImages :: [(String,String)] -> IO ([Content],[String]) +fetchImages links = do + imgs <- mapM (uncurry fetchImage) links + return $ (rights imgs, lefts imgs) + +-- | Fetch image data from disk or from network and make a <binary> XML section. +-- Return either (Left hrefOfMissingImage) or (Right xmlContent). +fetchImage :: String -> String -> IO (Either String Content) +fetchImage href link = do + mbimg <- + case (isURI link, readDataURI link) of + (True, Just (mime,_,True,base64)) -> + let mime' = map toLower mime + in if mime' == "image/png" || mime' == "image/jpeg" + then return (Just (mime',base64)) + else return Nothing + (True, Just _) -> return Nothing -- not base64-encoded + (True, Nothing) -> fetchURL link + (False, _) -> do + d <- nothingOnError $ B.readFile (unEscapeString link) + let t = case map toLower (takeExtension link) of + ".png" -> Just "image/png" + ".jpg" -> Just "image/jpeg" + ".jpeg" -> Just "image/jpeg" + ".jpe" -> Just "image/jpeg" + _ -> Nothing -- only PNG and JPEG are supported in FB2 + return $ liftM2 (,) t (liftM (toStr . encode) d) + case mbimg of + Just (imgtype, imgdata) -> do + return . Right $ el "binary" + ( [uattr "id" href + , uattr "content-type" imgtype] + , txt imgdata ) + _ -> return (Left ('#':href)) + where + nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString)) + nothingOnError action = liftM Just action `E.catch` omnihandler + omnihandler :: E.SomeException -> IO (Maybe B.ByteString) + omnihandler _ = return Nothing + +-- | Extract mime type and encoded data from the Data URI. +readDataURI :: String -- ^ URI + -> Maybe (String,String,Bool,String) + -- ^ Maybe (mime,charset,isBase64,data) +readDataURI uri = + let prefix = "data:" + in if not (prefix `isPrefixOf` uri) + then Nothing + else + let rest = drop (length prefix) uri + meta = takeWhile (/= ',') rest -- without trailing ',' + uridata = drop (length meta + 1) rest + parts = split (== ';') meta + (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts + in Just (mime,cs,enc,uridata) + where + upd str m@(mime,cs,enc) + | isMimeType str = (str,cs,enc) + | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc) + | str == "base64" = (mime,cs,True) + | otherwise = m + +-- Without parameters like ;charset=...; see RFC 2045, 5.1 +isMimeType :: String -> Bool +isMimeType s = + case split (=='/') s of + [mtype,msubtype] -> + ((map toLower mtype) `elem` types + || "x-" `isPrefixOf` (map toLower mtype)) + && all valid mtype + && all valid msubtype + _ -> False + where + types = ["text","image","audio","video","application","message","multipart"] + valid c = isAscii c && not (isControl c) && not (isSpace c) && + c `notElem` "()<>@,;:\\\"/[]?=" + +-- | Fetch URL, return its Content-Type and binary data on success. +fetchURL :: String -> IO (Maybe (String, String)) +fetchURL url = do + flip catchIO_ (return Nothing) $ do + r <- browse $ do + setOutHandler (const (return ())) + setAllowRedirects True + liftM snd . request . getRequest $ url + let content_type = lookupHeader HdrContentType (getHeaders r) + content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r + return $ liftM2 (,) content_type content + where + +toBS :: String -> B.ByteString +toBS = B.pack . map (toEnum . fromEnum) + +toStr :: B.ByteString -> String +toStr = map (toEnum . fromEnum) . B.unpack + +footnoteID :: Int -> String +footnoteID i = "n" ++ (show i) + +linkID :: Int -> String +linkID i = "l" ++ (show i) + +-- | Convert a block-level Pandoc's element to FictionBook XML representation. +blockToXml :: Block -> FBM [Content] +blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 +blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula +blockToXml (Para [img@(Image _ _)]) = insertImage NormalImage img +blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss +blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . + map (el "p" . el "code") . lines $ s +blockToXml (RawBlock _ s) = return . spaceBeforeAfter . + map (el "p" . el "code") . lines $ s +blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs +blockToXml (OrderedList a bss) = do + state <- get + let pmrk = parentListMarker state + let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a + let mkitem mrk bs = do + modify (\s -> s { parentListMarker = mrk }) + itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentListMarker = pmrk }) -- old parent marker + return . el "p" $ [ txt mrk, txt " " ] ++ itemtext + mapM (uncurry mkitem) (zip markers bss) +blockToXml (BulletList bss) = do + state <- get + let level = parentBulletLevel state + let pmrk = parentListMarker state + let prefix = replicate (length pmrk) ' ' + let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"] + let mrk = prefix ++ bullets !! (level `mod` (length bullets)) + let mkitem bs = do + modify (\s -> s { parentBulletLevel = (level+1) }) + itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentBulletLevel = level }) -- restore bullet level + return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext + mapM mkitem bss +blockToXml (DefinitionList defs) = + cMapM mkdef defs + where + mkdef (term, bss) = do + def <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss + t <- wrap "strong" term + return [ el "p" t, el "p" def ] + sep blocks = + if all needsBreak blocks then + blocks ++ [Plain [LineBreak]] + else + blocks + needsBreak (Para _) = False + needsBreak (Plain ins) = LineBreak `notElem` ins + needsBreak _ = True +blockToXml (Header _ _) = -- should never happen, see renderSections + error "unexpected header in section text" +blockToXml HorizontalRule = return + [ el "empty-line" () + , el "p" (txt (replicate 10 '—')) + , el "empty-line" () ] +blockToXml (Table caption aligns _ headers rows) = do + hd <- mkrow "th" headers aligns + bd <- mapM (\r -> mkrow "td" r aligns) rows + c <- return . el "emphasis" =<< cMapM toXml caption + return [el "table" (hd : bd), el "p" c] + where + mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content + mkrow tag cells aligns' = + (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns')) + -- + mkcell :: String -> (TableCell, Alignment) -> FBM Content + mkcell tag (cell, align) = do + cblocks <- cMapM blockToXml cell + return $ el tag ([align_attr align], cblocks) + -- + align_attr a = Attr (QName "align" Nothing Nothing) (align_str a) + align_str AlignLeft = "left" + align_str AlignCenter = "center" + align_str AlignRight = "right" + align_str AlignDefault = "left" +blockToXml Null = return [] + +-- Replace paragraphs with plain text and line break. +-- Necessary to simulate multi-paragraph lists in FB2. +paraToPlain :: [Block] -> [Block] +paraToPlain [] = [] +paraToPlain (Para inlines : rest) = + let p = (Plain (inlines ++ [LineBreak])) + in p : paraToPlain rest +paraToPlain (p:rest) = p : paraToPlain rest + +-- Simulate increased indentation level. Will not really work +-- for multi-line paragraphs. +indent :: Block -> Block +indent = indentBlock + where + -- indentation space + spacer :: String + spacer = replicate 4 ' ' + -- + indentBlock (Plain ins) = Plain ((Str spacer):ins) + indentBlock (Para ins) = Para ((Str spacer):ins) + indentBlock (CodeBlock a s) = + let s' = unlines . map (spacer++) . lines $ s + in CodeBlock a s' + indentBlock (BlockQuote bs) = BlockQuote (map indent bs) + indentBlock (Header l ins) = Header l (indentLines ins) + indentBlock everythingElse = everythingElse + -- indent every (explicit) line + indentLines :: [Inline] -> [Inline] + indentLines ins = let lns = split isLineBreak ins :: [[Inline]] + in intercalate [LineBreak] $ map ((Str spacer):) lns + +-- | Convert a Pandoc's Inline element to FictionBook XML representation. +toXml :: Inline -> FBM [Content] +toXml (Str s) = return [txt s] +toXml (Emph ss) = list `liftM` wrap "emphasis" ss +toXml (Strong ss) = list `liftM` wrap "strong" ss +toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss +toXml (Superscript ss) = list `liftM` wrap "sup" ss +toXml (Subscript ss) = list `liftM` wrap "sub" ss +toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss +toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific + inner <- cMapM toXml ss + return $ [txt "‘"] ++ inner ++ [txt "’"] +toXml (Quoted DoubleQuote ss) = do + inner <- cMapM toXml ss + return $ [txt "“"] ++ inner ++ [txt "”"] +toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles +toXml (Code _ s) = return [el "code" s] +toXml Space = return [txt " "] +toXml LineBreak = return [el "empty-line" ()] +toXml (Math _ formula) = insertMath InlineImage formula +toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed +toXml (Link text (url,ttl)) = do + fns <- footnotes `liftM` get + let n = 1 + length fns + let ln_id = linkID n + let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]" + ln_text <- cMapM toXml text + let ln_desc = + let ttl' = dropWhile isSpace ttl + in if null ttl' + then list . el "p" $ el "code" url + else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ] + modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns }) + return $ ln_text ++ + [ el "a" + ( [ attr ("l","href") ('#':ln_id) + , uattr "type" "note" ] + , ln_ref) ] +toXml img@(Image _ _) = insertImage InlineImage img +toXml (Note bs) = do + fns <- footnotes `liftM` get + let n = 1 + length fns + let fn_id = footnoteID n + fn_desc <- cMapM blockToXml bs + modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns }) + let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]" + return . list $ el "a" ( [ attr ("l","href") ('#':fn_id) + , uattr "type" "note" ] + , fn_ref ) + +insertMath :: ImageMode -> String -> FBM [Content] +insertMath immode formula = do + htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get + case htmlMath of + WebTeX url -> do + let alt = [Code nullAttr formula] + let imgurl = url ++ urlEncode formula + let img = Image alt (imgurl, "") + insertImage immode img + _ -> return [el "code" formula] + +insertImage :: ImageMode -> Inline -> FBM [Content] +insertImage immode (Image alt (url,ttl)) = do + images <- imagesToFetch `liftM` get + let n = 1 + length images + let fname = "image" ++ show n + modify (\s -> s { imagesToFetch = (fname, url) : images }) + let ttlattr = case (immode, null ttl) of + (NormalImage, False) -> [ uattr "title" ttl ] + _ -> [] + return . list $ + el "image" $ + [ attr ("l","href") ('#':fname) + , attr ("l","type") (show immode) + , uattr "alt" (cMap plain alt) ] + ++ ttlattr +insertImage _ _ = error "unexpected inline instead of image" + +replaceImagesWithAlt :: [String] -> Content -> Content +replaceImagesWithAlt missingHrefs body = + let cur = XC.fromContent body + cur' = replaceAll cur + in XC.toTree . XC.root $ cur' + where + -- + replaceAll :: XC.Cursor -> XC.Cursor + replaceAll c = + let n = XC.current c + c' = if isImage n && isMissing n + then XC.modifyContent replaceNode c + else c + in case XC.nextDF c' of + (Just cnext) -> replaceAll cnext + Nothing -> c' -- end of document + -- + isImage :: Content -> Bool + isImage (Elem e) = (elName e) == (uname "image") + isImage _ = False + -- + isMissing (Elem img@(Element _ _ _ _)) = + let imgAttrs = elAttribs img + badAttrs = map (attr ("l","href")) missingHrefs + in any (`elem` imgAttrs) badAttrs + isMissing _ = False + -- + replaceNode :: Content -> Content + replaceNode n@(Elem img@(Element _ _ _ _)) = + let attrs = elAttribs img + alt = getAttrVal attrs (uname "alt") + imtype = getAttrVal attrs (qname "l" "type") + in case (alt, imtype) of + (Just alt', Just imtype') -> + if imtype' == show NormalImage + then el "p" alt' + else txt alt' + (Just alt', Nothing) -> txt alt' -- no type attribute + _ -> n -- don't replace if alt text is not found + replaceNode n = n + -- + getAttrVal :: [X.Attr] -> QName -> Maybe String + getAttrVal attrs name = + case filter ((name ==) . attrKey) attrs of + (a:_) -> Just (attrVal a) + _ -> Nothing + + +-- | Wrap all inlines with an XML tag (given its unqualified name). +wrap :: String -> [Inline] -> FBM Content +wrap tagname inlines = el tagname `liftM` cMapM toXml inlines + +-- " Create a singleton list. +list :: a -> [a] +list = (:[]) + +-- | Convert an 'Inline' to plaintext. +plain :: Inline -> String +plain (Str s) = s +plain (Emph ss) = concat (map plain ss) +plain (Strong ss) = concat (map plain ss) +plain (Strikeout ss) = concat (map plain ss) +plain (Superscript ss) = concat (map plain ss) +plain (Subscript ss) = concat (map plain ss) +plain (SmallCaps ss) = concat (map plain ss) +plain (Quoted _ ss) = concat (map plain ss) +plain (Cite _ ss) = concat (map plain ss) -- FIXME +plain (Code _ s) = s +plain Space = " " +plain LineBreak = "\n" +plain (Math _ s) = s +plain (RawInline _ s) = s +plain (Link text (url,_)) = concat (map plain text ++ [" <", url, ">"]) +plain (Image alt _) = concat (map plain alt) +plain (Note _) = "" -- FIXME + +-- | Create an XML element. +el :: (Node t) + => String -- ^ unqualified element name + -> t -- ^ node contents + -> Content -- ^ XML content +el name cs = Elem $ unode name cs + +-- | Put empty lines around content +spaceBeforeAfter :: [Content] -> [Content] +spaceBeforeAfter cs = + let emptyline = el "empty-line" () + in [emptyline] ++ cs ++ [emptyline] + +-- | Create a plain-text XML content. +txt :: String -> Content +txt s = Text $ CData CDataText s Nothing + +-- | Create an XML attribute with an unqualified name. +uattr :: String -> String -> Text.XML.Light.Attr +uattr name val = Attr (uname name) val + +-- | Create an XML attribute with a qualified name from given namespace. +attr :: (String, String) -> String -> Text.XML.Light.Attr +attr (ns, name) val = Attr (qname ns name) val + +-- | Unqualified name +uname :: String -> QName +uname name = QName name Nothing Nothing + +-- | Qualified name +qname :: String -> String -> QName +qname ns name = QName name Nothing (Just ns) + +-- | Abbreviation for 'concatMap'. +cMap :: (a -> [b]) -> [a] -> [b] +cMap = concatMap + +-- | Monadic equivalent of 'concatMap'. +cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +cMapM f xs = concat `liftM` mapM f xs
\ No newline at end of file diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9dd29f183..cafb6ca74 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, CPP #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -46,7 +46,12 @@ import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) import Data.Maybe ( catMaybes ) import Control.Monad.State +#if MIN_VERSION_blaze_html(0,5,0) +import Text.Blaze.Html hiding(contents) +import Text.Blaze.Internal(preEscapedString) +#else import Text.Blaze +#endif import qualified Text.Blaze.Html5 as H5 import qualified Text.Blaze.XHtml1.Transitional as H import qualified Text.Blaze.XHtml1.Transitional.Attributes as A @@ -59,12 +64,14 @@ import Data.Monoid (mempty, mconcat) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes , stMath :: Bool -- ^ Math is used in document + , stQuotes :: Bool -- ^ <q> tag is used , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section } defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = False, stSecNum = []} +defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False, + stHighlighting = False, stSecNum = []} -- Helpers to render HTML with the appropriate function. @@ -156,7 +163,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do let newvars = [("highlighting-css", styleToCss $ writerHighlightStyle opts) | stHighlighting st] ++ - [("math", renderHtml math) | stMath st] + [("math", renderHtml math) | stMath st] ++ + [("quotes", "yes") | stQuotes st] return (tit, auths, authsMeta, date, toc, thebody, newvars) -- | Prepare author for meta tag, converting notes into @@ -191,6 +199,7 @@ inTemplate opts tit auths authsMeta date toc body' newvars = , ("date", date') , ("idprefix", writerIdentifierPrefix opts) , ("slidy-url", "http://www.w3.org/Talks/Tools/Slidy2") + , ("slideous-url", "slideous") , ("s5-url", "s5/default") ] ++ [ ("html5","true") | writerHtml5 opts ] ++ (case toc of @@ -253,7 +262,9 @@ elementToHtml slideLevel opts (Sec level num id' title' elements) = do -- always use level 1 for slide titles let level' = if slide then 1 else level let titleSlide = slide && level < slideLevel - header' <- blockToHtml opts (Header level' title') + header' <- if title' == [Str "\0"] -- marker for hrule + then return mempty + else blockToHtml opts (Header level' title') let isSec (Sec _ _ _ _ _) = True isSec (Blk _) = False innerContents <- mapM (elementToHtml slideLevel opts) @@ -261,9 +272,8 @@ elementToHtml slideLevel opts (Sec level num id' title' elements) = do -- title slides have no content of their own then filter isSec elements else elements - let header'' = if (writerStrictMarkdown opts || - writerSectionDivs opts || - writerSlideVariant opts == S5Slides) + let header'' = if (writerStrictMarkdown opts || writerSectionDivs opts || + writerSlideVariant opts == S5Slides || slide) then header' else header' ! prefixedId opts id' let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] @@ -581,8 +591,12 @@ inlineToHtml opts inline = strToHtml "’") DoubleQuote -> (strToHtml "“", strToHtml "”") - in do contents <- inlineListToHtml opts lst - return $ leftQuote >> contents >> rightQuote + in if writerHtml5 opts + then do + modify $ \st -> st{ stQuotes = True } + H.q `fmap` inlineListToHtml opts lst + else (\x -> leftQuote >> x >> rightQuote) + `fmap` inlineListToHtml opts lst (Math t str) -> modify (\st -> st {stMath = True}) >> (case writerHTMLMathMethod opts of LaTeXMathML _ -> @@ -624,7 +638,7 @@ inlineToHtml opts inline = Left _ -> inlineListToHtml opts (readTeXMath str) >>= return . (H.span ! A.class_ "math") - MathJax _ -> return $ toHtml $ + MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e99b20c60..7beee2d42 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -56,7 +56,6 @@ data WriterState = , stEnumerate :: Bool -- true if document needs fancy enumerated lists , stTable :: Bool -- true if document has a table , stStrikeout :: Bool -- true if document has strikeout - , stSubscript :: Bool -- true if document has subscript , stUrl :: Bool -- true if document has visible URL link , stGraphics :: Bool -- true if document contains images , stLHS :: Bool -- true if document has literate haskell code @@ -65,6 +64,7 @@ data WriterState = , stHighlighting :: Bool -- true if document has highlighted code , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit , stInternalLinks :: [String] -- list of internal link targets + , stUsesEuro :: Bool -- true if euro symbol used } -- | Convert Pandoc to LaTeX. @@ -74,12 +74,12 @@ writeLaTeX options document = WriterState { stInNote = False, stInTable = False, stTableNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stEnumerate = False, - stTable = False, stStrikeout = False, stSubscript = False, + stTable = False, stStrikeout = False, stUrl = False, stGraphics = False, stLHS = False, stBook = writerChapters options, stCsquotes = False, stHighlighting = False, stIncremental = writerIncremental options, - stInternalLinks = [] } + stInternalLinks = [], stUsesEuro = False } pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do @@ -117,7 +117,7 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do else return blocks' body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'' biblioTitle <- liftM (render colwidth) $ inlineListToLaTeX lastHeader - let main = render colwidth $ vcat body + let main = render colwidth $ vsep body st <- get let biblioFiles = intercalate "," $ map dropExtension $ writerBiblioFiles options citecontext = case writerCiteMethod options of @@ -134,6 +134,8 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do [ ("toc", if writerTableOfContents options then "yes" else "") , ("body", main) , ("title", titletext) + , ("title-meta", stringify title) + , ("author-meta", intercalate "; " $ map stringify authors) , ("date", dateText) , ("documentclass", if writerBeamer options then "beamer" @@ -145,14 +147,16 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do [ ("fancy-enums", "yes") | stEnumerate st ] ++ [ ("tables", "yes") | stTable st ] ++ [ ("strikeout", "yes") | stStrikeout st ] ++ - [ ("subscript", "yes") | stSubscript st ] ++ [ ("url", "yes") | stUrl st ] ++ [ ("numbersections", "yes") | writerNumberSections options ] ++ [ ("lhs", "yes") | stLHS st ] ++ [ ("graphics", "yes") | stGraphics st ] ++ [ ("book-class", "yes") | stBook st] ++ + [ ("euro", "yes") | stUsesEuro st] ++ [ ("listings", "yes") | writerListings options || stLHS st ] ++ [ ("beamer", "yes") | writerBeamer options ] ++ + [ ("mainlang", maybe "" (reverse . takeWhile (/=',') . reverse) + (lookup "lang" $ writerVariables options)) ] ++ [ ("highlighting-macros", styleToLaTeX $ writerHighlightStyle options ) | stHighlighting st ] ++ citecontext @@ -166,13 +170,20 @@ elementToLaTeX _ (Blk block) = blockToLaTeX block elementToLaTeX opts (Sec level _ id' title' elements) = do header' <- sectionHeader id' level title' innerContents <- mapM (elementToLaTeX opts) elements - return $ vcat (header' : innerContents) + return $ vsep (header' : innerContents) -- escape things as needed for LaTeX -stringToLaTeX :: Bool -> String -> String -stringToLaTeX _ [] = "" -stringToLaTeX isUrl (x:xs) = - case x of +stringToLaTeX :: Bool -> String -> State WriterState String +stringToLaTeX _ [] = return "" +stringToLaTeX isUrl (x:xs) = do + opts <- gets stOptions + rest <- stringToLaTeX isUrl xs + let ligatures = writerTeXLigatures opts + when (x == '€') $ + modify $ \st -> st{ stUsesEuro = True } + return $ + case x of + '€' -> "\\euro{}" ++ rest '{' -> "\\{" ++ rest '}' -> "\\}" ++ rest '$' -> "\\$" ++ rest @@ -183,25 +194,23 @@ stringToLaTeX isUrl (x:xs) = '-' -> case xs of -- prevent adjacent hyphens from forming ligatures ('-':_) -> "-{}" ++ rest _ -> '-' : rest - '~' | not isUrl -> "\\ensuremath{\\sim}" + '~' | not isUrl -> "\\textasciitilde{}" ++ rest '^' -> "\\^{}" ++ rest '\\' -> "\\textbackslash{}" ++ rest - '€' -> "\\euro{}" ++ rest '|' -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as ']' -> "{]}" ++ rest -- optional arguments '\160' -> "~" ++ rest - '\x2018' -> "`" ++ rest - '\x2019' -> "'" ++ rest - '\x201C' -> "``" ++ rest - '\x201D' -> "''" ++ rest '\x2026' -> "\\ldots{}" ++ rest - '\x2014' -> "---" ++ rest - '\x2013' -> "--" ++ rest + '\x2018' | ligatures -> "`" ++ rest + '\x2019' | ligatures -> "'" ++ rest + '\x201C' | ligatures -> "``" ++ rest + '\x201D' | ligatures -> "''" ++ rest + '\x2014' | ligatures -> "---" ++ rest + '\x2013' | ligatures -> "--" ++ rest _ -> x : rest - where rest = stringToLaTeX isUrl xs -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc @@ -234,8 +243,11 @@ elementToBeamer slideLevel (Sec lvl _num _ident tit elts) let fragile = if not $ null $ queryWith hasCodeBlock elts ++ queryWith hasCode elts then "[fragile]" else "" - let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile ++ - "\\frametitle{") : tit ++ [RawInline "latex" "}"] + let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile) : + if tit == [Str "\0"] -- marker for hrule + then [] + else (RawInline "latex" "\\frametitle{") : tit ++ + [RawInline "latex" "}"] let slideEnd = RawBlock "latex" "\\end{frame}" -- now carve up slide into blocks if there are sections inside bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts @@ -256,16 +268,16 @@ blockToLaTeX (Para [Image txt (src,tit)]) = do capt <- inlineListToLaTeX txt img <- inlineToLaTeX (Image txt (src,tit)) return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - ("\\caption{" <> capt <> char '}') $$ "\\end{figure}" $$ blankline + ("\\caption{" <> capt <> char '}') $$ "\\end{figure}" blockToLaTeX (Para lst) = do result <- inlineListToLaTeX lst - return $ result <> blankline + return result blockToLaTeX (BlockQuote lst) = do beamer <- writerBeamer `fmap` gets stOptions case lst of [b] | beamer && isListBlock b -> do oldIncremental <- gets stIncremental - modify $ \s -> s{ stIncremental = True } + modify $ \s -> s{ stIncremental = not oldIncremental } result <- blockToLaTeX b modify $ \s -> s{ stIncremental = oldIncremental } return result @@ -290,7 +302,7 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do return "Verbatim" else return "verbatim" return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$ - text ("\\end{" ++ env ++ "}")) $$ cr -- final cr because of notes + text ("\\end{" ++ env ++ "}")) <> cr listingsCodeBlock = do st <- get let params = if writerListings (stOptions st) @@ -325,13 +337,14 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do Nothing -> rawCodeBlock Just h -> modify (\st -> st{ stHighlighting = True }) >> return (flush $ text h) -blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline +blockToLaTeX (RawBlock "latex" x) = return $ text x blockToLaTeX (RawBlock _ _) = return empty blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental let inc = if incremental then "[<+->]" else "" items <- mapM listItemToLaTeX lst - return $ text ("\\begin{itemize}" ++ inc) $$ vcat items $$ "\\end{itemize}" + return $ text ("\\begin{itemize}" ++ inc) $$ vcat items $$ + "\\end{itemize}" blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do st <- get let inc = if stIncremental st then "[<+->]" else "" @@ -357,9 +370,10 @@ blockToLaTeX (DefinitionList lst) = do incremental <- gets stIncremental let inc = if incremental then "[<+->]" else "" items <- mapM defListItemToLaTeX lst - return $ text ("\\begin{description}" ++ inc) $$ vcat items $$ "\\end{description}" + return $ text ("\\begin{description}" ++ inc) $$ vcat items $$ + "\\end{description}" blockToLaTeX HorizontalRule = return $ - "\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline + "\\begin{center}\\rule{3in}{0.4pt}\\end{center}" blockToLaTeX (Header level lst) = sectionHeader "" level lst blockToLaTeX (Table caption aligns widths heads rows) = do modify $ \s -> s{ stInTable = True, stTableNotes = [] } @@ -370,7 +384,7 @@ blockToLaTeX (Table caption aligns widths heads rows) = do captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "caption = " <> captionText <> "," <> space + else text "caption = {" <> captionText <> "}," <> space rows' <- mapM (tableRowToLaTeX False aligns widths) rows let rows'' = intersperse ("\\\\\\noalign{\\medskip}") rows' tableNotes <- liftM (reverse . stTableNotes) get @@ -385,7 +399,7 @@ blockToLaTeX (Table caption aligns widths heads rows) = do $$ braces (text "% rows" $$ "\\FL" $$ vcat (headers : rows'') $$ "\\LL" <> cr) modify $ \s -> s{ stTable = True, stInTable = False, stTableNotes = [] } - return $ tableBody $$ blankline + return $ tableBody toColDescriptor :: Alignment -> String toColDescriptor align = @@ -396,7 +410,7 @@ toColDescriptor align = AlignDefault -> "l" blockListToLaTeX :: [Block] -> State WriterState Doc -blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat +blockListToLaTeX lst = vsep `fmap` mapM blockToLaTeX lst tableRowToLaTeX :: Bool -> [Alignment] @@ -457,7 +471,6 @@ sectionHeader ref level lst = do <> braces (lab <> text "\\label" <> braces (text ref)) else lab) - $$ blankline let headerWith x y = refLabel $ text x <> y return $ case level' of 0 -> if writerBeamer opts @@ -468,7 +481,7 @@ sectionHeader ref level lst = do 3 -> headerWith "\\subsubsection" stuffing 4 -> headerWith "\\paragraph" stuffing 5 -> headerWith "\\subparagraph" stuffing - _ -> txt $$ blankline + _ -> txt -- | Convert list of inline elements to LaTeX. @@ -494,11 +507,7 @@ inlineToLaTeX (Strikeout lst) = do inlineToLaTeX (Superscript lst) = inlineListToLaTeX lst >>= return . inCmd "textsuperscript" inlineToLaTeX (Subscript lst) = do - modify $ \s -> s{ stSubscript = True } - contents <- inlineListToLaTeX lst - -- oddly, latex includes \textsuperscript but not \textsubscript - -- so we have to define it (using a different name so as not to conflict with memoir class): - return $ inCmd "textsubscr" contents + inlineListToLaTeX lst >>= return . inCmd "textsubscript" inlineToLaTeX (SmallCaps lst) = inlineListToLaTeX lst >>= return . inCmd "textsc" inlineToLaTeX (Cite cits lst) = do @@ -525,24 +534,12 @@ inlineToLaTeX (Code (_,classes,_) str) = do Nothing -> rawCode Just h -> modify (\st -> st{ stHighlighting = True }) >> return (text h) - rawCode = return - $ text $ "\\texttt{" ++ stringToLaTeX False str ++ "}" -inlineToLaTeX (Quoted SingleQuote lst) = do - contents <- inlineListToLaTeX lst - csquotes <- liftM stCsquotes get - if csquotes - then return $ "\\enquote" <> braces contents - else do - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then "\\," - else empty - return $ char '`' <> s1 <> contents <> s2 <> char '\'' -inlineToLaTeX (Quoted DoubleQuote lst) = do + rawCode = liftM (text . (\s -> "\\texttt{" ++ s ++ "}")) + $ stringToLaTeX False str +inlineToLaTeX (Quoted qt lst) = do contents <- inlineListToLaTeX lst csquotes <- liftM stCsquotes get + opts <- gets stOptions if csquotes then return $ "\\enquote" <> braces contents else do @@ -552,8 +549,17 @@ inlineToLaTeX (Quoted DoubleQuote lst) = do let s2 = if (not (null lst)) && (isQuoted (last lst)) then "\\," else empty - return $ "``" <> s1 <> contents <> s2 <> "''" -inlineToLaTeX (Str str) = return $ text $ stringToLaTeX False str + let inner = s1 <> contents <> s2 + return $ case qt of + DoubleQuote -> + if writerTeXLigatures opts + then text "``" <> inner <> text "''" + else char '\x201C' <> inner <> char '\x201D' + SingleQuote -> + if writerTeXLigatures opts + then char '`' <> inner <> char '\'' + else char '\x2018' <> inner <> char '\x2019' +inlineToLaTeX (Str str) = liftM text $ stringToLaTeX False str inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" inlineToLaTeX (RawInline "latex" str) = return $ text str @@ -561,13 +567,18 @@ inlineToLaTeX (RawInline "tex" str) = return $ text str inlineToLaTeX (RawInline _ _) = return empty inlineToLaTeX (LineBreak) = return "\\\\" inlineToLaTeX Space = return space +inlineToLaTeX (Link txt ('#':ident, _)) = do + contents <- inlineListToLaTeX txt + ident' <- stringToLaTeX False ident + return $ text "\\hyperref" <> brackets (text ident') <> braces contents inlineToLaTeX (Link txt (src, _)) = case txt of [Code _ x] | x == src -> -- autolink do modify $ \s -> s{ stUrl = True } return $ text $ "\\url{" ++ x ++ "}" _ -> do contents <- inlineListToLaTeX txt - return $ text ("\\href{" ++ stringToLaTeX True src ++ "}{") <> + src' <- stringToLaTeX True src + return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } @@ -580,13 +591,16 @@ inlineToLaTeX (Note contents) = do contents' <- blockListToLaTeX contents modify (\s -> s {stInNote = False}) inTable <- liftM stInTable get + let optnl = case reverse contents of + (CodeBlock _ _ : _) -> cr + _ -> empty if inTable then do curnotes <- liftM stTableNotes get let marker = cycle ['a'..'z'] !! length curnotes modify $ \s -> s{ stTableNotes = (marker, contents') : curnotes } return $ "\\tmark" <> brackets (char marker) <> space - else return $ "\\footnote" <> braces (nest 2 contents') + else return $ "\\footnote" <> braces (nest 2 contents' <> optnl) -- note: a \n before } needed when note ends with a Verbatim environment citationsToNatbib :: [Citation] -> State WriterState Doc diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index d3735efa7..c481e6c87 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -112,7 +112,11 @@ escapeString = escapeStringUsing manEscapes -- | Escape a literal (code) section for Man. escapeCode :: String -> String -escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") +escapeCode = concat . intersperse "\n" . map escapeLine . lines where + escapeLine codeline = + case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of + a@('.':_) -> "\\&" ++ a + b -> b -- We split inline lists into sentences, and print one sentence per -- line. groff/troff treats the line-ending period differently. @@ -122,15 +126,18 @@ escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") breakSentence :: [Inline] -> ([Inline], [Inline]) breakSentence [] = ([],[]) breakSentence xs = - let isSentenceEndInline (Str ".") = True - isSentenceEndInline (Str "?") = True + let isSentenceEndInline (Str ys@(_:_)) | last ys == '.' = True + isSentenceEndInline (Str ys@(_:_)) | last ys == '?' = True + isSentenceEndInline (LineBreak) = True isSentenceEndInline _ = False (as, bs) = break isSentenceEndInline xs in case bs of [] -> (as, []) [c] -> (as ++ [c], []) (c:Space:cs) -> (as ++ [c], cs) - (Str ".":Str ")":cs) -> (as ++ [Str ".", Str ")"], cs) + (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) + (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) + (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) (c:cs) -> (as ++ [c] ++ ds, es) where (ds, es) = breakSentence cs @@ -279,7 +286,7 @@ blockListToMan opts blocks = inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc -- if list starts with ., insert a zero-width character \& so it -- won't be interpreted as markup if it falls at the beginning of a line. -inlineListToMan opts lst@(Str "." : _) = mapM (inlineToMan opts) lst >>= +inlineListToMan opts lst@(Str ('.':_) : _) = mapM (inlineToMan opts) lst >>= (return . (text "\\&" <>) . hcat) inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 7ce939395..32b28a770 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -35,8 +35,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared -import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Pandoc.Parsing hiding (blankline, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose ) import Text.Pandoc.Pretty import Control.Monad.State @@ -152,7 +151,7 @@ noteToMarkdown opts num blocks = do -- | Escape special characters for Markdown. escapeString :: String -> String escapeString = escapeStringUsing markdownEscapes - where markdownEscapes = backslashEscapes "\\`*_>#~^" + where markdownEscapes = backslashEscapes "\\`*_$<>#~^" -- | Construct table of contents from list of header blocks. tableOfContents :: WriterOptions -> [Block] -> Doc @@ -188,7 +187,7 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] <> "=\"" <> text v <> "\"") ks -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parser [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && @@ -218,7 +217,7 @@ blockToMarkdown opts (Para inlines) = do let esc = if (not (writerStrictMarkdown opts)) && not (stPlain st) && beginsWithOrderedListMarker (render Nothing contents) - then text "\\" + then text "\x200B" -- zero-width space, a hack else empty return $ esc <> contents <> blankline blockToMarkdown _ (RawBlock f str) @@ -254,7 +253,7 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ if writerStrictMarkdown opts || attribs == nullAttr then nest (writerTabStop opts) (text str) <> blankline else -- use delimited code block - flush (tildes <> space <> attrs <> cr <> text str <> + (tildes <> space <> attrs <> cr <> text str <> cr <> tildes) <> blankline where tildes = text "~~~~" attrs = attrsToMarkdown attribs @@ -355,13 +354,13 @@ definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label let tabStop = writerTabStop opts st <- get - let leader = if stPlain st then " " else " ~" + let leader = if stPlain st then " " else ": " let sps = case writerTabStop opts - 3 of n | n > 0 -> text $ replicate n ' ' _ -> text " " defs' <- mapM (mapM (blockToMarkdown opts)) defs let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' - return $ labelText <> cr <> contents <> cr + return $ nowrap labelText <> cr <> contents <> cr -- | Convert list of Pandoc block elements to markdown. blockListToMarkdown :: WriterOptions -- ^ Options @@ -516,9 +515,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do else "[" <> linktext <> "](" <> text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || - (alternate == [Str source]) -- to prevent autolinks - then [Str "image"] + let txt = if null alternate || alternate == [Str source] + -- to prevent autolinks + then [Str ""] else alternate linkPart <- inlineToMarkdown opts (Link txt (source, tit)) return $ "!" <> linkPart diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index f31a2c2d1..b32c5327d 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -149,6 +149,7 @@ blockToMediaWiki opts (Table capt aligns widths headers rows') = do blockToMediaWiki opts x@(BulletList items) = do oldUseTags <- get >>= return . stUseTags + listLevel <- get >>= return . stListLevel let useTags = oldUseTags || not (isSimpleList x) if useTags then do @@ -160,10 +161,11 @@ blockToMediaWiki opts x@(BulletList items) = do modify $ \s -> s { stListLevel = stListLevel s ++ "*" } contents <- mapM (listItemToMediaWiki opts) items modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ "\n" + return $ vcat contents ++ if null listLevel then "\n" else "" blockToMediaWiki opts x@(OrderedList attribs items) = do oldUseTags <- get >>= return . stUseTags + listLevel <- get >>= return . stListLevel let useTags = oldUseTags || not (isSimpleList x) if useTags then do @@ -175,10 +177,11 @@ blockToMediaWiki opts x@(OrderedList attribs items) = do modify $ \s -> s { stListLevel = stListLevel s ++ "#" } contents <- mapM (listItemToMediaWiki opts) items modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ "\n" + return $ vcat contents ++ if null listLevel then "\n" else "" blockToMediaWiki opts x@(DefinitionList items) = do oldUseTags <- get >>= return . stUseTags + listLevel <- get >>= return . stListLevel let useTags = oldUseTags || not (isSimpleList x) if useTags then do @@ -190,7 +193,7 @@ blockToMediaWiki opts x@(DefinitionList items) = do modify $ \s -> s { stListLevel = stListLevel s ++ ";" } contents <- mapM (definitionListItemToMediaWiki opts) items modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents ++ "\n" + return $ vcat contents ++ if null listLevel then "\n" else "" -- Auxiliary functions for lists: diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 4c77ba7c6..7eb943a22 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -139,9 +139,9 @@ blockToOrg (CodeBlock (_,classes,_) str) = do "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave", "oz", "perl", "plantuml", "python", "R", "ruby", "sass", "scheme", "screen", "sh", "sql", "sqlite"] - let (beg, end) = if null at - then ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE") - else ("#+BEGIN_SRC" ++ head at, "#+END_SRC") + let (beg, end) = case at of + [] -> ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE") + (x:_) -> ("#+BEGIN_SRC " ++ x, "#+END_SRC") return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline blockToOrg (BlockQuote blocks) = do contents <- blockListToOrg blocks diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index d6e5b5c9e..d98079940 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -38,6 +38,7 @@ import Data.List ( isPrefixOf, intersperse, transpose ) import Text.Pandoc.Pretty import Control.Monad.State import Control.Applicative ( (<$>) ) +import Data.Char (isSpace) type Refs = [([Inline], Target)] @@ -96,7 +97,7 @@ keyToRST (label, (src, _)) = do let label'' = if ':' `elem` (render Nothing label') then char '`' <> label' <> char '`' else label' - return $ ".. _" <> label'' <> ": " <> text src + return $ nowrap $ ".. _" <> label'' <> ": " <> text src -- | Return RST representation of notes. notesToRST :: [[Block]] -> State WriterState Doc @@ -253,7 +254,52 @@ blockListToRST blocks = mapM blockToRST blocks >>= return . vcat -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc -inlineListToRST lst = mapM inlineToRST lst >>= return . hcat +inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat + where insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed + insertBS (x:y:z:zs) + | isComplex y && surroundComplex x z = + x : y : RawInline "rst" "\\ " : insertBS (z:zs) + insertBS (x:y:zs) + | isComplex x && not (okAfterComplex y) = + x : RawInline "rst" "\\ " : insertBS (y : zs) + | isComplex y && not (okBeforeComplex x) = + x : RawInline "rst" "\\ " : insertBS (y : zs) + | otherwise = + x : insertBS (y : zs) + insertBS (x:ys) = x : insertBS ys + insertBS [] = [] + surroundComplex :: Inline -> Inline -> Bool + surroundComplex (Str s@(_:_)) (Str s'@(_:_)) = + case (last s, head s') of + ('\'','\'') -> True + ('"','"') -> True + ('<','>') -> True + ('[',']') -> True + ('{','}') -> True + _ -> False + surroundComplex _ _ = False + okAfterComplex :: Inline -> Bool + okAfterComplex Space = True + okAfterComplex LineBreak = True + okAfterComplex (Str (c:_)) = isSpace c || c `elem` "-.,:;!?\\/'\")]}>–—" + okAfterComplex _ = False + okBeforeComplex :: Inline -> Bool + okBeforeComplex Space = True + okBeforeComplex LineBreak = True + okBeforeComplex (Str (c:_)) = isSpace c || c `elem` "-:/'\"<([{–—" + okBeforeComplex _ = False + isComplex :: Inline -> Bool + isComplex (Emph _) = True + isComplex (Strong _) = True + isComplex (SmallCaps _) = True + isComplex (Strikeout _) = True + isComplex (Superscript _) = True + isComplex (Subscript _) = True + isComplex (Link _ _) = True + isComplex (Image _ _) = True + isComplex (Code _ _) = True + isComplex (Math _ _) = True + isComplex _ = False -- | Convert Pandoc inline element to RST. inlineToRST :: Inline -> State WriterState Doc @@ -268,10 +314,10 @@ inlineToRST (Strikeout lst) = do return $ "[STRIKEOUT:" <> contents <> "]" inlineToRST (Superscript lst) = do contents <- inlineListToRST lst - return $ "\\ :sup:`" <> contents <> "`\\ " + return $ ":sup:`" <> contents <> "`" inlineToRST (Subscript lst) = do contents <- inlineListToRST lst - return $ "\\ :sub:`" <> contents <> "`\\ " + return $ ":sub:`" <> contents <> "`" inlineToRST (SmallCaps lst) = inlineListToRST lst inlineToRST (Quoted SingleQuote lst) = do contents <- inlineListToRST lst @@ -286,11 +332,12 @@ inlineToRST (Str str) = return $ text $ escapeString str inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then ":math:`" <> text str <> "`" <> beforeNonBlank "\\ " + then ":math:`" <> text str <> "`" else if '\n' `elem` str then blankline $$ ".. math::" $$ blankline $$ nest 3 (text str) $$ blankline else blankline $$ (".. math:: " <> text str) $$ blankline +inlineToRST (RawInline "rst" x) = return $ text x inlineToRST (RawInline _ _) = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST inlineToRST Space = return space diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 563ad7044..6bb782899 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -344,7 +344,8 @@ inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat -- | Convert list of inline elements to Texinfo acceptable for a node name. inlineListForNode :: [Inline] -- ^ Inlines to convert -> State WriterState Doc -inlineListForNode = return . text . filter (not . disallowedInNode) . stringify +inlineListForNode = return . text . stringToTexinfo . + filter (not . disallowedInNode) . stringify -- periods, commas, colons, and parentheses are disallowed in node names disallowedInNode :: Char -> Bool @@ -415,7 +416,7 @@ inlineToTexinfo (Image alternate (source, _)) = do text (ext ++ "}") where ext = drop 1 $ takeExtension source' - base = takeBaseName source' + base = dropExtension source' source' = if isAbsoluteURI source then source else unEscapeString source |