From 6a235ba60693596af3f13b093b83defa37501e09 Mon Sep 17 00:00:00 2001 From: Alexander Kondratskiy Date: Sat, 13 Jul 2013 02:23:27 -0400 Subject: Checking options before applying syntax highlighting for HTML output --- src/Text/Pandoc/Writers/HTML.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 57bf2a349..cfc187e02 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -422,7 +422,10 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do adjCode = if tolhs then unlines . map ("> " ++) . lines $ rawCode else rawCode - case highlight formatHtmlBlock (id',classes',keyvals) adjCode of + hlCode = if writerHighlight opts -- check highlighting options + then highlight formatHtmlBlock (id',classes',keyvals) adjCode + else Nothing + case hlCode of Nothing -> return $ addAttrs opts (id',classes,keyvals) $ H.pre $ H.code $ toHtml adjCode Just h -> modify (\st -> st{ stHighlighting = True }) >> @@ -589,14 +592,17 @@ inlineToHtml opts inline = (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong - (Code attr str) -> case highlight formatHtmlInline attr str of + (Code attr str) -> case hlCode of Nothing -> return $ addAttrs opts attr $ H.code $ strToHtml str Just h -> do modify $ \st -> st{ stHighlighting = True } return $ addAttrs opts (id',[],keyvals) h - where (id',_,keyvals) = attr + where (id',_,keyvals) = attr + hlCode = if writerHighlight opts + then highlight formatHtmlInline attr str + else Nothing (Strikeout lst) -> inlineListToHtml opts lst >>= return . H.del (SmallCaps lst) -> inlineListToHtml opts lst >>= -- cgit v1.2.3 From f42095b7b72fc3419a661c65d17f46ba3cbc8d62 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Jul 2013 13:48:50 -0700 Subject: Docx writer: Make `--no-highlight` work properly. --- src/Text/Pandoc/Writers/Docx.hs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e899200f6..d579d4fa6 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -214,7 +214,8 @@ writeDocx opts doc@(Pandoc meta _) = do let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" styledoc <- parseXml refArchive stylepath - let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } + let styledoc' = styledoc{ elContent = elContent styledoc ++ + [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' -- construct word/numbering.xml @@ -665,13 +666,16 @@ inlineToOpenXML opts (Math mathType str) = do Right r -> return [r] Left _ -> inlinesToOpenXML opts (readTeXMath str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst -inlineToOpenXML _ (Code attrs str) = +inlineToOpenXML opts (Code attrs str) = withTextProp (rStyle "VerbatimChar") - $ case highlight formatOpenXML attrs str of - Nothing -> intercalate [br] - `fmap` (mapM formattedString $ lines str) - Just h -> return h - where formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) + $ if writerHighlight opts + then case highlight formatOpenXML attrs str of + Nothing -> unhighlighted + Just h -> return h + else unhighlighted + where unhighlighted = intercalate [br] `fmap` + (mapM formattedString $ lines str) + formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] [ rStyle $ show toktype ] -- cgit v1.2.3 From 0b49f810f401b9154b50727d2179d1ec39cd8d3e Mon Sep 17 00:00:00 2001 From: Alexander Kondratskiy Date: Sun, 14 Jul 2013 14:33:58 -0400 Subject: Fixing wrong numbered-list indentation in open document format --- src/Text/Pandoc/Writers/OpenDocument.hs | 12 ++++++----- tests/writer.opendocument | 38 ++++++++++++++++----------------- 2 files changed, 26 insertions(+), 24 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 30f99c3e4..0efbf7580 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -489,14 +489,16 @@ paraStyle parent attrs = do tight = if t then [ ("fo:margin-top" , "0in" ) , ("fo:margin-bottom" , "0in" )] else [] - indent = when (i /= 0 || b || t) $ - selfClosingTag "style:paragraph-properties" $ - [ ("fo:margin-left" , indentVal) + indent = if (i /= 0 || b) + then [ ("fo:margin-left" , indentVal) , ("fo:margin-right" , "0in" ) , ("fo:text-indent" , "0in" ) , ("style:auto-text-indent" , "false" )] - ++ tight - addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) indent + else [] + attributes = indent ++ tight + paraProps = when (not $ null attributes) $ + selfClosingTag "style:paragraph-properties" attributes + addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) paraProps return pn paraListStyle :: Int -> State WriterState Int diff --git a/tests/writer.opendocument b/tests/writer.opendocument index 8727373a0..9e1661475 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -741,25 +741,25 @@ - + - + - + - + - + @@ -768,37 +768,37 @@ - + - + - + - + - + - + - + - + - + @@ -822,18 +822,18 @@ - + - + - + - + @@ -846,7 +846,7 @@ - + -- cgit v1.2.3 From 0bd5830ad4cbf056d18595208532082fe674c6d2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 15:37:15 -0700 Subject: HTML reader: Generalized table parser. This commit doesn't change the present behavior at all, but it will make it easier to support non-simple tables in the future. --- src/Text/Pandoc/Readers/HTML.hs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f6657a4d1..56d35160c 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -88,7 +88,7 @@ block = choice , pCodeBlock , pList , pHrule - , pSimpleTable + , pTable , pHead , pBody , pPlain @@ -212,8 +212,8 @@ pHrule = do pSelfClosing (=="hr") (const True) return [HorizontalRule] -pSimpleTable :: TagParser [Block] -pSimpleTable = try $ do +pTable :: TagParser [Block] +pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank @@ -225,6 +225,11 @@ pSimpleTable = try $ do $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank TagClose _ <- pSatisfy (~== TagClose "table") + let isSinglePlain [] = True + isSinglePlain [Plain _] = True + isSinglePlain _ = False + let isSimple = all isSinglePlain $ concat (head':rows) + guard isSimple let cols = maximum $ map length rows let aligns = replicate cols AlignLeft let widths = replicate cols 0 @@ -233,7 +238,7 @@ pSimpleTable = try $ do pCell :: String -> TagParser [TableCell] pCell celltype = try $ do skipMany pBlank - res <- pInTags celltype pPlain + res <- pInTags celltype block skipMany pBlank return [res] -- cgit v1.2.3 From 8483b5756fbf45270a84fa3e9174081041ff5558 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 15:49:53 -0700 Subject: HTML reader: Handle non-simple tables (#893). Column widths are divided equally. TODO: Get column widths from col tags if present. --- src/Text/Pandoc/Readers/HTML.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 56d35160c..35b667fb0 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -217,6 +217,7 @@ pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank + -- TODO actually read these and take width information from them skipMany $ (pInTags "col" block >> skipMany pBlank) <|> (pInTags "colgroup" block >> skipMany pBlank) head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") @@ -229,10 +230,15 @@ pTable = try $ do isSinglePlain [Plain _] = True isSinglePlain _ = False let isSimple = all isSinglePlain $ concat (head':rows) - guard isSimple - let cols = maximum $ map length rows + let cols = length $ if null head' + then head rows + else head' + -- fail if there are colspans or rowspans + guard $ all (\r -> length r == cols) rows let aligns = replicate cols AlignLeft - let widths = replicate cols 0 + let widths = if isSimple + then replicate cols 0 + else replicate cols (1.0 / fromIntegral cols) return [Table caption aligns widths head' rows] pCell :: String -> TagParser [TableCell] -- cgit v1.2.3 From 94c9825468692a343af7ef1686b1c92e1ec71adf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 17:03:28 -0700 Subject: HTML reader: read widths from col tags if present. Closes #893. --- src/Text/Pandoc/Readers/HTML.hs | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 35b667fb0..0068ab5c1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -47,7 +47,7 @@ import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) -import Control.Applicative ( (<$>), (<$) ) +import Control.Applicative ( (<$>), (<$), (<*) ) isSpace :: Char -> Bool isSpace ' ' = True @@ -218,8 +218,7 @@ pTable = try $ do skipMany pBlank caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank -- TODO actually read these and take width information from them - skipMany $ (pInTags "col" block >> skipMany pBlank) <|> - (pInTags "colgroup" block >> skipMany pBlank) + widths' <- pColgroup <|> many pCol head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") skipMany pBlank rows <- pOptInTag "tbody" @@ -236,11 +235,29 @@ pTable = try $ do -- fail if there are colspans or rowspans guard $ all (\r -> length r == cols) rows let aligns = replicate cols AlignLeft - let widths = if isSimple - then replicate cols 0 - else replicate cols (1.0 / fromIntegral cols) + let widths = if null widths' + then if isSimple + then replicate cols 0 + else replicate cols (1.0 / fromIntegral cols) + else widths' return [Table caption aligns widths head' rows] +pCol :: TagParser Double +pCol = try $ do + TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) + optional $ pSatisfy (~== TagClose "col") + skipMany pBlank + return $ case lookup "width" attribs of + Just x | not (null x) && last x == '%' -> + maybe 0.0 id $ safeRead ('0':'.':init x) + _ -> 0.0 + +pColgroup :: TagParser [Double] +pColgroup = try $ do + pSatisfy (~== TagOpen "colgroup" []) + skipMany pBlank + manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank + pCell :: String -> TagParser [TableCell] pCell celltype = try $ do skipMany pBlank -- cgit v1.2.3 From b2385d0e9bf13f2fc152a3983893c47f2ab5d4c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jul 2013 22:04:59 -0700 Subject: Text.Pandoc.ImageSize: Handle EPS. Closes #903. This change will make EPS images properly sized on conversion to Word. --- src/Text/Pandoc/ImageSize.hs | 24 +++++++++++++++++++++++- src/Text/Pandoc/Writers/Docx.hs | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 273a1a428..9b0850efb 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -34,11 +34,12 @@ import Data.ByteString (ByteString, unpack) import qualified Data.ByteString.Char8 as B import Control.Monad import Data.Bits +import Text.Pandoc.Shared (safeRead) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl -data ImageType = Png | Gif | Jpeg | Pdf deriving Show +data ImageType = Png | Gif | Jpeg | Pdf | Eps deriving Show data ImageSize = ImageSize{ pxX :: Integer @@ -54,6 +55,9 @@ imageType img = case B.take 4 img of "\x47\x49\x46\x38" -> return Gif "\xff\xd8\xff\xe0" -> return Jpeg "%PDF" -> return Pdf + "%!PS" + | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" + -> return Eps _ -> fail "Unknown image type" imageSize :: ByteString -> Maybe ImageSize @@ -63,6 +67,7 @@ imageSize img = do Png -> pngSize img Gif -> gifSize img Jpeg -> jpegSize img + Eps -> epsSize img Pdf -> Nothing -- TODO sizeInPixels :: ImageSize -> (Integer, Integer) @@ -71,6 +76,23 @@ sizeInPixels s = (pxX s, pxY s) sizeInPoints :: ImageSize -> (Integer, Integer) sizeInPoints s = (pxX s * 72 `div` dpiX s, pxY s * 72 `div` dpiY s) +epsSize :: ByteString -> Maybe ImageSize +epsSize img = do + let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img + let ls' = dropWhile (not . ("%%BoundingBox:" `B.isPrefixOf`)) ls + case ls' of + [] -> mzero + (x:_) -> case B.words x of + (_:_:_:ux:uy:[]) -> do + ux' <- safeRead $ B.unpack ux + uy' <- safeRead $ B.unpack uy + return ImageSize{ + pxX = ux' + , pxY = uy' + , dpiX = 72 + , dpiY = 72 } + _ -> mzero + pngSize :: ByteString -> Maybe ImageSize pngSize img = do let (h, rest) = B.splitAt 8 img diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d579d4fa6..1ed8c2fa5 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -776,6 +776,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do Just Jpeg -> ".jpeg" Just Gif -> ".gif" Just Pdf -> ".pdf" + Just Eps -> ".eps" Nothing -> takeExtension src if null imgext then -- without an extension there is no rule for content type -- cgit v1.2.3 From 6c2e76ac617e5972db5d118525e7f6f59f43caac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Jul 2013 15:38:56 -0700 Subject: Added `ignore_line_breaks` markdown extension. This causes intra-paragraph line breaks to be ignored, rather than being treated as hard line breaks or spaces. This is useful for some East Asian languages, where spaces aren't used between words, but text is separated into lines for readability. --- README | 6 ++++++ src/Text/Pandoc/Options.hs | 1 + src/Text/Pandoc/Readers/Markdown.hs | 1 + 3 files changed, 8 insertions(+) (limited to 'src/Text') diff --git a/README b/README index f86de8cd7..a65e22a70 100644 --- a/README +++ b/README @@ -2416,6 +2416,12 @@ example, `markdown+hard_line_breaks` is markdown with hard line breaks. Causes all newlines within a paragraph to be interpreted as hard line breaks instead of spaces. +**Extension: `ignore_line_breaks`**\ +Causes newlines within a paragraph to be ignored, rather than being +treated as spaces or as hard line breaks. This option is intended for +use with East Asian languages where spaces are not used between words, +but text is divided into lines for readability. + **Extension: `tex_math_single_backslash`**\ Causes anything between `\(` and `\)` to be interpreted as inline TeX math, and anything between `\[` and `\]` to be interpreted diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index c9a5e27da..61a85cf6e 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -92,6 +92,7 @@ data Extension = | Ext_superscript -- ^ Superscript using ^this^ syntax | Ext_subscript -- ^ Subscript using ~this~ syntax | Ext_hard_line_breaks -- ^ All newlines become hard line breaks + | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions | Ext_auto_identifiers -- ^ Automatic identifiers for headers diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a3500fbcf..1aa392162 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1566,6 +1566,7 @@ endline = try $ do notFollowedBy' bulletListStart notFollowedBy' anyOrderedListStart (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) <|> (return $ return B.space) -- -- cgit v1.2.3 From 7c980f39bf1cff941d3e78056fd69e0b371833e3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Jul 2013 20:58:14 -0700 Subject: Improved fetching of external resources. * In Shared, openURL and fetchItem now return an Either, for better error handling. (API change.) * Better error message when fetching a URL fails with `--self-contained`. * EPUB writer: If resource not found, skip it, as in Docx writer. * Closes #916. --- src/Text/Pandoc/SelfContained.hs | 5 +++-- src/Text/Pandoc/Shared.hs | 16 +++++++++------- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/EPUB.hs | 13 +++++++++---- src/Text/Pandoc/Writers/ODT.hs | 5 ++--- 5 files changed, 24 insertions(+), 17 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index c4613992a..0547bc065 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -40,7 +40,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, ()) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile) +import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) @@ -98,7 +98,7 @@ cssURLs userdata d orig = getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) getItem userdata f = if isAbsoluteURI f - then openURL f + then openURL f >>= either handleErr return else do -- strip off trailing query or fragment part, if relative URL. -- this is needed for things like cmunrm.eot?#iefix, @@ -110,6 +110,7 @@ getItem userdata f = exists <- doesFileExist f' cont <- if exists then B.readFile f' else readDataFile userdata f' return (cont, mime) + where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) getRaw userdata mimetype src = do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 09086da1f..0f2e16d2e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -95,6 +95,7 @@ import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (), takeExtension, dropExtension ) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S +import qualified Control.Exception as E import Control.Monad (msum, unless) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) @@ -586,12 +587,13 @@ readDataFileUTF8 userDir fname = -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. -fetchItem :: String -> String -> IO (BS.ByteString, Maybe String) +fetchItem :: String -> String + -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem sourceDir s = case s of _ | isAbsoluteURI s -> openURL s | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s - | otherwise -> do + | otherwise -> E.try $ do let mime = case takeExtension s of ".gz" -> getMimeType $ dropExtension s x -> getMimeType x @@ -600,21 +602,21 @@ fetchItem sourceDir s = return (cont, mime) -- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (BS.ByteString, Maybe String) +openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) openURL u | "data:" `isPrefixOf` u = let mime = takeWhile (/=',') $ drop 5 u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u - in return (contents, Just mime) + in return $ Right (contents, Just mime) #ifdef HTTP_CONDUIT - | otherwise = do + | otherwise = E.try $ do req <- parseUrl u resp <- withManager $ httpLbs req return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else - | otherwise = getBodyAndMimeType `fmap` browse - (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." + | otherwise = E.try $ getBodyAndMimeType `fmap` browse + (do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." setOutHandler $ const (return ()) setAllowRedirects True request (getRequest' u')) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1ed8c2fa5..611cddc65 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -726,7 +726,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do Just (_,_,_,elt,_) -> return [elt] Nothing -> do let sourceDir = writerSourceDirectory opts - res <- liftIO $ E.try $ fetchItem sourceDir src + res <- liftIO $ fetchItem sourceDir src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index f171a2560..42863ef86 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -123,10 +123,15 @@ writeEPUB opts doc@(Pandoc meta _) = do Pandoc _ blocks <- bottomUpM (transformInline opts' sourceDir picsRef) doc pics <- readIORef picsRef - let readPicEntry (oldsrc, newsrc) = do - (img,_) <- fetchItem sourceDir oldsrc - return $ toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img - picEntries <- mapM readPicEntry pics + let readPicEntry entries (oldsrc, newsrc) = do + res <- fetchItem sourceDir oldsrc + case res of + Left e -> do + warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." + return entries + Right (img,_) -> return $ + (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries + picEntries <- foldM readPicEntry [] pics -- handle fonts let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index db27286e8..589010bb9 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -42,7 +42,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) -import Control.Monad.Trans (liftIO) import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E @@ -114,10 +113,10 @@ writeODT opts doc@(Pandoc meta _) = do transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline transformPic sourceDir entriesRef (Image lab (src,_)) = do - res <- liftIO $ E.try $ fetchItem sourceDir src + res <- fetchItem sourceDir src case res of Left (_ :: E.SomeException) -> do - liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." + warn $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab Right (img, _) -> do let size = imageSize img -- cgit v1.2.3 From 93e096fe1d23bf60a7ca7fa39fa6e730336338eb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Jul 2013 21:51:11 -0700 Subject: Fixed warning. --- src/Text/Pandoc/Writers/EPUB.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 42863ef86..e625931fc 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -126,7 +126,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let readPicEntry entries (oldsrc, newsrc) = do res <- fetchItem sourceDir oldsrc case res of - Left e -> do + Left _ -> do warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." return entries Right (img,_) -> return $ -- cgit v1.2.3 From fd0f8c1a8a03cedf868ea2b26e40bfe00852d0b2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Jul 2013 21:51:23 -0700 Subject: Text.Pandoc.PDF: put temporary output directory in TEXINPUTS. This will help later when we try to download external resources. We can put them in the temp directory. See #917. --- src/Text/Pandoc/PDF.hs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 3227fd0bd..b36f2a0af 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -38,6 +38,7 @@ import System.Exit (ExitCode (..)) import System.FilePath import System.Directory import System.Process +import System.Environment import Control.Exception (evaluate) import System.IO (hClose) import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) @@ -102,7 +103,12 @@ runTeXProgram program runsLeft tmpDir source = do unless exists $ UTF8.writeFile file source let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", "-output-directory", tmpDir, file] - (exit, out, err) <- readCommand program programArgs + env' <- getEnvironment + let texinputs = maybe (tmpDir ++ ":") ((tmpDir ++ ":") ++) + $ lookup "TEXINPUTS" env' + let env'' = ("TEXINPUTS", texinputs) : + [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] + (exit, out, err) <- readCommand (Just env'') program programArgs if runsLeft > 1 then runTeXProgram program (runsLeft - 1) tmpDir source else do @@ -118,12 +124,14 @@ runTeXProgram program runsLeft tmpDir source = do -- Run a command and return exitcode, contents of stdout, and -- contents of stderr. (Based on -- 'readProcessWithExitCode' from 'System.Process'.) -readCommand :: FilePath -- ^ command to run +readCommand :: Maybe [(String, String)] -- ^ environment variables + -> FilePath -- ^ command to run -> [String] -- ^ any arguments -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr -readCommand cmd args = do +readCommand mbenv cmd args = do (Just inh, Just outh, Just errh, pid) <- - createProcess (proc cmd args){ std_in = CreatePipe, + createProcess (proc cmd args){ env = mbenv, + std_in = CreatePipe, std_out = CreatePipe, std_err = CreatePipe } outMVar <- newEmptyMVar -- cgit v1.2.3 From 7102254e244b37c91d6b35b4940511a8656edc49 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Jul 2013 12:14:43 -0700 Subject: PDF generation improvements. * `Text.Pandoc.PDF` exports `makePDF` instead of `tex2pdf`. (API change.) * `makePDF` walks the pandoc AST and checks for the existence of images in the local directory. If they are not found, it attempts to find them, either in the directory containing the first source file, or at an absolute URL, or at a URL relative to the base URL of the first command line argument. * Closes #917. --- pandoc.hs | 4 ++-- src/Text/Pandoc/MIME.hs | 11 +++++++++-- src/Text/Pandoc/PDF.hs | 50 ++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 56 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index 18124da3a..79bade221 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -31,7 +31,7 @@ writers. -} module Main where import Text.Pandoc -import Text.Pandoc.PDF (tex2pdf) +import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, safeRead, headerShift, normalize, err, warn ) @@ -1113,7 +1113,7 @@ main = do Right (IOByteStringWriter f) -> f writerOptions doc0 >>= writeBinary Right (PureStringWriter f) | pdfOutput -> do - res <- tex2pdf latexEngine $ f writerOptions doc0 + res <- makePDF latexEngine f writerOptions doc0 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ UTF8.toStringLazy err' diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index eb54bd48d..d9cb94a33 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -27,7 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Mime type lookup for ODT writer. -} -module Text.Pandoc.MIME ( getMimeType ) +module Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) where import System.FilePath import Data.Char ( toLower ) @@ -37,7 +37,14 @@ import qualified Data.Map as M getMimeType :: FilePath -> Maybe String getMimeType "layout-cache" = Just "application/binary" -- in ODT getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes - where mimeTypes = M.fromList -- List borrowed from happstack-server. + where mimeTypes = M.fromList mimeTypesList + +extensionFromMimeType :: String -> Maybe String +extensionFromMimeType mimetype = M.lookup mimetype reverseMimeTypes + where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList + +mimeTypesList :: [(String, String)] +mimeTypesList = -- List borrowed from happstack-server. [("gz","application/x-gzip") ,("cabal","application/x-cabal") ,("%","application/x-trash") diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index b36f2a0af..49b455285 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -28,12 +28,13 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of LaTeX documents to PDF. -} -module Text.Pandoc.PDF ( tex2pdf ) where +module Text.Pandoc.PDF ( makePDF ) where import System.IO.Temp import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as BC +import qualified Data.ByteString as BS import System.Exit (ExitCode (..)) import System.FilePath import System.Directory @@ -42,9 +43,15 @@ import System.Environment import Control.Exception (evaluate) import System.IO (hClose) import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) -import Text.Pandoc.UTF8 as UTF8 import Control.Monad (unless) import Data.List (isInfixOf) +import qualified Data.ByteString.Base64 as B64 +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Definition +import Text.Pandoc.Generic (bottomUpM) +import Text.Pandoc.Shared (fetchItem, warn) +import Text.Pandoc.Options (WriterOptions(..)) +import Text.Pandoc.MIME (extensionFromMimeType) withTempDir :: String -> (FilePath -> IO a) -> IO a withTempDir = @@ -54,12 +61,45 @@ withTempDir = withSystemTempDirectory #endif -tex2pdf :: String -- ^ tex program (pdflatex, lualatex, xelatex) - -> String -- ^ latex source +makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) + -> (WriterOptions -> Pandoc -> String) -- ^ writer + -> WriterOptions -- ^ options + -> Pandoc -- ^ document -> IO (Either ByteString ByteString) -tex2pdf program source = withTempDir "tex2pdf." $ \tmpdir -> +makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do + doc' <- handleImages (writerSourceDirectory opts) tmpdir doc + let source = writer opts doc' tex2pdf' tmpdir program source +handleImages :: String -- ^ source directory/base URL + -> FilePath -- ^ temp dir to store images + -> Pandoc -- ^ document + -> IO Pandoc +handleImages baseURL tmpdir = bottomUpM (handleImage' baseURL tmpdir) + +handleImage' :: String + -> FilePath + -> Inline + -> IO Inline +handleImage' baseURL tmpdir (Image ils (src,tit)) = do + exists <- doesFileExist src + if exists + then return $ Image ils (src,tit) + else do + res <- fetchItem baseURL src + case res of + Right (contents, Just mime) -> do + let ext = maybe (takeExtension src) id $ + extensionFromMimeType mime + let basename = UTF8.toString $ B64.encode $ UTF8.fromString src + let fname = tmpdir basename <.> ext + BS.writeFile fname contents + return $ Image ils (fname,tit) + _ -> do + warn $ "Could not find image `" ++ src ++ "', skipping..." + return $ Image ils (src,tit) +handleImage' _ _ x = return x + tex2pdf' :: FilePath -- ^ temp directory for output -> String -- ^ tex program -> String -- ^ tex source -- cgit v1.2.3 From bd1979f1b74fb18baa70c4b77cc58931e980087a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Jul 2013 21:14:38 -0700 Subject: Markdown reader: Improved strong/emph parsing. Using technique from github.com/jgm/Markdown. The new parsing algorithm requires no backtracking, and no keeping track of nesting levels. It will give different results in some edge cases but should not affect most people. --- src/Text/Pandoc/Readers/Markdown.hs | 88 +++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 34 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1aa392162..28f69eae4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1340,17 +1340,15 @@ inline = choice [ whitespace , str , endline , code - , fours - , strong - , emph + , strongOrEmph , note , cite , link , image , math , strikeout - , superscript , subscript + , superscript , inlineNote -- after superscript because of ^[link](/foo)^ , autoLink , rawHtmlInline @@ -1455,14 +1453,58 @@ mathInlineWith op cl = try $ do notFollowedBy digit -- to prevent capture of $5 return $ concat words' --- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row --- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub -fours :: Parser [Char] st (F Inlines) -fours = try $ do - x <- char '*' <|> char '_' <|> char '~' <|> char '^' - count 2 $ satisfy (==x) - rest <- many1 (satisfy (==x)) - return $ return $ B.str (x:x:x:rest) +-- Parses material enclosed in *s, **s, _s, or __s. +-- Designed to avoid backtracking. +enclosure :: Char + -> MarkdownParser (F Inlines) +enclosure c = do + cs <- many1 (char c) + (return (B.str cs) <>) <$> whitespace + <|> case length cs of + 3 -> three c + 2 -> two c mempty + 1 -> one c mempty + _ -> return (return $ B.str cs) + +-- Parse inlines til you hit one c or a sequence of two cs. +-- If one c, emit emph and then parse two. +-- If two cs, emit strong and then parse one. +three :: Char -> MarkdownParser (F Inlines) +three c = do + contents <- mconcat <$> many (notFollowedBy (char c) >> inline) + (try (string [c,c,c]) >> return ((B.strong . B.emph) <$> contents)) + <|> (try (string [c,c]) >> one c (B.strong <$> contents)) + <|> (char c >> two c (B.emph <$> contents)) + <|> return (return (B.str [c,c,c]) <> contents) + +-- Parse inlines til you hit two c's, and emit strong. +-- If you never do hit two cs, emit ** plus inlines parsed. +two :: Char -> F Inlines -> MarkdownParser (F Inlines) +two c prefix' = do + let ender = try $ string [c,c] + contents <- mconcat <$> many (try $ notFollowedBy ender >> inline) + (ender >> return (B.strong <$> (prefix' <> contents))) + <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + +-- Parse inlines til you hit a c, and emit emph. +-- If you never hit a c, emit * plus inlines parsed. +one :: Char -> F Inlines -> MarkdownParser (F Inlines) +one c prefix' = do + contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline) + <|> try (string [c,c] >> + notFollowedBy (char c) >> + two c prefix') ) + (char c >> return (B.emph <$> (prefix' <> contents))) + <|> return (return (B.str [c]) <> (prefix' <> contents)) + +strongOrEmph :: MarkdownParser (F Inlines) +strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_') + where checkIntraword = do + exts <- getOption readerExtensions + when (Ext_intraword_underscores `Set.member` exts) $ do + pos <- getPosition + lastStrPos <- stateLastStrPos <$> getState + guard $ lastStrPos /= Just pos -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) @@ -1474,28 +1516,6 @@ inlinesBetween start end = where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end -emph :: MarkdownParser (F Inlines) -emph = fmap B.emph <$> nested - (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) - where starStart = char '*' >> lookAhead nonspaceChar - starEnd = notFollowedBy' (() <$ strong) >> char '*' - ulStart = checkIntraword >> char '_' >> lookAhead nonspaceChar - ulEnd = notFollowedBy' (() <$ strong) >> char '_' - checkIntraword = do - exts <- getOption readerExtensions - when (Ext_intraword_underscores `Set.member` exts) $ do - pos <- getPosition - lastStrPos <- stateLastStrPos <$> getState - guard $ lastStrPos /= Just pos - -strong :: MarkdownParser (F Inlines) -strong = fmap B.strong <$> nested - (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) - where starStart = string "**" >> lookAhead nonspaceChar - starEnd = try $ string "**" - ulStart = string "__" >> lookAhead nonspaceChar - ulEnd = try $ string "__" - strikeout :: MarkdownParser (F Inlines) strikeout = fmap B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) -- cgit v1.2.3 From 800c5490ec080520268c9c3348f2b4199a21e6db Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 21 Jul 2013 11:44:49 -0700 Subject: LaTeX reader: Don't add spurious ", " to citation suffixes. This is added when needed in Text.Pandoc.Biblio anyway. --- src/Text/Pandoc/Readers/LaTeX.hs | 10 +++------- tests/Tests/Readers/LaTeX.hs | 36 ++++++++++++++++++------------------ tests/latex-reader.native | 2 +- 3 files changed, 22 insertions(+), 26 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 1a22f2ad2..7c7ae9fef 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -44,7 +44,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad import Text.Pandoc.Builder -import Data.Char (isLetter, isPunctuation, isSpace) +import Data.Char (isLetter) import Control.Applicative import Data.Monoid import System.Environment (getEnv) @@ -986,12 +986,8 @@ addPrefix _ _ = [] addSuffix :: [Inline] -> [Citation] -> [Citation] addSuffix s ks@(_:_) = - let k = last ks - s' = case s of - (Str (c:_):_) - | not (isPunctuation c || isSpace c) -> Str "," : Space : s - _ -> s - in init ks ++ [k {citationSuffix = citationSuffix k ++ s'}] + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] addSuffix _ _ = [] simpleCiteArgs :: LP [Citation] diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index 271b32689..88029b7c2 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -79,14 +79,14 @@ natbibCitations = testGroup "natbib" =?> para (cite [baseCitation] (rt "\\citet{item1}")) , "suffix" =: "\\citet[p.~30]{item1}" =?> para - (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\citet[p.~30]{item1}")) + (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] (rt "\\citet[p.~30]{item1}")) , "suffix long" =: "\\citet[p.~30, with suffix]{item1}" =?> para (cite [baseCitation{ citationSuffix = - toList $ text ", p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}")) + toList $ text "p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}")) , "multiple" =: "\\citeauthor{item1} \\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}" =?> para (cite [baseCitation{ citationMode = AuthorInText } ,baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&30"] + , citationSuffix = [Str "p.\160\&30"] , citationId = "item2" } ,baseCitation{ citationId = "item3" , citationPrefix = [Str "see",Space,Str "also"] @@ -95,28 +95,28 @@ natbibCitations = testGroup "natbib" , "group" =: "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Str "see"] - , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } + , citationSuffix = [Str "p.\160\&34\8211\&35"] } ,baseCitation{ citationMode = NormalCitation , citationId = "item3" , citationPrefix = [Str "also"] - , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } + , citationSuffix = [Str "chap.",Space,Str "3"] } ] (rt "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}")) , "suffix and locator" =: "\\citep[pp.~33, 35--37, and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}")) + , citationSuffix = [Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}")) , "suffix only" =: "\\citep[and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\citep[and nowhere else]{item1}")) + , citationSuffix = toList $ text "and nowhere else" }] (rt "\\citep[and nowhere else]{item1}")) , "no author" =: "\\citeyearpar{item1}, and now Doe with a locator \\citeyearpar[p.~44]{item2}" =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\citeyearpar{item1}") <> text ", and now Doe with a locator " <> cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&44"] + , citationSuffix = [Str "p.\160\&44"] , citationId = "item2" }] (rt "\\citeyearpar[p.~44]{item2}")) , "markup" =: "\\citep[\\emph{see}][p. \\textbf{32}]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str ",",Space,Str "p.",Space, + , citationSuffix = [Str "p.",Space, Strong [Str "32"]] }] (rt "\\citep[\\emph{see}][p. \\textbf{32}]{item1}")) ] @@ -126,14 +126,14 @@ biblatexCitations = testGroup "biblatex" =?> para (cite [baseCitation] (rt "\\textcite{item1}")) , "suffix" =: "\\textcite[p.~30]{item1}" =?> para - (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\textcite[p.~30]{item1}")) + (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] (rt "\\textcite[p.~30]{item1}")) , "suffix long" =: "\\textcite[p.~30, with suffix]{item1}" =?> para (cite [baseCitation{ citationSuffix = - toList $ text ", p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}")) + toList $ text "p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}")) , "multiple" =: "\\textcites{item1}[p.~30]{item2}[see also][]{item3}" =?> para (cite [baseCitation{ citationMode = AuthorInText } ,baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "p.\160\&30"] + , citationSuffix = [Str "p.\160\&30"] , citationId = "item2" } ,baseCitation{ citationId = "item3" , citationPrefix = [Str "see",Space,Str "also"] @@ -142,28 +142,28 @@ biblatexCitations = testGroup "biblatex" , "group" =: "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Str "see"] - , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } + , citationSuffix = [Str "p.\160\&34\8211\&35"] } ,baseCitation{ citationMode = NormalCitation , citationId = "item3" , citationPrefix = [Str "also"] - , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } + , citationSuffix = [Str "chap.",Space,Str "3"] } ] (rt "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}")) , "suffix and locator" =: "\\autocite[pp.~33, 35--37, and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}")) + , citationSuffix = [Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}")) , "suffix only" =: "\\autocite[and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}")) + , citationSuffix = toList $ text "and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}")) , "no author" =: "\\autocite*{item1}, and now Doe with a locator \\autocite*[p.~44]{item2}" =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\autocite*{item1}") <> text ", and now Doe with a locator " <> cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&44"] + , citationSuffix = [Str "p.\160\&44"] , citationId = "item2" }] (rt "\\autocite*[p.~44]{item2}")) , "markup" =: "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str ",",Space,Str "p.",Space, + , citationSuffix = [Str "p.",Space, Strong [Str "32"]] }] (rt "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}")) , "parencite" =: "\\parencite{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation }] (rt "\\parencite{item1}")) diff --git a/tests/latex-reader.native b/tests/latex-reader.native index d8769e605..d19196345 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -260,7 +260,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,HorizontalRule ,Header 1 ("",[],[]) [Str "LaTeX"] ,BulletList - [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str ",",Space,Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline "latex" "\\cite[22-23]{smith.1899}"]]] + [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline "latex" "\\cite[22-23]{smith.1899}"]]] ,[Para [RawInline "latex" "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] -- cgit v1.2.3 From 6f99ad80135c06d30d92ad275d482e841ef1e872 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 21 Jul 2013 12:16:52 -0700 Subject: Biblio: Tweaks to improve default behavior. * A suffix beginning with a digit gets 'p' inserted before it before passing to citeproc-hs, so that bare numbers are treated as page numbers by default. * A suffix not beginning with punctuation has a space added at the beginning (rather than a comma and space, as was done before). * This adding occurs not just in author-in-text citations, but in all citations. The result of these changes (and the last commit) is that `\citep[23]{item1}` in LaTeX will be interpreted properly, with '23' treated as a locator of type 'page'. --- src/Text/Pandoc/Biblio.hs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 4dd82dd08..ae371a46d 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -119,12 +119,12 @@ toCslCite :: Citation -> CSL.Cite toCslCite c = let (l, s) = locatorWords $ citationSuffix c (la,lo) = parseLocator l - s' = case (l,s,citationMode c) of - -- treat a bare locator as if it begins with comma + s' = case (l,s) of + -- treat a bare locator as if it begins with space -- so @item1 [blah] is like [@item1, blah] - ("",(x:_),AuthorInText) | not (isPunct x) - -> [Str ",",Space] ++ s - _ -> s + ("",(x:_)) + | not (isPunct x) -> [Space] ++ s + _ -> s isPunct (Str (x:_)) = isPunctuation x isPunct _ = False citMode = case citationMode c of @@ -173,13 +173,21 @@ pLocator :: Parsec [Inline] st String pLocator = try $ do optional $ pMatch (== Str ",") optional pSpace - f <- many1 (notFollowedBy pSpace >> anyToken) + f <- (guardFollowingDigit >> return [Str "p"]) -- "page" the default + <|> many1 (notFollowedBy pSpace >> anyToken) gs <- many1 pWordWithDigits return $ stringify f ++ (' ' : unwords gs) +guardFollowingDigit :: Parsec [Inline] st () +guardFollowingDigit = do + t <- lookAhead anyToken + case t of + Str (d:_) | isDigit d -> return () + _ -> mzero + pWordWithDigits :: Parsec [Inline] st String pWordWithDigits = try $ do - pSpace + optional pSpace r <- many1 (notFollowedBy pSpace >> anyToken) let s = stringify r guard $ any isDigit s -- cgit v1.2.3 From 5592666ca44aa0d027cd95bf11cff09825896584 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 23 Jul 2013 22:31:50 -0700 Subject: Text.Pandoc: Added readJSON, writeJSON to the API. Closes #817. --- src/Text/Pandoc.hs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 86e78ce53..b5b698e09 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -72,9 +72,11 @@ module Text.Pandoc , readOPML , readHaddock , readNative + , readJSON -- * Writers: converting /from/ Pandoc format , Writer (..) , writeNative + , writeJSON , writeMarkdown , writePlain , writeRST @@ -190,9 +192,8 @@ markdown o s = do -- | Association list of formats and readers. readers :: [(String, ReaderOptions -> String -> IO Pandoc)] -readers = [("native" , \_ s -> return $ readNative s) - ,("json" , \_ s -> return $ checkJSON - $ decode $ UTF8.fromStringLazy s) +readers = [ ("native" , \_ s -> return $ readNative s) + ,("json" , \o s -> return $ readJSON o s) ,("markdown" , markdown) ,("markdown_strict" , markdown) ,("markdown_phpextra" , markdown) @@ -205,8 +206,8 @@ readers = [("native" , \_ s -> return $ readNative s) ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs ,("html" , \o s -> return $ readHtml o s) ,("latex" , \o s -> return $ readLaTeX o s) - ,("haddock" , \o s -> return $ readHaddock o s) - ] + ,("haddock" , \o s -> return $ readHaddock o s) + ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) | IOStringWriter (WriterOptions -> Pandoc -> IO String) @@ -216,12 +217,12 @@ data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) writers :: [ ( String, Writer ) ] writers = [ ("native" , PureStringWriter writeNative) - ,("json" , PureStringWriter $ \_ -> UTF8.toStringLazy . encode) + ,("json" , PureStringWriter writeJSON) ,("docx" , IOByteStringWriter writeDocx) - ,("odt" , IOByteStringWriter writeODT) - ,("epub" , IOByteStringWriter $ \o -> + ,("odt" , IOByteStringWriter writeODT) + ,("epub" , IOByteStringWriter $ \o -> writeEPUB o{ writerEpubVersion = Just EPUB2 }) - ,("epub3" , IOByteStringWriter $ \o -> + ,("epub3" , IOByteStringWriter $ \o -> writeEPUB o{ writerEpubVersion = Just EPUB3 }) ,("fb2" , IOStringWriter writeFB2) ,("html" , PureStringWriter writeHtmlString) @@ -359,3 +360,10 @@ instance (Data a) => ToJsonFilter (a -> IO [a]) where checkJSON :: Maybe a -> a checkJSON Nothing = error "Error parsing JSON" checkJSON (Just r) = r + +readJSON :: ReaderOptions -> String -> Pandoc +readJSON _ = checkJSON . decode . UTF8.fromStringLazy + +writeJSON :: WriterOptions -> Pandoc -> String +writeJSON _ = UTF8.toStringLazy . encode + -- cgit v1.2.3 From 85cc140744b01148da944a58948d9e4a87cb64c4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 25 Jul 2013 09:45:23 -0700 Subject: Textile reader: Improved handling of `
` blocks.

* Closed #927 (a bug in which `
` in certain contexts was
  not recognized as a code block).
* Remove internal HTML tags in code blocks, rather than printing
  them verbatim.
* Parse attributes on `
` tag for code blocks.
---
 src/Text/Pandoc/Readers/Textile.hs | 14 ++++++++++----
 tests/textile-reader.native        |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index a1687a691..9191f6908 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -57,6 +57,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
+import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate )
 import Data.Char ( digitToInt, isUpper )
@@ -152,8 +153,10 @@ codeBlockBc = try $ do
 -- | Code Blocks in Textile are between 
 and 
codeBlockPre :: Parser [Char] ParserState Block codeBlockPre = try $ do - htmlTag (tagOpen (=="pre") null) - result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) + (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) + result' <- (innerText . parseTags) `fmap` -- remove internal tags + manyTill anyChar (htmlTag (tagClose (=="pre"))) + optional blanklines -- drop leading newline if any let result'' = case result' of '\n':xs -> xs @@ -162,7 +165,10 @@ codeBlockPre = try $ do let result''' = case reverse result'' of '\n':_ -> init result'' _ -> result'' - return $ CodeBlock ("",[],[]) result''' + let classes = words $ fromAttrib "class" t + let ident = fromAttrib "id" t + let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ CodeBlock (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 header :: Parser [Char] ParserState Block @@ -275,7 +281,7 @@ definitionListItem = try $ do -- blocks support, we have to lookAhead for a rawHtmlBlock. blockBreak :: Parser [Char] ParserState () blockBreak = try (newline >> blanklines >> return ()) <|> - (lookAhead rawHtmlBlock >> return ()) + try (optional spaces >> lookAhead rawHtmlBlock >> return ()) -- raw content diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 22a338d38..d14ae02c8 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -139,7 +139,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] ,Para [Str "However",Str ",",Space,RawInline "html" "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline "html" "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] ,RawBlock "html" "
" -,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold",LineBreak] +,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] ,RawBlock "html" "
" ,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] ,RawBlock "html" "
" -- cgit v1.2.3 From fb9f2e4bd5f71c7b515566921c5c5a7bff73c52c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 25 Jul 2013 10:00:11 -0700 Subject: LaTeX reader: Support `\v{}` for hacek. Closes #926. --- src/Text/Pandoc/Readers/LaTeX.hs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7c7ae9fef..6b5035d93 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -415,6 +415,7 @@ inlineCommands = M.fromList $ , (".", option (str ".") $ try $ tok >>= accent dot) , ("=", option (str "=") $ try $ tok >>= accent macron) , ("c", option (str "c") $ try $ tok >>= accent cedilla) + , ("v", option (str "v") $ try $ tok >>= accent hacek) , ("i", lit "i") , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) , (",", pure mempty) @@ -671,6 +672,42 @@ cedilla 's' = 'ş' cedilla 'S' = 'Ş' cedilla c = c +hacek :: Char -> Char +hacek 'A' = 'Ǎ' +hacek 'a' = 'ǎ' +hacek 'C' = 'Č' +hacek 'c' = 'č' +hacek 'D' = 'Ď' +hacek 'd' = 'ď' +hacek 'E' = 'Ě' +hacek 'e' = 'ě' +hacek 'G' = 'Ǧ' +hacek 'g' = 'ǧ' +hacek 'H' = 'Ȟ' +hacek 'h' = 'ȟ' +hacek 'I' = 'Ǐ' +hacek 'i' = 'ǐ' +hacek 'j' = 'ǰ' +hacek 'K' = 'Ǩ' +hacek 'k' = 'ǩ' +hacek 'L' = 'Ľ' +hacek 'l' = 'ľ' +hacek 'N' = 'Ň' +hacek 'n' = 'ň' +hacek 'O' = 'Ǒ' +hacek 'o' = 'ǒ' +hacek 'R' = 'Ř' +hacek 'r' = 'ř' +hacek 'S' = 'Š' +hacek 's' = 'š' +hacek 'T' = 'Ť' +hacek 't' = 'ť' +hacek 'U' = 'Ǔ' +hacek 'u' = 'ǔ' +hacek 'Z' = 'Ž' +hacek 'z' = 'ž' +hacek c = c + tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) -- cgit v1.2.3 From d5fad2306a27b3fcf2c85782dd13bc8e516a5df9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 25 Jul 2013 20:29:42 -0700 Subject: LaTeX writer: Change `\` to `/` in paths. `/` works even on Windows in LaTeX. `\` will cause major problems if unescaped. --- src/Text/Pandoc/Writers/LaTeX.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 2b4a608a7..06a04ade2 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -202,7 +202,8 @@ stringToLaTeX ctx (x:xs) = do _ -> '-' : rest '~' | not isUrl -> "\\textasciitilde{}" ++ rest '^' -> "\\^{}" ++ rest - '\\' -> "\\textbackslash{}" ++ rest + '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows + | otherwise -> "\\textbackslash{}" ++ rest '|' -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest @@ -648,7 +649,8 @@ inlineToLaTeX (Image _ (source, _)) = do let source' = if isAbsoluteURI source then source else unEscapeString source - return $ "\\includegraphics" <> braces (text source') + source'' <- stringToLaTeX URLString source' + return $ "\\includegraphics" <> braces (text source'') inlineToLaTeX (Note contents) = do modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents -- cgit v1.2.3 From a97f39c12e7b47a272575b69ad4cdd38966c043e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Jul 2013 12:40:56 -0700 Subject: Beamer: add allowframebreaks to slide if set in header classes. It's recommended that your bibliography slide have this attribute: # References {.allowframebreaks} This causes multiple slides to be created if necessary, depending on the length of the bibliography. --- README | 10 ++++++++++ src/Text/Pandoc/Writers/LaTeX.hs | 19 +++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/README b/README index d9b003344..4895f0f52 100644 --- a/README +++ b/README @@ -2674,6 +2674,16 @@ using the `-V` option: pandoc -t beamer habits.txt -V theme:Warsaw -o habits.pdf +Note that header attributes will turn into slide attributes +(on a `
` or `
`) in HTML slide formats, allowing you +to style individual slides. In Beamer, the only header attribute +that affects slides is the `allowframebreaks` class, which sets the +`allowframebreaks` option, causing multiple slides to be created +if the content overfills the frame. This is recommended especially for +bibliographies: + + # References {.allowframebreaks} + Literate Haskell support ======================== diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 06a04ade2..aa5bfa623 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -232,7 +232,7 @@ toSlides bs = do elementToBeamer :: Int -> Element -> State WriterState [Block] elementToBeamer _slideLevel (Blk b) = return [b] -elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) +elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) | lvl > slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts return $ Para ( RawInline "latex" "\\begin{block}{" @@ -240,7 +240,7 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) : bs ++ [RawBlock "latex" "\\end{block}"] | lvl < slideLevel = do bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts - return $ (Header lvl (ident,classes,[]) tit) : bs + return $ (Header lvl (ident,classes,kvs) tit) : bs | otherwise = do -- lvl == slideLevel -- note: [fragile] is required or verbatim breaks let hasCodeBlock (CodeBlock _ _) = [True] @@ -248,17 +248,20 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,_) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = if not $ null $ queryWith hasCodeBlock elts ++ + let fragile = not $ null $ queryWith hasCodeBlock elts ++ if writerListings opts then queryWith hasCode elts else [] - then "[fragile]" - else "" - let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile) : + let allowframebreaks = "allowframebreaks" `elem` classes + let optionslist = ["fragile" | fragile] ++ + ["allowframebreaks" | allowframebreaks] + let options = if null optionslist + then "" + else "[" ++ intercalate "," optionslist ++ "]" + let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ options) : if tit == [Str "\0"] -- marker for hrule then [] - else (RawInline "latex" "\\frametitle{") : tit ++ - [RawInline "latex" "}"] + else (RawInline "latex" "{") : tit ++ [RawInline "latex" "}"] let slideEnd = RawBlock "latex" "\\end{frame}" -- now carve up slide into blocks if there are sections inside bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts -- cgit v1.2.3 From 3c06e2692a8fd7307658498b44401868e1059d61 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Jul 2013 08:38:46 -0700 Subject: Markdown atx headers: Allow `.` or `)` after `#` if no `fancy_lists`. --- src/Text/Pandoc/Readers/Markdown.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 28f69eae4..076706b4e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -494,7 +494,8 @@ addToHeaderList (ident,classes,kvs) text = do atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length - notFollowedBy (char '.' <|> char ')') -- this would be a list + notFollowedBy $ guardEnabled Ext_fancy_lists >> + (char '.' <|> char ')') -- this would be a list skipSpaces text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing -- cgit v1.2.3 From 7024664ddae00fc459953bb5d4bbc91d5877be1b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Jul 2013 08:38:13 -0700 Subject: Fixed compilation with http-conduit flag False. --- src/Text/Pandoc/Shared.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 0f2e16d2e..09874299d 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -104,7 +104,6 @@ import System.IO (stderr) import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import qualified Data.ByteString as BS -import Data.ByteString.Lazy (toChunks) import qualified Data.ByteString.Char8 as B8 #ifdef EMBED_DATA_FILES @@ -114,6 +113,7 @@ import System.FilePath ( joinPath, splitDirectories ) import Paths_pandoc (getDataFileName) #endif #ifdef HTTP_CONDUIT +import Data.ByteString.Lazy (toChunks) import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, responseBody, responseHeaders) import Network.HTTP.Types.Header ( hContentType) @@ -616,7 +616,7 @@ openURL u UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else | otherwise = E.try $ getBodyAndMimeType `fmap` browse - (do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." + (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." setOutHandler $ const (return ()) setAllowRedirects True request (getRequest' u')) -- cgit v1.2.3 From dceffeb04370e8661dd0534a6e97fd15caaeddcf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 2 Aug 2013 14:20:44 -0700 Subject: Biblio: Override citeproc-hs's endWithPunct. The new version correctly sees a sentence ending in '.)' as ending with punctuation. This fixes a bug which led such sentences to receive an extra period at the end: '.).'. Thanks to Steve Petersen for reporting. --- src/Text/Pandoc/Biblio.hs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index ae371a46d..31c55472e 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -32,7 +32,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where import Data.List import Data.Char ( isDigit, isPunctuation ) import qualified Data.Map as M -import Text.CSL hiding ( Cite(..), Citation(..) ) +import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct ) import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition import Text.Pandoc.Generic @@ -88,6 +88,19 @@ sanitize :: [Inline] -> [Inline] sanitize xs | endWithPunct xs = toCapital xs | otherwise = toCapital (xs ++ [Str "."]) + +-- A replacement for citeproc-hs's endWithPunct, which wrongly treats +-- a sentence ending in '.)' as not ending with punctuation, leading +-- to an extra period. +endWithPunct :: [Inline] -> Bool +endWithPunct [] = True +endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of + [] -> True + (')':c:_) | isEndPunct c -> True + (c:_) | isEndPunct c -> True + | otherwise -> False + where isEndPunct c = c `elem` ".,;:!?" + deNote :: Pandoc -> Pandoc deNote = topDown go where go (Note [Para xs]) = Note $ bottomUp go' [Para $ sanitize xs] -- cgit v1.2.3 From a32417378e8023b5dd8af4d8a9ea66eddb99a0eb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 2 Aug 2013 15:37:09 -0700 Subject: Biblio: Don't interfere with Notes that aren't citation notes. Closes #898: notes not generated from citations were being adjusted (first letter capitalized, for example, against author's intentions). --- src/Text/Pandoc/Biblio.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 31c55472e..d0db35ae7 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -53,7 +53,7 @@ processBiblio (Just style) r p = map (map toCslCite) grps) cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' style) (bibliography result) - Pandoc m b = bottomUp mvPunct . deNote . bottomUp (processCite style cits_map) $ p' + Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p' in Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. @@ -103,7 +103,8 @@ endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of deNote :: Pandoc -> Pandoc deNote = topDown go - where go (Note [Para xs]) = Note $ bottomUp go' [Para $ sanitize xs] + where go (Cite cs [Note [Para xs]]) = + Cite cs [Note $ bottomUp go' [Para $ sanitize xs]] go (Note xs) = Note $ bottomUp go' xs go x = x go' (Note [Para xs]:ys) = -- cgit v1.2.3 From 1567d291a3aed0e55ddaaa65492ab19741e515b5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 16:39:43 -0700 Subject: Text.Pandoc.JSON: Use To/FromJSON instances from pandoc-types. * These use GHC generics rather than syb, and are faster. * toJsonFilter is now a deprecated synonym of toJSONFilter from Text.Pandoc.JSON. * The deprecated jsonFilter function has been removed. --- src/Text/Pandoc.hs | 74 +++++++++--------------------------------------------- 1 file changed, 12 insertions(+), 62 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b5b698e09..db0f0e5fe 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -106,12 +106,13 @@ module Text.Pandoc -- * Miscellaneous , getReader , getWriter - , jsonFilter , ToJsonFilter(..) + , ToJSONFilter(..) ) where import Text.Pandoc.Definition import Text.Pandoc.Generic +import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST @@ -146,13 +147,11 @@ import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead, warn) -import Data.ByteString.Lazy (ByteString) +import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.List (intercalate, isSuffixOf) import Data.Version (showVersion) -import Data.Aeson.Generic import Data.Set (Set) -import Data.Data import qualified Data.Set as Set import Text.Parsec import Text.Parsec.Error @@ -211,7 +210,7 @@ readers = [ ("native" , \_ s -> return $ readNative s) data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) | IOStringWriter (WriterOptions -> Pandoc -> IO String) - | IOByteStringWriter (WriterOptions -> Pandoc -> IO ByteString) + | IOByteStringWriter (WriterOptions -> Pandoc -> IO BL.ByteString) -- | Association list of formats and writers. writers :: [ ( String, Writer ) ] @@ -304,66 +303,17 @@ getWriter s = \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } -{-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-} --- | Converts a transformation on the Pandoc AST into a function --- that reads and writes a JSON-encoded string. This is useful --- for writing small scripts. -jsonFilter :: (Pandoc -> Pandoc) -> String -> String -jsonFilter f = UTF8.toStringLazy . encode . f . checkJSON . decode . UTF8.fromStringLazy - --- | 'toJsonFilter' convert a function into a filter that reads pandoc's json output --- from stdin, transforms it by walking the AST and applying the specified --- function, and writes the result as json to stdout. Usage example: --- --- > -- capitalize.hs --- > -- compile with: ghc --make capitalize --- > -- run with: pandoc -t json | ./capitalize | pandoc -f json --- > --- > import Text.Pandoc --- > import Data.Char (toUpper) --- > --- > main :: IO () --- > main = toJsonFilter capitalizeStrings --- > --- > capitalizeStrings :: Inline -> Inline --- > capitalizeStrings (Str s) = Str $ map toUpper s --- > capitalizeStrings x = x --- --- The function can be any type @(a -> a)@, @(a -> IO a)@, @(a -> [a])@, --- or @(a -> IO [a])@, where @a@ is an instance of 'Data'. --- So, for example, @a@ can be 'Pandoc', 'Inline', 'Block', ['Inline'], --- ['Block'], 'Meta', 'ListNumberStyle', 'Alignment', 'ListNumberDelim', --- 'QuoteType', etc. See 'Text.Pandoc.Definition'. -class ToJsonFilter a where - toJsonFilter :: a -> IO () - -instance (Data a) => ToJsonFilter (a -> a) where - toJsonFilter f = BL.getContents >>= - BL.putStr . encode . (bottomUp f :: Pandoc -> Pandoc) . checkJSON . decode - -instance (Data a) => ToJsonFilter (a -> IO a) where - toJsonFilter f = BL.getContents >>= - (bottomUpM f :: Pandoc -> IO Pandoc) . checkJSON . decode >>= - BL.putStr . encode - -instance (Data a) => ToJsonFilter (a -> [a]) where - toJsonFilter f = BL.getContents >>= - BL.putStr . encode . (bottomUp (concatMap f) :: Pandoc -> Pandoc) . - checkJSON . decode - -instance (Data a) => ToJsonFilter (a -> IO [a]) where - toJsonFilter f = BL.getContents >>= - (bottomUpM (fmap concat . mapM f) :: Pandoc -> IO Pandoc) - . checkJSON . decode >>= - BL.putStr . encode - -checkJSON :: Maybe a -> a -checkJSON Nothing = error "Error parsing JSON" -checkJSON (Just r) = r +{-# DEPRECATED toJsonFilter "Use toJSONFilter instead" #-} +class ToJSONFilter a => ToJsonFilter a + where toJsonFilter :: a -> IO () + toJsonFilter = toJSONFilter readJSON :: ReaderOptions -> String -> Pandoc -readJSON _ = checkJSON . decode . UTF8.fromStringLazy +readJSON _ = checkJSON . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode +checkJSON :: Either String a -> a +checkJSON (Right x) = x +checkJSON (Left e) = error e -- cgit v1.2.3 From 97b2be599e11bbe7aed73a30d8c7900f4276a3df Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 17:02:35 -0700 Subject: Text.Pandoc: Don't reexport ToJSONFilter. It's better just to import this from Text.Pandoc.JSON. That way, compiled filters will be smaller in size. --- src/Text/Pandoc.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index db0f0e5fe..27aa02a75 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -107,7 +107,6 @@ module Text.Pandoc , getReader , getWriter , ToJsonFilter(..) - , ToJSONFilter(..) ) where import Text.Pandoc.Definition @@ -303,7 +302,8 @@ getWriter s = \o -> r o{ writerExtensions = setExts $ getDefaultExtensions writerName } -{-# DEPRECATED toJsonFilter "Use toJSONFilter instead" #-} +{-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-} +-- | Deprecated. Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead. class ToJSONFilter a => ToJsonFilter a where toJsonFilter :: a -> IO () toJsonFilter = toJSONFilter -- cgit v1.2.3 From 4a84b78100f2cfa0f7f7d13a24693a37af60003d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 23:05:14 -0700 Subject: MediaWiki writer: Use native mediawiki tables instead of HTML. Closes #720. --- src/Text/Pandoc/Writers/MediaWiki.hs | 83 +++++---- tests/tables.mediawiki | 316 ++++++++++++++--------------------- 2 files changed, 164 insertions(+), 235 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index b3b319c2a..e1bfd18b2 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -36,7 +36,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect, intercalate ) +import Data.List ( intersect, intercalate, intersperse ) import Network.URI ( isURI ) import Control.Monad.State @@ -135,25 +135,17 @@ blockToMediaWiki opts (BlockQuote blocks) = do return $ "
" ++ contents ++ "
" blockToMediaWiki opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return "" - else do - c <- inlineListToMediaWiki opts capt - return $ "" ++ c ++ "\n" - let percent w = show (truncate (100*w) :: Integer) ++ "%" - let coltags = if all (== 0.0) widths - then "" - else unlines $ map - (\w -> "") widths - head' <- if all null headers - then return "" - else do - hs <- tableRowToMediaWiki opts alignStrings 0 headers - return $ "\n" ++ hs ++ "\n\n" - body' <- zipWithM (tableRowToMediaWiki opts alignStrings) [1..] rows' - return $ "\n" ++ captionDoc ++ coltags ++ head' ++ - "\n" ++ unlines body' ++ "\n
\n" + caption <- if null capt + then return "" + else do + c <- inlineListToMediaWiki opts capt + return $ "|+ " ++ trimr c ++ "\n" + let headless = all null headers + let allrows = if headless then rows' else headers:rows' + tableBody <- (concat . intersperse "|-\n") `fmap` + mapM (tableRowToMediaWiki opts headless aligns widths) + (zip [1..] allrows) + return $ "{|\n" ++ caption ++ tableBody ++ "|}\n" blockToMediaWiki opts x@(BulletList items) = do oldUseTags <- get >>= return . stUseTags @@ -285,20 +277,34 @@ vcat = intercalate "\n" -- Auxiliary functions for tables: tableRowToMediaWiki :: WriterOptions - -> [String] - -> Int - -> [[Block]] + -> Bool + -> [Alignment] + -> [Double] + -> (Int, [[Block]]) -> State WriterState String -tableRowToMediaWiki opts alignStrings rownum cols' = do - let celltype = if rownum == 0 then "th" else "td" - let rowclass = case rownum of - 0 -> "header" - x | x `rem` 2 == 1 -> "odd" - _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToMediaWiki opts celltype alignment item) - alignStrings cols' - return $ "\n" ++ unlines cols'' ++ "" +tableRowToMediaWiki opts headless alignments widths (rownum, cells) = do + cells' <- mapM (\cellData -> + tableCellToMediaWiki opts headless rownum cellData) + $ zip3 alignments widths cells + return $ unlines cells' + +tableCellToMediaWiki :: WriterOptions + -> Bool + -> Int + -> (Alignment, Double, [Block]) + -> State WriterState String +tableCellToMediaWiki opts headless rownum (alignment, width, bs) = do + contents <- blockListToMediaWiki opts bs + let marker = if rownum == 1 && not headless then "!" else "|" + let percent w = show (truncate (100*w) :: Integer) ++ "%" + let attrs = ["align=" ++ show (alignmentToString alignment) | + alignment /= AlignDefault && alignment /= AlignLeft] ++ + ["width=\"" ++ percent width ++ "\"" | + width /= 0.0 && rownum == 1] + let attr = if null attrs + then "" + else unwords attrs ++ "|" + return $ marker ++ attr ++ trimr contents alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -307,17 +313,6 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableItemToMediaWiki :: WriterOptions - -> String - -> String - -> [Block] - -> State WriterState String -tableItemToMediaWiki opts celltype align' item = do - let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ - x ++ "" - contents <- blockListToMediaWiki opts item - return $ mkcell contents - -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements diff --git a/tests/tables.mediawiki b/tests/tables.mediawiki index 4836ecd79..efde76559 100644 --- a/tests/tables.mediawiki +++ b/tests/tables.mediawiki @@ -1,212 +1,146 @@ Simple table with caption: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+{| +|+ Demonstration of simple table syntax. +!align="right"|Right +!Left +!align="center"|Center +!Default +|- +|align="right"|12 +|12 +|align="center"|12 +|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|1 +|} Simple table without caption: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
RightLeftCenterDefault
12121212
123123123123
1111
+{| +!align="right"|Right +!Left +!align="center"|Center +!Default +|- +|align="right"|12 +|12 +|align="center"|12 +|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|1 +|} Simple table indented two spaces: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Demonstration of simple table syntax.
RightLeftCenterDefault
12121212
123123123123
1111
+{| +|+ Demonstration of simple table syntax. +!align="right"|Right +!Left +!align="center"|Center +!Default +|- +|align="right"|12 +|12 +|align="center"|12 +|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|1 +|} Multiline table with caption: - - ----- - - - - - - - - - - - - - - - - - - - - - -
Here's the caption. It may span multiple lines.
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here's another one. Note the blank line between rows.
+{| +|+ Here's the caption. It may span multiple lines. +!align="center" width="15%"|Centered Header +!width="13%"|Left Aligned +!align="right" width="16%"|Right Aligned +!width="33%"|Default aligned +|- +|align="center"|First +|row +|align="right"|12.0 +|Example of a row that spans multiple lines. +|- +|align="center"|Second +|row +|align="right"|5.0 +|Here's another one. Note the blank line between rows. +|} Multiline table without caption: - ----- - - - - - - - - - - - - - - - - - - - - - -
Centered HeaderLeft AlignedRight AlignedDefault aligned
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here's another one. Note the blank line between rows.
+{| +!align="center" width="15%"|Centered Header +!width="13%"|Left Aligned +!align="right" width="16%"|Right Aligned +!width="33%"|Default aligned +|- +|align="center"|First +|row +|align="right"|12.0 +|Example of a row that spans multiple lines. +|- +|align="center"|Second +|row +|align="right"|5.0 +|Here's another one. Note the blank line between rows. +|} Table without column headers: - - - - - - - - - - - - - - - - - - - - - -
12121212
123123123123
1111
+{| +|align="right"|12 +|12 +|align="center"|12 +|align="right"|12 +|- +|align="right"|123 +|123 +|align="center"|123 +|align="right"|123 +|- +|align="right"|1 +|1 +|align="center"|1 +|align="right"|1 +|} Multiline table without column headers: - ----- - - - - - - - - - - - - - -
Firstrow12.0Example of a row that spans multiple lines.
Secondrow5.0Here's another one. Note the blank line between rows.
+{| +|align="center" width="15%"|First +|width="13%"|row +|align="right" width="16%"|12.0 +|width="33%"|Example of a row that spans multiple lines. +|- +|align="center"|Second +|row +|align="right"|5.0 +|Here's another one. Note the blank line between rows. +|} -- cgit v1.2.3 From 5050cff37cbe2dffd7f7f09db11da40d7c1e48d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 Aug 2013 23:16:54 -0700 Subject: Removed comment that chokes recent cpp. Closes #933. --- src/Text/Pandoc/Parsing.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 0913d8c6c..4ade6def8 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -421,7 +421,6 @@ uri :: Parser [Char] st (String, String) uri = try $ do scheme <- uriScheme char ':' - -- /^[\/\w\u0080-\uffff]+|%[A-Fa-f0-9]+|&#?\w+;|(?:[,]+|[\S])[%&~\w\u0080-\uffff]/ -- We allow punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- cgit v1.2.3 From 2d6e0b1530e61fa2d6a22d8b61042734b20f0af5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 Aug 2013 14:12:13 -0700 Subject: Remove CPP from default-extensions; add pragmas to modules as needed. --- man/make-pandoc-man-pages.hs | 1 + pandoc.cabal | 4 ---- src/Text/Pandoc/Pretty.hs | 2 +- src/Text/Pandoc/UTF8.hs | 1 + src/Text/Pandoc/Writers/EPUB.hs | 6 +----- 5 files changed, 4 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/man/make-pandoc-man-pages.hs b/man/make-pandoc-man-pages.hs index eca1276eb..008294433 100644 --- a/man/make-pandoc-man-pages.hs +++ b/man/make-pandoc-man-pages.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} -- Create pandoc.1 man and pandoc_markdown.5 man pages from README import Text.Pandoc import qualified Text.Pandoc.UTF8 as UTF8 diff --git a/pandoc.cabal b/pandoc.cabal index 7d4bccc41..7f12a44ae 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -277,7 +277,6 @@ Library Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -auto-all -caf-all -rtsopts Default-Language: Haskell98 - Default-Extensions: CPP Other-Extensions: PatternGuards, OverloadedStrings, ScopedTypeVariables, GeneralizedNewtypeDeriving, RelaxedPolyRec, DeriveDataTypeable, TypeSynonymInstances, @@ -357,7 +356,6 @@ Executable pandoc if os(windows) Cpp-options: -D_WINDOWS Default-Language: Haskell98 - Default-Extensions: CPP Other-Extensions: PatternGuards, OverloadedStrings, ScopedTypeVariables, GeneralizedNewtypeDeriving, RelaxedPolyRec, DeriveDataTypeable, TypeSynonymInstances, @@ -377,7 +375,6 @@ Executable make-pandoc-man-pages old-time >= 1.0 && < 1.2, time >= 1.2 && < 1.5 Default-Language: Haskell98 - Default-Extensions: CPP Test-Suite test-pandoc Type: exitcode-stdio-1.0 @@ -415,7 +412,6 @@ Test-Suite test-pandoc Tests.Writers.LaTeX Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Default-Language: Haskell98 - Default-Extensions: CPP benchmark benchmark-pandoc Type: exitcode-stdio-1.0 diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 21121a506..faf2a6797 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-} {- Copyright (C) 2010 John MacFarlane diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 9fa743cd9..229442543 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {- Copyright (C) 2010 John MacFarlane diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index e625931fc..fb756f196 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards, CPP #-} {- Copyright (C) 2010 John MacFarlane @@ -62,11 +62,7 @@ import Text.Pandoc.MIME (getMimeType) import Prelude hiding (catch) #endif import Control.Exception (catch, SomeException) -#if MIN_VERSION_blaze_html(0,5,0) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -#else -import Text.Blaze.Renderer.Utf8 (renderHtml) -#endif -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section -- cgit v1.2.3 From 52c5cdb04e6c574f897c948e45084bf9343bf57c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 Aug 2013 16:19:34 -0700 Subject: Biblio: Capitalize citation note only if it has a prefix. So, author names or titles that aren't capitalized will stay uncapitalized. --- changelog | 1 + src/Text/Pandoc/Biblio.hs | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/changelog b/changelog index 3166ff017..9e46155fd 100644 --- a/changelog +++ b/changelog @@ -76,6 +76,7 @@ + Don't interfere with Notes that aren't citation notes. This fixes a bug in which notes not generated from citations were being altered (e.g. first letter capitalized) (#898). + + Only capitalize footnote citations when they have a prefix. + Changes in suffix parsing. A suffix beginning with a digit gets 'p' inserted before it before passing to citeproc-hs, so that bare numbers are treated as page numbers by default. A suffix not beginning with diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index d0db35ae7..755c779ea 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -84,11 +84,6 @@ mvPunct (Space : x : ys) | isNote x, startWithPunct ys = mvPunct (Space : x : ys) | isNote x = x : ys mvPunct xs = xs -sanitize :: [Inline] -> [Inline] -sanitize xs | endWithPunct xs = toCapital xs - | otherwise = toCapital (xs ++ [Str "."]) - - -- A replacement for citeproc-hs's endWithPunct, which wrongly treats -- a sentence ending in '.)' as not ending with punctuation, leading -- to an extra period. @@ -103,8 +98,8 @@ endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of deNote :: Pandoc -> Pandoc deNote = topDown go - where go (Cite cs [Note [Para xs]]) = - Cite cs [Note $ bottomUp go' [Para $ sanitize xs]] + where go (Cite (c:cs) [Note xs]) = + Cite (c:cs) [Note $ bottomUp go' $ sanitize c xs] go (Note xs) = Note $ bottomUp go' xs go x = x go' (Note [Para xs]:ys) = @@ -112,6 +107,14 @@ deNote = topDown go then initInline xs ++ ys else xs ++ ys go' xs = xs + sanitize :: Citation -> [Block] -> [Block] + sanitize Citation{citationPrefix = pref} [Para xs] = + case (null pref, endWithPunct xs) of + (True, False) -> [Para $ xs ++ [Str "."]] + (True, True) -> [Para xs] + (False, False) -> [Para $ toCapital $ xs ++ [Str "."]] + (False, True) -> [Para $ toCapital xs] + sanitize _ bs = bs isTextualCitation :: [Citation] -> Bool isTextualCitation (c:_) = citationMode c == AuthorInText -- cgit v1.2.3 From 7d18770b008c12e13c324223304c6703e06f3a4a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 Aug 2013 23:31:01 -0700 Subject: Added support for MetaBool. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Writers/Custom.hs | 2 ++ src/Text/Pandoc/Writers/Shared.hs | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 076706b4e..a880c09de 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -278,7 +278,7 @@ toMetaValue opts x = yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t yamlToMeta _ (Yaml.Number n) = MetaString $ show n -yamlToMeta _ (Yaml.Bool b) = MetaString $ map toLower $ show b +yamlToMeta _ (Yaml.Bool b) = MetaBool b yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) $ V.toList xs yamlToMeta opts (Yaml.Object o) = MetaMap $ H.foldrWithKey (\k v m -> diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 732497616..5c82fe0e1 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -110,12 +110,14 @@ instance StackValue [Block] where instance StackValue MetaValue where push l (MetaMap m) = Lua.push l m push l (MetaList xs) = Lua.push l xs + push l (MetaBool x) = Lua.push l x push l (MetaString s) = Lua.push l s push l (MetaInlines ils) = Lua.push l ils push l (MetaBlocks bs) = Lua.push l bs peek _ _ = undefined valuetype (MetaMap _) = Lua.TTABLE valuetype (MetaList _) = Lua.TTABLE + valuetype (MetaBool _) = Lua.TBOOLEAN valuetype (MetaString _) = Lua.TSTRING valuetype (MetaInlines _) = Lua.TSTRING valuetype (MetaBlocks _) = Lua.TSTRING diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index c6c30d070..e6ec853f8 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -74,6 +74,7 @@ metaValueToJSON blockWriter inlineWriter (MetaMap metamap) = liftM toJSON $ Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap metaValueToJSON blockWriter inlineWriter (MetaList xs) = liftM toJSON $ Traversable.mapM (metaValueToJSON blockWriter inlineWriter) xs +metaValueToJSON _ _ (MetaBool b) = return $ toJSON b metaValueToJSON _ _ (MetaString s) = return $ toJSON s metaValueToJSON blockWriter _ (MetaBlocks bs) = liftM toJSON $ blockWriter bs metaValueToJSON _ inlineWriter (MetaInlines bs) = liftM toJSON $ inlineWriter bs -- cgit v1.2.3 From d44d1664312f0d05ada61eb49a678ef8a04d90d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 Aug 2013 08:43:42 -0700 Subject: Allow YAML title blocks to contain only comments. --- src/Text/Pandoc/Readers/Markdown.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a880c09de..251554de1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -231,7 +231,9 @@ yamlTitleBlock = try $ do pos <- getPosition string "---" blankline - rawYaml <- unlines <$> manyTill anyLine stopLine + rawYamlLines <- manyTill anyLine stopLine + -- by including --- and ..., we allow yaml blocks with just comments: + let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines opts <- stateOptions <$> getState case Yaml.decodeEither' $ UTF8.fromString rawYaml of @@ -241,6 +243,7 @@ yamlTitleBlock = try $ do then f else B.setMeta (T.unpack k) (yamlToMeta opts v) . f) id hashmap + Right Yaml.Null -> return $ return id Right _ -> do addWarning (Just pos) "YAML header is not an object" return $ return id -- cgit v1.2.3 From bb61624bb2bba416e1992ecdf101f9660a3edcae Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 7 Aug 2013 14:30:47 -0700 Subject: Textile reader: Removed raw LaTeX parsing. This isn't part of Textile. --- src/Text/Pandoc/Readers/Textile.hs | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 9191f6908..d4f092d07 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -56,7 +56,6 @@ import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate ) @@ -126,7 +125,6 @@ blockParsers = [ codeBlock , commentBlock , anyList , rawHtmlBlock - , rawLaTeXBlock' , maybeExplicitBlock "table" table , maybeExplicitBlock "p" para ] @@ -292,13 +290,6 @@ rawHtmlBlock = try $ do optional blanklines return $ RawBlock "html" b --- | Raw block of LaTeX content -rawLaTeXBlock' :: Parser [Char] ParserState Block -rawLaTeXBlock' = do - guardEnabled Ext_raw_tex - RawBlock "latex" <$> (rawLaTeXBlock <* spaces) - - -- | In textile, paragraphs are separated by blank lines. para :: Parser [Char] ParserState Block para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak @@ -373,7 +364,6 @@ inlineParsers = [ str , escapedInline , htmlSpan , rawHtmlInline - , rawLaTeXInline' , note , try $ (char '[' *> inlineMarkup <* char ']') , inlineMarkup @@ -489,12 +479,6 @@ endline = try $ do rawHtmlInline :: Parser [Char] ParserState Inline rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag --- | Raw LaTeX Inline -rawLaTeXInline' :: Parser [Char] ParserState Inline -rawLaTeXInline' = try $ do - guardEnabled Ext_raw_tex - rawLaTeXInline - -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. link :: Parser [Char] ParserState Inline -- cgit v1.2.3 From 802dc9a8b9f206eb3be592ab19067f637eb2a3ee Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Aug 2013 10:41:39 -0700 Subject: Added Text.Pandoc.Compat.Monoid. This allows pandoc to compile with base < 4.5, where Data.Monoid doesn't export `<>`. Thanks to Dirk Ullirch for the patch. --- pandoc.cabal | 1 + src/Text/Pandoc/Compat/Monoid.hs | 16 ++++++++++++++++ src/Text/Pandoc/Templates.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 2 +- 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 src/Text/Pandoc/Compat/Monoid.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 3dc400d40..192b6c5fd 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -335,6 +335,7 @@ Library Text.Pandoc.ImageSize, Text.Pandoc.Slides, Text.Pandoc.Highlighting, + Text.Pandoc.Compat.Monoid, Paths_pandoc Buildable: True diff --git a/src/Text/Pandoc/Compat/Monoid.hs b/src/Text/Pandoc/Compat/Monoid.hs new file mode 100644 index 000000000..80ffcbbd6 --- /dev/null +++ b/src/Text/Pandoc/Compat/Monoid.hs @@ -0,0 +1,16 @@ +{-# LANGUAGE CPP #-} +module Text.Pandoc.Compat.Monoid ( Monoid(..) + , (<>) + ) where + +#if MIN_VERSION_base(4,5,0) +import Data.Monoid ((<>), Monoid(..)) +#else +import Data.Monoid (mappend, Monoid(..)) +#endif + +#if MIN_VERSION_base(4,5,0) +#else +(<>) :: Monoid m => m -> m -> m +(<>) = mappend +#endif diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index c95c84ca8..22a44e735 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -102,7 +102,7 @@ import Control.Applicative import qualified Data.Text as T import Data.Text (Text) import Data.Text.Encoding (encodeUtf8) -import Data.Monoid ((<>), Monoid(..)) +import Text.Pandoc.Compat.Monoid ((<>), Monoid(..)) import Data.List (intersperse, nub) import System.FilePath ((), (<.>)) import qualified Data.Map as M diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 611cddc65..6bb4d5569 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -35,7 +35,7 @@ import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 import qualified Data.Map as M import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Monoid ((<>)) +import Text.Pandoc.Compat.Monoid ((<>)) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Text.Pandoc.Definition -- cgit v1.2.3 From 12e7ec40707bfb716bb9add82e4320558e065492 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Aug 2013 10:42:52 -0700 Subject: Added Text.Pandoc.Compat.TagSoupEntity. This allows pandoc to compile with tagsoup 0.13.x. Thanks to Dirk Ullrich for the patch. --- pandoc.cabal | 3 ++- src/Text/Pandoc/Compat/TagSoupEntity.hs | 15 +++++++++++++++ src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/Readers/DocBook.hs | 2 +- src/Text/Pandoc/Readers/OPML.hs | 2 +- src/Text/Pandoc/XML.hs | 2 +- 6 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 src/Text/Pandoc/Compat/TagSoupEntity.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 192b6c5fd..19f8c14ef 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -253,7 +253,7 @@ Library citeproc-hs >= 0.3.7 && < 0.4, pandoc-types >= 1.12 && < 1.13, aeson >= 0.6 && < 0.7, - tagsoup >= 0.12.5 && < 0.13, + tagsoup >= 0.12.5 && < 0.14, base64-bytestring >= 0.1 && < 1.1, zlib >= 0.5 && < 0.6, highlighting-kate >= 0.5.5 && < 0.6, @@ -336,6 +336,7 @@ Library Text.Pandoc.Slides, Text.Pandoc.Highlighting, Text.Pandoc.Compat.Monoid, + Text.Pandoc.Compat.TagSoupEntity, Paths_pandoc Buildable: True diff --git a/src/Text/Pandoc/Compat/TagSoupEntity.hs b/src/Text/Pandoc/Compat/TagSoupEntity.hs new file mode 100644 index 000000000..80985aef9 --- /dev/null +++ b/src/Text/Pandoc/Compat/TagSoupEntity.hs @@ -0,0 +1,15 @@ +{-# LANGUAGE CPP #-} +module Text.Pandoc.Compat.TagSoupEntity (lookupEntity + ) where + +import qualified Text.HTML.TagSoup.Entity as TE + +lookupEntity :: String -> Maybe Char +#if MIN_VERSION_tagsoup(0,13,0) +lookupEntity = str2chr . TE.lookupEntity + where str2chr :: Maybe String -> Maybe Char + str2chr (Just [c]) = Just c + str2chr _ = Nothing +#else +lookupEntity = TE.lookupEntity +#endif diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4ade6def8..2f42aba41 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -161,7 +161,7 @@ import Data.List ( intercalate, transpose ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) -import Text.HTML.TagSoup.Entity ( lookupEntity ) +import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity ) import Data.Default import qualified Data.Set as Set import Control.Monad.Reader diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 0058e889c..6a799e270 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -4,7 +4,7 @@ import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Compat.TagSoupEntity (lookupEntity) import Data.Generics import Data.Monoid import Data.Char (isSpace) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index c9726d195..35d01e877 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -6,7 +6,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Compat.TagSoupEntity (lookupEntity) import Data.Generics import Data.Monoid import Control.Monad.State diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 89ae81a10..c11af9a19 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -38,7 +38,7 @@ module Text.Pandoc.XML ( escapeCharForXML, import Text.Pandoc.Pretty import Data.Char (ord, isAscii, isSpace) -import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Compat.TagSoupEntity (lookupEntity) -- | Escape one character as needed for XML. escapeCharForXML :: Char -> String -- cgit v1.2.3 From 9aa9d5cf68386acd127427cc62f6004b2a17057a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Aug 2013 10:52:53 -0700 Subject: Revert "Textile reader: Removed raw LaTeX parsing." This reverts commit bb61624bb2bba416e1992ecdf101f9660a3edcae. Apparently someone put this there for a reason, since it's in the test suite. --- src/Text/Pandoc/Readers/Textile.hs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index d4f092d07..9191f6908 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -56,6 +56,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate ) @@ -125,6 +126,7 @@ blockParsers = [ codeBlock , commentBlock , anyList , rawHtmlBlock + , rawLaTeXBlock' , maybeExplicitBlock "table" table , maybeExplicitBlock "p" para ] @@ -290,6 +292,13 @@ rawHtmlBlock = try $ do optional blanklines return $ RawBlock "html" b +-- | Raw block of LaTeX content +rawLaTeXBlock' :: Parser [Char] ParserState Block +rawLaTeXBlock' = do + guardEnabled Ext_raw_tex + RawBlock "latex" <$> (rawLaTeXBlock <* spaces) + + -- | In textile, paragraphs are separated by blank lines. para :: Parser [Char] ParserState Block para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak @@ -364,6 +373,7 @@ inlineParsers = [ str , escapedInline , htmlSpan , rawHtmlInline + , rawLaTeXInline' , note , try $ (char '[' *> inlineMarkup <* char ']') , inlineMarkup @@ -479,6 +489,12 @@ endline = try $ do rawHtmlInline :: Parser [Char] ParserState Inline rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag +-- | Raw LaTeX Inline +rawLaTeXInline' :: Parser [Char] ParserState Inline +rawLaTeXInline' = try $ do + guardEnabled Ext_raw_tex + rawLaTeXInline + -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. link :: Parser [Char] ParserState Inline -- cgit v1.2.3 From 7d694e15697a4b1cc974b6316a08117afe663a74 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Aug 2013 15:13:28 -0700 Subject: Added Text.Pandoc.Process (pipeProcess). A souped up version of readProcessWithErrorCode that uses lazy bytestrings and allows setting environment. --- pandoc.cabal | 3 +- src/Text/Pandoc/Process.hs | 105 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 src/Text/Pandoc/Process.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index a8dd528ad..8210bfce5 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -324,7 +324,8 @@ Library Text.Pandoc.Templates, Text.Pandoc.XML, Text.Pandoc.Biblio, - Text.Pandoc.SelfContained + Text.Pandoc.SelfContained, + Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Haddock.Lex, Text.Pandoc.Readers.Haddock.Parse, Text.Pandoc.Writers.Shared, diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs new file mode 100644 index 000000000..112c5b974 --- /dev/null +++ b/src/Text/Pandoc/Process.hs @@ -0,0 +1,105 @@ +{- +Copyright (C) 2013 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Process + Copyright : Copyright (C) 2013 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +ByteString variant of 'readProcessWithExitCode'. +-} +module Text.Pandoc.Process (pipeProcess) +where +import System.Process +import System.Exit (ExitCode (..)) +import Control.Exception +import System.IO (hClose, hFlush) +import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) +import Control.Monad (unless) +import qualified Data.ByteString.Lazy as BL + +{- | +Version of 'System.Process.readProcessWithExitCode' that uses lazy bytestrings +instead of strings and allows setting environment variables. + +@readProcessWithExitCode@ creates an external process, reads its +standard output and standard error strictly, waits until the process +terminates, and then returns the 'ExitCode' of the process, +the standard output, and the standard error. + +If an asynchronous exception is thrown to the thread executing +@readProcessWithExitCode@. The forked process will be terminated and +@readProcessWithExitCode@ will wait (block) until the process has been +terminated. +-} + +pipeProcess + :: Maybe [(String, String)] -- ^ environment variables + -> FilePath -- ^ Filename of the executable (see 'proc' for details) + -> [String] -- ^ any arguments + -> BL.ByteString -- ^ standard input + -> IO (ExitCode,BL.ByteString,BL.ByteString) -- ^ exitcode, stdout, stderr +pipeProcess mbenv cmd args input = + mask $ \restore -> do + (Just inh, Just outh, Just errh, pid) <- createProcess (proc cmd args) + { env = mbenv, + std_in = CreatePipe, + std_out = CreatePipe, + std_err = CreatePipe } + flip onException + (do hClose inh; hClose outh; hClose errh; + terminateProcess pid; waitForProcess pid) $ restore $ do + -- fork off a thread to start consuming stdout + out <- BL.hGetContents outh + waitOut <- forkWait $ evaluate $ BL.length out + + -- fork off a thread to start consuming stderr + err <- BL.hGetContents errh + waitErr <- forkWait $ evaluate $ BL.length err + + -- now write and flush any input + let writeInput = do + unless (BL.null input) $ do + BL.hPutStr inh input + hFlush inh + hClose inh + + writeInput + + -- wait on the output + waitOut + waitErr + + hClose outh + hClose errh + + -- wait on the process + ex <- waitForProcess pid + + return (ex, out, err) + +forkWait :: IO a -> IO (IO a) +forkWait a = do + res <- newEmptyMVar + _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res + return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return) + -- cgit v1.2.3 From 83f263110f364e87d8c0908b4a52be801aa77802 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Aug 2013 15:15:20 -0700 Subject: Use pipeProcess in Text.Pandoc.PDF. --- src/Text/Pandoc/PDF.hs | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 49b455285..b030e2ca7 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -38,11 +38,7 @@ import qualified Data.ByteString as BS import System.Exit (ExitCode (..)) import System.FilePath import System.Directory -import System.Process import System.Environment -import Control.Exception (evaluate) -import System.IO (hClose) -import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) import Control.Monad (unless) import Data.List (isInfixOf) import qualified Data.ByteString.Base64 as B64 @@ -52,6 +48,8 @@ import Text.Pandoc.Generic (bottomUpM) import Text.Pandoc.Shared (fetchItem, warn) import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.MIME (extensionFromMimeType) +import Text.Pandoc.Process (pipeProcess) +import qualified Data.ByteString.Lazy as BL withTempDir :: String -> (FilePath -> IO a) -> IO a withTempDir = @@ -148,7 +146,7 @@ runTeXProgram program runsLeft tmpDir source = do $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] - (exit, out, err) <- readCommand (Just env'') program programArgs + (exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty if runsLeft > 1 then runTeXProgram program (runsLeft - 1) tmpDir source else do @@ -159,34 +157,3 @@ runTeXProgram program runsLeft tmpDir source = do else return Nothing return (exit, out <> err, pdf) --- utility functions - --- Run a command and return exitcode, contents of stdout, and --- contents of stderr. (Based on --- 'readProcessWithExitCode' from 'System.Process'.) -readCommand :: Maybe [(String, String)] -- ^ environment variables - -> FilePath -- ^ command to run - -> [String] -- ^ any arguments - -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr -readCommand mbenv cmd args = do - (Just inh, Just outh, Just errh, pid) <- - createProcess (proc cmd args){ env = mbenv, - std_in = CreatePipe, - std_out = CreatePipe, - std_err = CreatePipe } - outMVar <- newEmptyMVar - -- fork off a thread to start consuming stdout - out <- B.hGetContents outh - _ <- forkIO $ evaluate (B.length out) >> putMVar outMVar () - -- fork off a thread to start consuming stderr - err <- B.hGetContents errh - _ <- forkIO $ evaluate (B.length err) >> putMVar outMVar () - -- now write and flush any input - hClose inh -- done with stdin - -- wait on the output - takeMVar outMVar - takeMVar outMVar - hClose outh - -- wait on the process - ex <- waitForProcess pid - return (ex, out, err) -- cgit v1.2.3 From 99bb066bb925134b506d39c8d6694fe81337d9c1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Aug 2013 15:15:58 -0700 Subject: Pass writename as argument to filters. This way filters can figure out what the target format is and react appropriately. Example: #!/usr/bin/env runghc import Text.Pandoc.JSON import Data.Char main = toJSONFilter cap where cap (Just "html") (Str xs) = Str $ map toUpper xs cap _ x = x This capitalizes text only for html output. --- pandoc.cabal | 2 +- pandoc.hs | 17 +++++++++++------ src/Text/Pandoc.hs | 5 +---- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 8210bfce5..3903fe606 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -352,8 +352,8 @@ Executable pandoc bytestring >= 0.9 && < 0.11, extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.5.5 && < 0.6, + aeson >= 0.6 && < 0.7, HTTP >= 4000.0.5 && < 4000.3, - process >= 1 && < 1.2, citeproc-hs >= 0.3.7 && < 0.4 Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m diff --git a/pandoc.hs b/pandoc.hs index 94d206103..fdf0b35b7 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -37,13 +37,13 @@ import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, safeRead, headerShift, normalize, err, warn ) import Text.Pandoc.XML ( toEntities, fromEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) +import Text.Pandoc.Process (pipeProcess) import Text.Highlighting.Kate ( languages, Style, tango, pygments, espresso, zenburn, kate, haddock, monochrome ) import System.Environment ( getArgs, getProgName ) import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt -import System.Process (readProcess) import Data.Char ( toLower ) import Data.List ( intercalate, isPrefixOf, sort ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) @@ -59,6 +59,7 @@ import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import Text.CSL.Reference (Reference(..)) +import Data.Aeson (eitherDecode', encode) copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006-2013 John MacFarlane\n" ++ @@ -88,9 +89,13 @@ wrapWords indent c = wrap' (c - indent) (c - indent) isTextFormat :: String -> Bool isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"] -externalFilter :: FilePath -> Pandoc -> IO Pandoc -externalFilter f d = E.catch - (readJSON def `fmap` readProcess f [] (writeJSON def d)) +externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc +externalFilter f args' d = E.catch + (do (exitcode, outbs, errbs) <- pipeProcess Nothing f args' $ encode d + case exitcode of + ExitSuccess -> return $ either error id $ eitherDecode' outbs + ExitFailure _ -> err 83 $ "Error running filter `" ++ UTF8.toStringLazy outbs ++ + UTF8.toStringLazy errbs ++ "'") (\e -> let _ = (e :: E.SomeException) in err 83 $ "Error running filter `" ++ f ++ "'") @@ -132,7 +137,7 @@ data Opt = Opt , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text , optColumns :: Int -- ^ Line length in characters - , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply + , optPlugins :: [[String] -> Pandoc -> IO Pandoc] -- ^ Plugins to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks @@ -1115,7 +1120,7 @@ main = do reader readerOpts let doc0 = foldr ($) doc transforms - doc1 <- foldrM ($) doc0 plugins + doc1 <- foldrM ($) doc0 $ map ($ [writerName']) plugins let writeBinary :: B.ByteString -> IO () writeBinary = B.writeFile (UTF8.encodePath outputFile) diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 27aa02a75..703bb876a 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -309,11 +309,8 @@ class ToJSONFilter a => ToJsonFilter a toJsonFilter = toJSONFilter readJSON :: ReaderOptions -> String -> Pandoc -readJSON _ = checkJSON . eitherDecode' . UTF8.fromStringLazy +readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode -checkJSON :: Either String a -> a -checkJSON (Right x) = x -checkJSON (Left e) = error e -- cgit v1.2.3 From e9de0f0e22b9b64b5684efe81d03539c3f57a71c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Aug 2013 23:14:12 -0700 Subject: Preliminary support for new Div and Span elements in writers. Currently these are "transparent" containers, except in HTML, where they produce div and span elements with attributes. --- data/sample.lua | 8 ++++++++ src/Text/Pandoc/Writers/AsciiDoc.hs | 2 ++ src/Text/Pandoc/Writers/ConTeXt.hs | 2 ++ src/Text/Pandoc/Writers/Custom.hs | 5 +++++ src/Text/Pandoc/Writers/Docbook.hs | 3 +++ src/Text/Pandoc/Writers/Docx.hs | 2 ++ src/Text/Pandoc/Writers/FB2.hs | 3 +++ src/Text/Pandoc/Writers/HTML.hs | 5 +++++ src/Text/Pandoc/Writers/LaTeX.hs | 2 ++ src/Text/Pandoc/Writers/Man.hs | 2 ++ src/Text/Pandoc/Writers/Markdown.hs | 3 +++ src/Text/Pandoc/Writers/MediaWiki.hs | 6 ++++++ src/Text/Pandoc/Writers/OpenDocument.hs | 2 ++ src/Text/Pandoc/Writers/Org.hs | 3 +++ src/Text/Pandoc/Writers/RST.hs | 2 ++ src/Text/Pandoc/Writers/RTF.hs | 3 +++ src/Text/Pandoc/Writers/Texinfo.hs | 5 +++++ src/Text/Pandoc/Writers/Textile.hs | 6 ++++++ 18 files changed, 64 insertions(+) (limited to 'src/Text') diff --git a/data/sample.lua b/data/sample.lua index 1c82ebe2e..a7e9d6337 100644 --- a/data/sample.lua +++ b/data/sample.lua @@ -177,6 +177,10 @@ function Note(s) '">' .. num .. '' end +function Span(s, attr) + return "" .. s .. "" +end + function Plain(s) return s end @@ -299,6 +303,10 @@ function Table(caption, aligns, widths, headers, rows) return table.concat(buffer,'\n') end +function Div(s, attr) + return "\n" .. s .. "
" +end + -- The following code will produce runtime warnings when you haven't defined -- all of the functions you need for the custom writer, so it's useful -- to include when you're working on a writer. diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 6c3c6955e..00cea27e5 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -246,6 +246,7 @@ blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do blockToAsciiDoc opts (DefinitionList items) = do contents <- mapM (definitionListItemToAsciiDoc opts) items return $ cat contents <> blankline +blockToAsciiDoc opts (Div _ bs) = blockListToAsciiDoc opts bs -- | Convert bullet list item (list of blocks) to asciidoc. bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc @@ -383,3 +384,4 @@ inlineToAsciiDoc opts (Note [Plain inlines]) = do return $ text "footnote:[" <> contents <> "]" -- asciidoc can't handle blank lines in notes inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" +inlineToAsciiDoc opts (Span _ ils) = inlineListToAsciiDoc opts ils diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 32588dc8f..40dc1deb5 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -143,6 +143,7 @@ blockToConTeXt (CodeBlock _ str) = -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline blockToConTeXt (RawBlock _ _ ) = return empty +blockToConTeXt (Div _ bs) = blockListToConTeXt bs blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst return $ ("\\startitemize" <> if isTightList lst @@ -330,6 +331,7 @@ inlineToConTeXt (Note contents) = do then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> text "\\stopbuffer\\footnote{\\getbuffer}" +inlineToConTeXt (Span _ ils) = inlineListToConTeXt ils -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Attr diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 5c82fe0e1..c250a240e 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -178,6 +178,9 @@ blockToCustom lua (OrderedList (num,sty,delim) items) = blockToCustom lua (DefinitionList items) = callfunc lua "DefinitionList" items +blockToCustom lua (Div attr items) = + callfunc lua "Div" items (attrToMap attr) + -- | Convert list of Pandoc block elements to Custom. blockListToCustom :: LuaState -- ^ Options -> [Block] -- ^ List of block elements @@ -240,3 +243,5 @@ inlineToCustom lua (Image alt (src,tit)) = inlineToCustom lua (Note contents) = callfunc lua "Note" contents +inlineToCustom lua (Span attr items) = + callfunc lua "Span" items (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 6f4b61a79..2f415f3ee 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -148,6 +148,7 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty +blockToDocbook opts (Div _ bs) = blocksToDocbook opts bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure @@ -267,6 +268,8 @@ inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" $ inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst +inlineToDocbook opts (Span _ ils) = + inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 6bb4d5569..d93254971 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -428,6 +428,7 @@ getUniqueId = liftIO $ (show . (+ 20) . hashUnique) `fmap` newUnique -- | Convert a Pandoc block element to OpenXML. blockToOpenXML :: WriterOptions -> Block -> WS [Element] blockToOpenXML _ Null = return [] +blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs blockToOpenXML opts (Header lev (ident,_,_) lst) = do contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ blockToOpenXML opts (Para lst) @@ -633,6 +634,7 @@ formattedString str = do inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") +inlineToOpenXML opts (Span _ ils) = inlinesToOpenXML opts ils inlineToOpenXML opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML opts (Emph lst) = diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 27f0c8305..2576b2dc2 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -324,6 +324,7 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . lines $ s blockToXml (RawBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code") . lines $ s +blockToXml (Div _ bs) = cMapM blockToXml bs blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs blockToXml (OrderedList a bss) = do state <- get @@ -425,6 +426,7 @@ indent = indentBlock -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] +toXml (Span _ ils) = cMapM toXml ils toXml (Emph ss) = list `liftM` wrap "emphasis" ss toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss @@ -560,6 +562,7 @@ list = (:[]) plain :: Inline -> String plain (Str s) = s plain (Emph ss) = concat (map plain ss) +plain (Span _ ss) = concat (map plain ss) plain (Strong ss) = concat (map plain ss) plain (Strikeout ss) = concat (map plain ss) plain (Superscript ss) = concat (map plain ss) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index cfc187e02..560c26c76 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -407,6 +407,9 @@ blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."]) blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents +blockToHtml opts (Div attr bs) = do + contents <- blockListToHtml opts bs + return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str blockToHtml _ (RawBlock _ _) = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr @@ -590,6 +593,8 @@ inlineToHtml opts inline = (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br + (Span attr ils) -> inlineListToHtml opts ils >>= + return . addAttrs opts attr . H.span (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong (Code attr str) -> case hlCode of diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index aa5bfa623..37de03e0f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -282,6 +282,7 @@ isLineBreakOrSpace _ = False blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty +blockToLaTeX (Div _ bs) = blockListToLaTeX bs blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure @@ -560,6 +561,7 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc +inlineToLaTeX (Span _ ils) = inlineListToLaTeX ils >>= return . braces inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 0508b6c27..ed66c7c2b 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -160,6 +160,7 @@ blockToMan :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMan _ Null = return empty +blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines blockToMan opts (Para inlines) = do @@ -300,6 +301,7 @@ inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan opts (Span _ ils) = inlineListToMan opts ils inlineToMan opts (Emph lst) = do contents <- inlineListToMan opts lst return $ text "\\f[I]" <> contents <> text "\\f[]" diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 80402a757..d195d8445 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -301,6 +301,7 @@ blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMarkdown _ Null = return empty +blockToMarkdown opts (Div _ bs) = blockListToMarkdown opts bs blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines return $ contents <> cr @@ -628,6 +629,8 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc +inlineToMarkdown opts (Span _ ils) = + inlineListToMarkdown opts ils inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index e1bfd18b2..fccf25753 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -83,6 +83,9 @@ blockToMediaWiki :: WriterOptions -- ^ Options blockToMediaWiki _ Null = return "" +blockToMediaWiki opts (Div _ bs) = + blockListToMediaWiki opts bs + blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines @@ -328,6 +331,9 @@ inlineListToMediaWiki opts lst = -- | Convert Pandoc inline element to MediaWiki. inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String +inlineToMediaWiki opts (Span _ ils) = + inlineListToMediaWiki opts ils + inlineToMediaWiki opts (Emph lst) = do contents <- inlineListToMediaWiki opts lst return $ "''" ++ contents ++ "''" diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 0efbf7580..d76d0f6ad 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -285,6 +285,7 @@ blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Div _ xs <- bs = blocksToOpenDocument o xs | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b @@ -360,6 +361,7 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils | Space <- ils = inTextStyle space + | Span _ xs <- ils = inlinesToOpenDocument o xs | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 40e8abf7e..34ae532b0 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -106,6 +106,7 @@ escapeString = escapeStringUsing $ blockToOrg :: Block -- ^ Block element -> State WriterState Doc blockToOrg Null = return empty +blockToOrg (Div _ bs) = blockListToOrg bs blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image txt (src,'f':'i':'g':':':tit)]) = do @@ -229,6 +230,8 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat -- | Convert Pandoc inline element to Org. inlineToOrg :: Inline -> State WriterState Doc +inlineToOrg (Span _ lst) = + inlineListToOrg lst inlineToOrg (Emph lst) = do contents <- inlineListToOrg lst return $ "/" <> contents <> "/" diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 606793842..4d8daa15b 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -161,6 +161,7 @@ bordered contents c = blockToRST :: Block -- ^ Block element -> State WriterState Doc blockToRST Null = return empty +blockToRST (Div _ bs) = blockListToRST bs blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do @@ -338,6 +339,7 @@ inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat -- | Convert Pandoc inline element to RST. inlineToRST :: Inline -> State WriterState Doc +inlineToRST (Span _ ils) = inlineListToRST ils inlineToRST (Emph lst) = do contents <- inlineListToRST lst return $ "*" <> contents <> "*" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 0db1c52c4..7e5d33c50 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -208,6 +208,8 @@ blockToRTF :: Int -- ^ indent level -> Block -- ^ block to convert -> String blockToRTF _ _ Null = "" +blockToRTF indent alignment (Div _ bs) = + concatMap (blockToRTF indent alignment) bs blockToRTF indent alignment (Plain lst) = rtfCompact indent 0 alignment $ inlineListToRTF lst blockToRTF indent alignment (Para lst) = @@ -308,6 +310,7 @@ inlineListToRTF lst = concatMap inlineToRTF lst -- | Convert inline item to RTF. inlineToRTF :: Inline -- ^ inline to convert -> String +inlineToRTF (Span _ lst) = inlineListToRTF lst inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 0f57d14b2..f8b460001 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -123,6 +123,8 @@ blockToTexinfo :: Block -- ^ Block to convert blockToTexinfo Null = return empty +blockToTexinfo (Div _ bs) = blockListToTexinfo bs + blockToTexinfo (Plain lst) = inlineListToTexinfo lst @@ -374,6 +376,9 @@ disallowedInNode c = c `elem` ".,:()" inlineToTexinfo :: Inline -- ^ Inline to convert -> State WriterState Doc +inlineToTexinfo (Span _ lst) = + inlineListToTexinfo lst + inlineToTexinfo (Emph lst) = inlineListToTexinfo lst >>= return . inCmd "emph" diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 3288ce222..3fb554dca 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -101,6 +101,9 @@ blockToTextile :: WriterOptions -- ^ Options blockToTextile _ Null = return "" +blockToTextile opts (Div _ bs) = + blockListToTextile opts bs + blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines @@ -343,6 +346,9 @@ inlineListToTextile opts lst = -- | Convert Pandoc inline element to Textile. inlineToTextile :: WriterOptions -> Inline -> State WriterState String +inlineToTextile opts (Span _ lst) = + inlineListToTextile opts lst + inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst return $ if '_' `elem` contents -- cgit v1.2.3 From cbfa9321066212b912583481015224f3c944ae21 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 17:23:51 -0700 Subject: Adjustments for new Format newtype. --- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/RST.hs | 1 + src/Text/Pandoc/Readers/Textile.hs | 6 +++--- src/Text/Pandoc/Writers/AsciiDoc.hs | 8 ++++++-- src/Text/Pandoc/Writers/Custom.hs | 6 ++++++ src/Text/Pandoc/Writers/Docbook.hs | 9 +++++---- src/Text/Pandoc/Writers/Docx.hs | 10 +++++----- src/Text/Pandoc/Writers/EPUB.hs | 6 +++--- src/Text/Pandoc/Writers/HTML.hs | 13 ++++++++----- src/Text/Pandoc/Writers/LaTeX.hs | 13 ++++++++----- src/Text/Pandoc/Writers/Man.hs | 10 ++++++---- src/Text/Pandoc/Writers/MediaWiki.hs | 14 ++++++++------ src/Text/Pandoc/Writers/OpenDocument.hs | 12 +++++++----- src/Text/Pandoc/Writers/RST.hs | 15 +++++++++------ src/Text/Pandoc/Writers/RTF.hs | 12 +++++++----- src/Text/Pandoc/Writers/Texinfo.hs | 19 +++++++++++-------- src/Text/Pandoc/Writers/Textile.hs | 14 ++++++-------- tests/Tests/Arbitrary.hs | 8 ++++---- 19 files changed, 106 insertions(+), 76 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 0068ab5c1..7ca554fa3 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -182,7 +182,7 @@ pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag parseRaw <- getOption readerParseRaw if parseRaw && not (null raw) - then return [RawBlock "html" raw] + then return [RawBlock (Format "html") raw] else return [] pHtmlBlock :: String -> TagParser String @@ -408,7 +408,7 @@ pRawHtmlInline = do result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag parseRaw <- getOption readerParseRaw if parseRaw - then return [RawInline "html" $ renderTags' [result]] + then return [RawInline (Format "html") $ renderTags' [result]] else return [] pInlinesInTags :: String -> ([Inline] -> Inline) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6b5035d93..eb0baedda 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-} {- Copyright (C) 2006-2012 John MacFarlane diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 34962b553..df0a8294d 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 9191f6908..8ccd1e227 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -290,13 +290,13 @@ rawHtmlBlock :: Parser [Char] ParserState Block rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines - return $ RawBlock "html" b + return $ RawBlock (Format "html") b -- | Raw block of LaTeX content rawLaTeXBlock' :: Parser [Char] ParserState Block rawLaTeXBlock' = do guardEnabled Ext_raw_tex - RawBlock "latex" <$> (rawLaTeXBlock <* spaces) + RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. @@ -487,7 +487,7 @@ endline = try $ do return LineBreak rawHtmlInline :: Parser [Char] ParserState Inline -rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag +rawHtmlInline = RawInline (Format "html") . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline rawLaTeXInline' :: Parser [Char] ParserState Inline diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 00cea27e5..68b525742 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -132,7 +132,9 @@ blockToAsciiDoc opts (Para inlines) = do then text "\\" else empty return $ esc <> contents <> blankline -blockToAsciiDoc _ (RawBlock _ _) = return empty +blockToAsciiDoc _ (RawBlock f s) + | f == "asciidoc" = return $ text s + | otherwise = return empty blockToAsciiDoc _ HorizontalRule = return $ blankline <> text "'''''" <> blankline blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do @@ -347,7 +349,9 @@ inlineToAsciiDoc _ (Math InlineMath str) = return $ "latexmath:[$" <> text str <> "$]" inlineToAsciiDoc _ (Math DisplayMath str) = return $ "latexmath:[\\[" <> text str <> "\\]]" -inlineToAsciiDoc _ (RawInline _ _) = return empty +inlineToAsciiDoc _ (RawInline f s) + | f == "asciidoc" = return $ text s + | otherwise = return empty inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr inlineToAsciiDoc _ Space = return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index c250a240e..0234e1e35 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Custom ( writeCustom ) where import Text.Pandoc.Definition import Text.Pandoc.Options import Data.List ( intersperse ) +import Data.Char ( toLower ) import Scripting.Lua (LuaState, StackValue, callfunc) import qualified Scripting.Lua as Lua import Text.Pandoc.UTF8 (fromString, toString) @@ -78,6 +79,11 @@ instance StackValue a => StackValue [a] where return (Just lst) valuetype _ = Lua.TTABLE +instance StackValue Format where + push lua (Format f) = Lua.push lua (map toLower f) + peek l n = fmap Format `fmap` Lua.peek l n + valuetype _ = Lua.TSTRING + instance (StackValue a, StackValue b) => StackValue (M.Map a b) where push lua m = do let xs = M.toList m diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2f415f3ee..3d150d19b 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane @@ -199,10 +200,10 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block --- we allow html for compatibility with earlier versions of pandoc -blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block -blockToDocbook _ (RawBlock _ _) = empty +blockToDocbook _ (RawBlock f str) + | f == "docbook" = text str -- raw XML block + | f == "html" = text str -- allow html for backwards compatibility + | otherwise = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = let captionDoc = if null caption diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d93254971..2483e243f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -460,8 +460,8 @@ blockToOpenXML opts (Para lst) = do contents <- inlinesToOpenXML opts lst return [mknode "w:p" [] (paraProps ++ contents)] blockToOpenXML _ (RawBlock format str) - | format == "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | format == Format "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] blockToOpenXML opts (BlockQuote blocks) = withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks blockToOpenXML opts (CodeBlock attrs str) = @@ -653,8 +653,8 @@ inlineToOpenXML opts (Strikeout lst) = $ inlinesToOpenXML opts lst inlineToOpenXML _ LineBreak = return [br] inlineToOpenXML _ (RawInline f str) - | f == "openxml" = return [ x | Elem x <- parseXML str ] - | otherwise = return [] + | f == Format "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] inlineToOpenXML opts (Quoted quoteType lst) = inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] where (open, close) = case quoteType of @@ -688,7 +688,7 @@ inlineToOpenXML opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] (rStyle "FootnoteRef") , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline "openxml" $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index fb756f196..ab14ff8a0 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -103,7 +103,7 @@ writeEPUB opts doc@(Pandoc meta _) = do Just img -> do let coverImage = "cover-image" ++ takeExtension img let cpContent = renderHtml $ writeHtml opts' - (Pandoc meta [RawBlock "html" $ "
\n\"cover\n
"]) + (Pandoc meta [RawBlock (Format "html") $ "
\n\"cover\n
"]) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] , [mkEntry coverImage imgContent] ) @@ -422,7 +422,7 @@ transformInline opts sourceDir picsRef (Image lab (src,tit)) | isAbsoluteURI src = do raw <- makeSelfContained Nothing $ writeHtmlInline opts (Image lab (src,tit)) - return $ RawInline "html" raw + return $ RawInline (Format "html") raw | otherwise = do let src' = unEscapeString src pics <- readIORef picsRef @@ -438,7 +438,7 @@ transformInline opts sourceDir picsRef (Image lab (src,tit)) transformInline opts _ _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained Nothing $ writeHtmlInline opts x - return $ RawInline "html" raw + return $ RawInline (Format "html") raw transformInline _ _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 560c26c76..25079574e 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -410,8 +410,9 @@ blockToHtml opts (Para lst) = do blockToHtml opts (Div attr bs) = do contents <- blockListToHtml opts bs return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts -blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str -blockToHtml _ (RawBlock _ _) = return mempty +blockToHtml _ (RawBlock f str) + | f == Format "html" = return $ preEscapedString str + | otherwise = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do let tolhs = isEnabled Ext_literate_haskell opts && @@ -678,12 +679,14 @@ inlineToHtml opts inline = return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag ) - (RawInline "latex" str) -> case writerHTMLMathMethod opts of + (RawInline f str) + | f == Format "latex" -> + case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ toHtml str _ -> return mempty - (RawInline "html" str) -> return $ preEscapedString str - (RawInline _ _) -> return mempty + | f == Format "html" -> return $ preEscapedString str + | otherwise -> return mempty (Link [Str str] (s,_)) | "mailto:" `isPrefixOf` s && s == escapeURI ("mailto" ++ str) -> -- autolink diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 37de03e0f..d09ccc3b8 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -356,8 +356,10 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do Nothing -> rawCodeBlock Just h -> modify (\st -> st{ stHighlighting = True }) >> return (flush $ text h) -blockToLaTeX (RawBlock "latex" x) = return $ text x -blockToLaTeX (RawBlock _ _) = return empty +blockToLaTeX (RawBlock f x) + | f == Format "latex" || f == Format "tex" + = return $ text x + | otherwise = return empty blockToLaTeX (BulletList []) = return empty -- otherwise latex error blockToLaTeX (BulletList lst) = do incremental <- gets stIncremental @@ -630,9 +632,10 @@ inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" -inlineToLaTeX (RawInline "latex" str) = return $ text str -inlineToLaTeX (RawInline "tex" str) = return $ text str -inlineToLaTeX (RawInline _ _) = return empty +inlineToLaTeX (RawInline f str) + | f == Format "latex" || f == Format "tex" + = return $ text str + | otherwise = return empty inlineToLaTeX (LineBreak) = return "\\\\" inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index ed66c7c2b..642a002d6 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -167,8 +167,9 @@ blockToMan opts (Para inlines) = do contents <- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines return $ text ".PP" $$ contents -blockToMan _ (RawBlock "man" str) = return $ text str -blockToMan _ (RawBlock _ _) = return empty +blockToMan _ (RawBlock f str) + | f == Format "man" = return $ text str + | otherwise = return empty blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" blockToMan opts (Header level _ inlines) = do contents <- inlineListToMan opts inlines @@ -333,8 +334,9 @@ inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str inlineToMan opts (Math DisplayMath str) = do contents <- inlineListToMan opts $ readTeXMath str return $ cr <> text ".RS" $$ contents $$ text ".RE" -inlineToMan _ (RawInline "man" str) = return $ text str -inlineToMan _ (RawInline _ _) = return empty +inlineToMan _ (RawInline f str) + | f == Format "man" = return $ text str + | otherwise = return empty inlineToMan _ (LineBreak) = return $ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr inlineToMan _ Space = return space diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index fccf25753..4ffba1100 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -107,9 +107,10 @@ blockToMediaWiki opts (Para inlines) = do then "

" ++ contents ++ "

" else contents ++ if null listLevel then "\n" else "" -blockToMediaWiki _ (RawBlock "mediawiki" str) = return str -blockToMediaWiki _ (RawBlock "html" str) = return str -blockToMediaWiki _ (RawBlock _ _) = return "" +blockToMediaWiki _ (RawBlock f str) + | f == Format "mediawiki" = return str + | f == Format "html" = return str + | otherwise = return "" blockToMediaWiki _ HorizontalRule = return "\n-----\n" @@ -374,9 +375,10 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str inlineToMediaWiki _ (Math _ str) = return $ "" ++ str ++ "" -- note: str should NOT be escaped -inlineToMediaWiki _ (RawInline "mediawiki" str) = return str -inlineToMediaWiki _ (RawInline "html" str) = return str -inlineToMediaWiki _ (RawInline _ _) = return "" +inlineToMediaWiki _ (RawInline f str) + | f == Format "mediawiki" = return str + | f == Format "html" = return str + | otherwise = return "" inlineToMediaWiki _ (LineBreak) = return "
" diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index d76d0f6ad..05c576c20 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards, OverloadedStrings #-} {- Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane. @@ -296,7 +296,9 @@ blockToOpenDocument o bs | Table c a w h r <- bs = setFirstPara >> table c a w h r | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock _ _ <- bs = return empty + | RawBlock f s <- bs = if f == "opendocument" + then preformatted s + else return empty | Null <- bs = return empty | otherwise = return empty where @@ -374,9 +376,9 @@ inlineToOpenDocument o ils | Code _ s <- ils = preformatted s | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline "opendocument" s <- ils = preformatted s - | RawInline "html" s <- ils = preformatted s -- for backwards compat. - | RawInline _ _ <- ils = return empty + | RawInline f s <- ils = if f == "opendocument" || f == "html" + then preformatted s + else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l | Image _ (s,t) <- ils = return $ mkImg s t | Note l <- ils = mkNote l diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 4d8daa15b..5fbbb6afc 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -42,7 +42,7 @@ import Network.URI (isAbsoluteURI) import Text.Pandoc.Pretty import Control.Monad.State import Control.Applicative ( (<$>) ) -import Data.Char (isSpace) +import Data.Char (isSpace, toLower) type Refs = [([Inline], Target)] @@ -176,9 +176,11 @@ blockToRST (Para inlines) | otherwise = do contents <- inlineListToRST inlines return $ contents <> blankline -blockToRST (RawBlock f str) = - return $ blankline <> ".. raw:: " <> text f $+$ - (nest 3 $ text str) $$ blankline +blockToRST (RawBlock f str) + | f == "rst" = return $ text str + | otherwise = return $ blankline <> ".. raw:: " <> + text (map toLower $ unFormat f) $+$ + (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline blockToRST (Header level _ inlines) = do @@ -374,8 +376,9 @@ inlineToRST (Math t str) = do then blankline $$ ".. math::" $$ blankline $$ nest 3 (text str) $$ blankline else blankline $$ (".. math:: " <> text str) $$ blankline -inlineToRST (RawInline "rst" x) = return $ text x -inlineToRST (RawInline _ _) = return empty +inlineToRST (RawInline f x) + | f == "rst" = return $ text x + | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space -- autolink diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 7e5d33c50..6d2b1229d 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -62,7 +62,7 @@ rtfEmbedImage x@(Image _ (src,_)) = do let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" return $ if B.null imgdata then x - else RawInline "rtf" raw + else RawInline (Format "rtf") raw else return x rtfEmbedImage x = return x @@ -218,8 +218,9 @@ blockToRTF indent alignment (BlockQuote lst) = concatMap (blockToRTF (indent + indentIncrement) alignment) lst blockToRTF indent _ (CodeBlock _ str) = rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawBlock "rtf" str) = str -blockToRTF _ _ (RawBlock _ _) = "" +blockToRTF _ _ (RawBlock f str) + | f == Format "rtf" = str + | otherwise = "" blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ @@ -325,8 +326,9 @@ inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str inlineToRTF (Cite _ lst) = inlineListToRTF lst -inlineToRTF (RawInline "rtf" str) = str -inlineToRTF (RawInline _ _) = "" +inlineToRTF (RawInline f str) + | f == Format "rtf" = str + | otherwise = "" inlineToRTF (LineBreak) = "\\line " inlineToRTF Space = " " inlineToRTF (Link text (src, _)) = diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index f8b460001..b1fd3d6af 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2008-2010 John MacFarlane and Peter Wang @@ -152,10 +153,11 @@ blockToTexinfo (CodeBlock _ str) = do flush (text str) $$ text "@end verbatim" <> blankline -blockToTexinfo (RawBlock "texinfo" str) = return $ text str -blockToTexinfo (RawBlock "latex" str) = - return $ text "@tex" $$ text str $$ text "@end tex" -blockToTexinfo (RawBlock _ _) = return empty +blockToTexinfo (RawBlock f str) + | f == "texinfo" = return $ text str + | f == "latex" || f == "tex" = + return $ text "@tex" $$ text str $$ text "@end tex" + | otherwise = return empty blockToTexinfo (BulletList lst) = do items <- mapM listItemToTexinfo lst @@ -418,10 +420,11 @@ inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" = - return $ text "@tex" $$ text str $$ text "@end tex" -inlineToTexinfo (RawInline "texinfo" str) = return $ text str -inlineToTexinfo (RawInline _ _) = return empty +inlineToTexinfo (RawInline f str) + | f == "latex" || f == "tex" = + return $ text "@tex" $$ text str $$ text "@end tex" + | f == "texinfo" = return $ text str + | otherwise = return empty inlineToTexinfo (LineBreak) = return $ text "@*" inlineToTexinfo Space = return $ char ' ' diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 3fb554dca..27e8b60ec 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -121,10 +121,9 @@ blockToTextile opts (Para inlines) = do then "

" ++ contents ++ "

" else contents ++ if null listLevel then "\n" else "" -blockToTextile _ (RawBlock f str) = - if f == "html" || f == "textile" - then return str - else return "" +blockToTextile _ (RawBlock f str) + | f == Format "html" || f == Format "textile" = return str + | otherwise = return "" blockToTextile _ HorizontalRule = return "
\n" @@ -401,10 +400,9 @@ inlineToTextile _ (Str str) = return $ escapeStringForTextile str inlineToTextile _ (Math _ str) = return $ "" ++ escapeStringForXML str ++ "" -inlineToTextile _ (RawInline f str) = - if f == "html" || f == "textile" - then return str - else return "" +inlineToTextile _ (RawInline f str) + | f == Format "html" || f == Format "textile" = return str + | otherwise = return "" inlineToTextile _ (LineBreak) = return "\n" diff --git a/tests/Tests/Arbitrary.hs b/tests/Tests/Arbitrary.hs index 5939d088d..31c0cb46a 100644 --- a/tests/Tests/Arbitrary.hs +++ b/tests/Tests/Arbitrary.hs @@ -41,8 +41,8 @@ arbInline :: Int -> Gen Inline arbInline n = frequency $ [ (60, liftM Str realString) , (60, return Space) , (10, liftM2 Code arbAttr realString) - , (5, elements [ RawInline "html" "" - , RawInline "latex" "\\my{command}" ]) + , (5, elements [ RawInline (Format "html") "" + , RawInline (Format "latex") "\\my{command}" ]) ] ++ [ x | x <- nesters, n > 1] where nesters = [ (10, liftM Emph $ arbInlines (n-1)) , (10, liftM Strong $ arbInlines (n-1)) @@ -74,9 +74,9 @@ arbBlock :: Int -> Gen Block arbBlock n = frequency $ [ (10, liftM Plain $ arbInlines (n-1)) , (15, liftM Para $ arbInlines (n-1)) , (5, liftM2 CodeBlock arbAttr realString) - , (2, elements [ RawBlock "html" + , (2, elements [ RawBlock (Format "html") "
\n*&*\n
" - , RawBlock "latex" + , RawBlock (Format "latex") "\\begin[opt]{env}\nhi\n{\\end{env}" ]) , (5, do x1 <- choose (1 :: Int, 6) -- cgit v1.2.3 From 9152fa1a95346e26bc290b3f5018b2eeb5d4e077 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 18:13:38 -0700 Subject: Use query instead of queryWith. --- src/Text/Pandoc/Biblio.hs | 5 +++-- src/Text/Pandoc/Shared.hs | 29 +++++++++++++++++++++++++++-- src/Text/Pandoc/Writers/ConTeXt.hs | 4 ++-- src/Text/Pandoc/Writers/LaTeX.hs | 7 ++++--- 4 files changed, 36 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 755c779ea..206b38530 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -36,6 +36,7 @@ import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct ) import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Shared (stringify) import Text.Parsec hiding (State) import Control.Monad @@ -48,7 +49,7 @@ processBiblio Nothing _ p = p processBiblio _ [] p = p processBiblio (Just style) r p = let p' = evalState (bottomUpM setHash p) 1 - grps = queryWith getCitation p' + grps = query getCitation p' result = citeproc procOpts style r (setNearNote style $ map (map toCslCite) grps) cits_map = M.fromList $ zip grps (citations result) @@ -121,7 +122,7 @@ isTextualCitation (c:_) = citationMode c == AuthorInText isTextualCitation _ = False -- | Retrieve all citations from a 'Pandoc' docuument. To be used with --- 'queryWith'. +-- 'query'. getCitation :: Inline -> [[Citation]] getCitation i | Cite t _ <- i = [t] | otherwise = [] diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 09874299d..2b692dc3c 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable, CPP #-} +{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses #-} {- Copyright (C) 2006-2013 John MacFarlane @@ -79,6 +79,7 @@ module Text.Pandoc.Shared ( ) where import Text.Pandoc.Definition +import Text.Pandoc.Walk import Text.Pandoc.Generic import Text.Pandoc.Builder (Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B @@ -105,6 +106,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), renderOptions) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 +import Text.Pandoc.Compat.Monoid #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -383,7 +385,7 @@ consolidateInlines [] = [] -- | Convert list of inlines to a string with formatting removed. stringify :: [Inline] -> String -stringify = queryWith go +stringify = query go where go :: Inline -> [Char] go Space = " " go (Str x) = x @@ -433,6 +435,29 @@ data Element = Blk Block -- lvl num attributes label contents deriving (Eq, Read, Show, Typeable, Data) +instance Walkable Inline Element where + walk f (Blk x) = Blk (walk f x) + walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) + walkM f (Blk x) = Blk `fmap` walkM f x + walkM f (Sec lev nums attr ils elts) = do + ils' <- walkM f ils + elts' <- walkM f elts + return $ Sec lev nums attr ils' elts' + query f (Blk x) = query f x + query f (Sec _ _ _ ils elts) = query f ils <> query f elts + +instance Walkable Block Element where + walk f (Blk x) = Blk (walk f x) + walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts) + walkM f (Blk x) = Blk `fmap` walkM f x + walkM f (Sec lev nums attr ils elts) = do + ils' <- walkM f ils + elts' <- walkM f elts + return $ Sec lev nums attr ils' elts' + query f (Blk x) = query f x + query f (Sec _ _ _ ils elts) = query f ils <> query f elts + + -- | Convert Pandoc inline list to plain text identifier. HTML -- identifiers must start with a letter, and may contain only -- letters, digits, and the characters _-. diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 40dc1deb5..0379f8b0a 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -33,7 +33,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options -import Text.Pandoc.Generic (queryWith) +import Text.Pandoc.Walk (query) import Text.Printf ( printf ) import Data.List ( intercalate, isPrefixOf ) import Control.Monad.State @@ -326,7 +326,7 @@ inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents let codeBlock x@(CodeBlock _ _) = [x] codeBlock _ = [] - let codeBlocks = queryWith codeBlock contents + let codeBlocks = query codeBlock contents return $ if null codeBlocks then text "\\footnote{" <> nest 2 contents' <> char '}' else text "\\startbuffer " <> nest 2 contents' <> diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d09ccc3b8..860ca8349 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -30,6 +30,7 @@ Conversion of 'Pandoc' format into LaTeX. -} module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition +import Text.Pandoc.Walk import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared @@ -86,7 +87,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do -- see if there are internal links let isInternalLink (Link _ ('#':xs,_)) = [xs] isInternalLink _ = [] - modify $ \s -> s{ stInternalLinks = queryWith isInternalLink blocks } + modify $ \s -> s{ stInternalLinks = query isInternalLink blocks } let template = writerTemplate options -- set stBook depending on documentclass let bookClasses = ["memoir","book","report","scrreprt","scrbook"] @@ -248,9 +249,9 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) let hasCode (Code _ _) = [True] hasCode _ = [] opts <- gets stOptions - let fragile = not $ null $ queryWith hasCodeBlock elts ++ + let fragile = not $ null $ query hasCodeBlock elts ++ if writerListings opts - then queryWith hasCode elts + then query hasCode elts else [] let allowframebreaks = "allowframebreaks" `elem` classes let optionslist = ["fragile" | fragile] ++ -- cgit v1.2.3 From 02a125d0aa8becd258c99b27c5e30116f0cbacb4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Aug 2013 18:45:00 -0700 Subject: Use walk, walkM in place of bottomUp, bottomUpM when possible. They are significantly faster. --- src/Text/Pandoc/PDF.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Readers/MediaWiki.hs | 4 ++-- src/Text/Pandoc/Shared.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 11 ++++++----- src/Text/Pandoc/Writers/EPUB.hs | 6 +++--- src/Text/Pandoc/Writers/FB2.hs | 8 ++++++-- src/Text/Pandoc/Writers/LaTeX.hs | 3 +-- src/Text/Pandoc/Writers/Markdown.hs | 8 ++++---- src/Text/Pandoc/Writers/ODT.hs | 4 ++-- src/Text/Pandoc/Writers/RTF.hs | 4 ++-- 11 files changed, 31 insertions(+), 27 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index b030e2ca7..ce20ac1b4 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -44,7 +44,7 @@ import Data.List (isInfixOf) import qualified Data.ByteString.Base64 as B64 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition -import Text.Pandoc.Generic (bottomUpM) +import Text.Pandoc.Walk (walkM) import Text.Pandoc.Shared (fetchItem, warn) import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.MIME (extensionFromMimeType) @@ -73,7 +73,7 @@ handleImages :: String -- ^ source directory/base URL -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document -> IO Pandoc -handleImages baseURL tmpdir = bottomUpM (handleImage' baseURL tmpdir) +handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir) handleImage' :: String -> FilePath diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index eb0baedda..71e1e0ac2 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -35,7 +35,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, ) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Biblio (processBiblio) @@ -815,7 +815,7 @@ keyvals :: LP [(String, String)] keyvals = try $ char '[' *> manyTill keyval (char ']') alltt :: String -> LP Blocks -alltt t = bottomUp strToCode <$> parseFromString blocks +alltt t = walk strToCode <$> parseFromString blocks (substitute " " "\\ " $ substitute "%" "\\%" $ concat $ intersperse "\\\\\n" $ lines t) where strToCode (Str s) = Code nullAttr s diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 56049e035..8f1ff2776 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -42,7 +42,7 @@ import Text.Pandoc.Options import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing hiding ( nested ) -import Text.Pandoc.Generic ( bottomUp ) +import Text.Pandoc.Walk ( walk ) import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead ) import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) @@ -342,7 +342,7 @@ preformatted = try $ do spacesStr _ = False if F.all spacesStr contents then return mempty - else return $ B.para $ bottomUp strToCode contents + else return $ B.para $ walk strToCode contents header :: MWParser Blocks header = try $ do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 2b692dc3c..6fd78b188 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -518,7 +518,7 @@ isHeaderBlock _ = False -- | Shift header levels up or down. headerShift :: Int -> Pandoc -> Pandoc -headerShift n = bottomUp shift +headerShift n = walk shift where shift :: Block -> Block shift (Header level attr inner) = Header (level + n) attr inner shift x = x diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 2483e243f..aa618b2cc 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -45,6 +45,7 @@ import Text.Pandoc.Shared hiding (Element) import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) +import Text.Pandoc.Walk import Text.Highlighting.Kate.Types () import Text.XML.Light import Text.TeXMath @@ -108,7 +109,7 @@ writeDocx :: WriterOptions -- ^ Writer options -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = bottomUp (concatMap fixDisplayMath) doc + let doc' = walk fixDisplayMath doc refArchive <- liftM (toArchive . toLazy) $ case writerReferenceDocx opts of Just f -> B.readFile f @@ -810,17 +811,17 @@ stripLeadingTrailingSpace = go . reverse . go . reverse where go (Space:xs) = xs go xs = xs -fixDisplayMath :: Block -> [Block] +fixDisplayMath :: Block -> Block fixDisplayMath (Plain lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - map (Plain . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst fixDisplayMath (Para lst) | any isDisplayMath lst && not (all isDisplayMath lst) = -- chop into several paragraphs so each displaymath is its own - map (Para . stripLeadingTrailingSpace) $ + Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath x = [x] +fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ab14ff8a0..fa2b45036 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -48,7 +48,7 @@ import qualified Text.Pandoc.Shared as Shared import Text.Pandoc.Builder (fromList, setMeta) import Text.Pandoc.Options import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Control.Monad.State import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID @@ -116,7 +116,7 @@ writeEPUB opts doc@(Pandoc meta _) = do -- handle pictures picsRef <- newIORef [] - Pandoc _ blocks <- bottomUpM + Pandoc _ blocks <- walkM (transformInline opts' sourceDir picsRef) doc pics <- readIORef picsRef let readPicEntry entries (oldsrc, newsrc) = do @@ -520,7 +520,7 @@ correlateRefs chapterHeaderLevel bs = -- Replace internal link references using the table produced -- by correlateRefs. replaceRefs :: [(String,String)] -> [Block] -> [Block] -replaceRefs refTable = bottomUp replaceOneRef +replaceRefs refTable = walk replaceOneRef where replaceOneRef x@(Link lab ('#':xs,tit)) = case lookup xs refTable of Just url -> Link lab (url,tit) diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 2576b2dc2..adbe948be 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -45,7 +45,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) import Text.Pandoc.Shared (orderedListMarkers) -import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.Walk -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -423,6 +423,10 @@ indent = indentBlock indentLines ins = let lns = split isLineBreak ins :: [[Inline]] in intercalate [LineBreak] $ map ((Str spacer):) lns +capitalize :: Inline -> Inline +capitalize (Str xs) = Str $ map toUpper xs +capitalize x = x + -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] @@ -432,7 +436,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss toXml (Superscript ss) = list `liftM` wrap "sup" ss toXml (Subscript ss) = list `liftM` wrap "sub" ss -toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss +toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific inner <- cMapM toXml ss return $ [txt "‘"] ++ inner ++ [txt "’"] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 860ca8349..7f9a99801 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -31,7 +31,6 @@ Conversion of 'Pandoc' format into LaTeX. module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition import Text.Pandoc.Walk -import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Options @@ -498,7 +497,7 @@ sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst let noNote (Note _) = Str "" noNote x = x - let lstNoNotes = bottomUp noNote lst + let lstNoNotes = walk noNote lst let star = if unnumbered then text "*" else empty -- footnotes in sections don't work unless you specify an optional -- argument: \section[mysec]{mysec\footnote{blah}} diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d195d8445..3d0ed8702 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -32,7 +32,7 @@ Markdown: -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared @@ -82,7 +82,7 @@ writePlain opts document = where document' = plainify document plainify :: Pandoc -> Pandoc -plainify = bottomUp go +plainify = walk go where go :: Inline -> Inline go (Emph xs) = SmallCaps xs go (Strong xs) = SmallCaps xs @@ -643,13 +643,13 @@ inlineToMarkdown opts (Strikeout lst) = do then "~~" <> contents <> "~~" else "" <> contents <> "" inlineToMarkdown opts (Superscript lst) = do - let lst' = bottomUp escapeSpaces lst + let lst' = walk escapeSpaces lst contents <- inlineListToMarkdown opts lst' return $ if isEnabled Ext_superscript opts then "^" <> contents <> "^" else "" <> contents <> "" inlineToMarkdown opts (Subscript lst) = do - let lst' = bottomUp escapeSpaces lst + let lst' = walk escapeSpaces lst contents <- inlineListToMarkdown opts lst' return $ if isEnabled Ext_subscript opts then "~" <> contents <> "~" diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 589010bb9..fb94d9ffb 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem, warn ) import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) import Text.Pandoc.XML @@ -63,7 +63,7 @@ writeODT opts doc@(Pandoc meta _) = do -- handle pictures picEntriesRef <- newIORef ([] :: [Entry]) let sourceDir = writerSourceDirectory opts - doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc + doc' <- walkM (transformPic sourceDir picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 6d2b1229d..0e8ce2ece 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate') -import Text.Pandoc.Generic (bottomUpM) +import Text.Pandoc.Walk import Data.List ( isSuffixOf, intercalate ) import Data.Char ( ord, chr, isDigit, toLower ) import System.FilePath ( takeExtension ) @@ -70,7 +70,7 @@ rtfEmbedImage x = return x -- images embedded as encoded binary data. writeRTFWithEmbeddedImages :: WriterOptions -> Pandoc -> IO String writeRTFWithEmbeddedImages options doc = - writeRTF options `fmap` bottomUpM rtfEmbedImage doc + writeRTF options `fmap` walkM rtfEmbedImage doc -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String -- cgit v1.2.3 From e279175ea517e2df65fe5d716bc02e383b04fc36 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Aug 2013 15:58:09 -0700 Subject: Options: Changed `writerSourceDir` to `writerSourceURL` (now a Maybe). Previously we used to store the directory of the first input file, even if it was local, and used this as a base directory for finding images in ODT, EPUB, Docx, and PDF. This has been confusing to many users. It seems better to look for images relative to the current working directory, even if the first file argument is in another directory. writerSourceURL is set to 'Just url' when the first command-line argument is an absolute URL. (So, relative links will be resolved in relation to the first page.) Otherwise, 'Nothing'. The ODT, EPUB, Docx, and PDF writers have been modified accordingly. Note that this change may break some existing workflows. If you have been assuming that relative links will be interpreted relative to the directory of the first file argument, you'll need to make that the current directory before running pandoc. Closes #942. --- pandoc.hs | 12 +++++++----- src/Text/Pandoc/Options.hs | 4 ++-- src/Text/Pandoc/PDF.hs | 6 +++--- src/Text/Pandoc/Shared.hs | 20 ++++++++++---------- src/Text/Pandoc/Writers/Docx.hs | 3 +-- src/Text/Pandoc/Writers/EPUB.hs | 21 +++++++-------------- src/Text/Pandoc/Writers/ODT.hs | 9 ++++----- 7 files changed, 34 insertions(+), 41 deletions(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index fdf0b35b7..81672e16c 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1034,13 +1034,15 @@ main = do return $ Just csl { CSL.styleAbbrevs = abbrevs } else return Nothing - let sourceDir = case sources of - [] -> "." + let sourceURL = case sources of + [] -> Nothing (x:_) -> case parseURI x of Just u | uriScheme u `elem` ["http:","https:"] -> - show u{ uriPath = "", uriQuery = "", uriFragment = "" } - _ -> takeDirectory x + Just $ show u{ uriPath = "", + uriQuery = "", + uriFragment = "" } + _ -> Nothing let readerOpts = def{ readerSmart = smart || (texLigatures && (laTeXOutput || "context" `isPrefixOf` writerName')) @@ -1074,7 +1076,7 @@ main = do writerColumns = columns, writerEmailObfuscation = obfuscationMethod, writerIdentifierPrefix = idPrefix, - writerSourceDirectory = sourceDir, + writerSourceURL = sourceURL, writerUserDataDir = datadir, writerHtml5 = html5, writerHtmlQTags = htmlQTags, diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 61a85cf6e..c7c37d6b8 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -286,7 +286,7 @@ data WriterOptions = WriterOptions , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML -- and for footnote marks in markdown - , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file + , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations @@ -329,7 +329,7 @@ instance Default WriterOptions where , writerColumns = 72 , writerEmailObfuscation = JavascriptObfuscation , writerIdentifierPrefix = "" - , writerSourceDirectory = "." + , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc , writerBiblioFiles = [] diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index ce20ac1b4..ae611bc37 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -65,17 +65,17 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) -> Pandoc -- ^ document -> IO (Either ByteString ByteString) makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do - doc' <- handleImages (writerSourceDirectory opts) tmpdir doc + doc' <- handleImages (writerSourceURL opts) tmpdir doc let source = writer opts doc' tex2pdf' tmpdir program source -handleImages :: String -- ^ source directory/base URL +handleImages :: Maybe String -- ^ source base URL -> FilePath -- ^ temp dir to store images -> Pandoc -- ^ document -> IO Pandoc handleImages baseURL tmpdir = walkM (handleImage' baseURL tmpdir) -handleImage' :: String +handleImage' :: Maybe String -> FilePath -> Inline -> IO Inline diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6fd78b188..d670a35bc 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -612,18 +612,18 @@ readDataFileUTF8 userDir fname = -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. -fetchItem :: String -> String +fetchItem :: Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) -fetchItem sourceDir s = - case s of - _ | isAbsoluteURI s -> openURL s - | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s - | otherwise -> E.try $ do +fetchItem sourceURL s + | isAbsoluteURI s = openURL s + | otherwise = case sourceURL of + Just u -> openURL (u ++ "/" ++ s) + Nothing -> E.try readLocalFile + where readLocalFile = do let mime = case takeExtension s of - ".gz" -> getMimeType $ dropExtension s - x -> getMimeType x - let f = sourceDir s - cont <- BS.readFile f + ".gz" -> getMimeType $ dropExtension s + x -> getMimeType x + cont <- BS.readFile s return (cont, mime) -- | Read from a URL and return raw data and maybe mime type. diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index aa618b2cc..c8673ae48 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -728,8 +728,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do case M.lookup src imgs of Just (_,_,_,elt,_) -> return [elt] Nothing -> do - let sourceDir = writerSourceDirectory opts - res <- liftIO $ fetchItem sourceDir src + res <- liftIO $ fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index fa2b45036..ac0e7610c 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -55,7 +55,7 @@ import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) #if MIN_VERSION_base(4,6,0) #else @@ -93,7 +93,6 @@ writeEPUB opts doc@(Pandoc meta _) = do then MathML Nothing else writerHTMLMathMethod opts , writerWrapText = False } - let sourceDir = writerSourceDirectory opts' let mbCoverImage = lookup "epub-cover-image" vars -- cover page @@ -117,10 +116,10 @@ writeEPUB opts doc@(Pandoc meta _) = do -- handle pictures picsRef <- newIORef [] Pandoc _ blocks <- walkM - (transformInline opts' sourceDir picsRef) doc + (transformInline opts' picsRef) doc pics <- readIORef picsRef let readPicEntry entries (oldsrc, newsrc) = do - res <- fetchItem sourceDir oldsrc + res <- fetchItem (writerSourceURL opts') oldsrc case res of Left _ -> do warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." @@ -414,19 +413,13 @@ showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" transformInline :: WriterOptions - -> FilePath -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images -> Inline -> IO Inline -transformInline opts sourceDir picsRef (Image lab (src,tit)) - | isAbsoluteURI src = do - raw <- makeSelfContained Nothing - $ writeHtmlInline opts (Image lab (src,tit)) - return $ RawInline (Format "html") raw - | otherwise = do +transformInline opts picsRef (Image lab (src,tit)) = do let src' = unEscapeString src pics <- readIORef picsRef - let oldsrc = sourceDir src' + let oldsrc = maybe src' ( src) $ writerSourceURL opts let ext = takeExtension src' newsrc <- case lookup oldsrc pics of Just n -> return n @@ -435,11 +428,11 @@ transformInline opts sourceDir picsRef (Image lab (src,tit)) modifyIORef picsRef ( (oldsrc, new): ) return new return $ Image lab (newsrc, tit) -transformInline opts _ _ (x@(Math _ _)) +transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained Nothing $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline _ _ _ x = return x +transformInline _ _ x = return x writeHtmlInline :: WriterOptions -> Inline -> String writeHtmlInline opts z = trimr $ diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index fb94d9ffb..751a323f5 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -62,8 +62,7 @@ writeODT opts doc@(Pandoc meta _) = do readDataFile datadir "reference.odt" -- handle pictures picEntriesRef <- newIORef ([] :: [Entry]) - let sourceDir = writerSourceDirectory opts - doc' <- walkM (transformPic sourceDir picEntriesRef) doc + doc' <- walkM (transformPic opts picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents @@ -111,9 +110,9 @@ writeODT opts doc@(Pandoc meta _) = do let archive'' = addEntryToArchive metaEntry archive' return $ fromArchive archive'' -transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline -transformPic sourceDir entriesRef (Image lab (src,_)) = do - res <- fetchItem sourceDir src +transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline +transformPic opts entriesRef (Image lab (src,_)) = do + res <- fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do warn $ "Could not find image `" ++ src ++ "', skipping..." -- cgit v1.2.3 From 7b975c2bcc32e5ddd96338afdb32a1ceacdc0980 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Aug 2013 16:16:24 -0700 Subject: PDF: Add suggestion to use --latex-engine=xelatex on encoding error. --- src/Text/Pandoc/PDF.hs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index ae611bc37..a445e2991 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -109,8 +109,14 @@ tex2pdf' tmpDir program source = do (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source let msg = "Error producing PDF from TeX source." case (exit, mbPdf) of - (ExitFailure _, _) -> return $ Left $ - msg <> "\n" <> extractMsg log' + (ExitFailure _, _) -> do + let logmsg = extractMsg log' + let extramsg = + case logmsg of + x | "! Package inputenc Error" `BC.isPrefixOf` x -> + "\nTry running pandoc with --latex-engine=xelatex." + _ -> "" + return $ Left $ msg <> "\n" <> extractMsg log' <> extramsg (ExitSuccess, Nothing) -> return $ Left msg (ExitSuccess, Just pdf) -> return $ Right pdf -- cgit v1.2.3 From eb0c0b86ed518982eb5d3336e73ff5cb1d59d87c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Aug 2013 17:13:46 -0700 Subject: ODT/OpenDocument writer: Minor changes for ODF 1.2 conformance. See #939. We leave the nonconforming contextual-spacing attribute, which is provided by LibreOffice itself and seems to be supported. --- data/reference.odt | Bin 7058 -> 10702 bytes data/templates | 2 +- src/Text/Pandoc/Writers/ODT.hs | 20 ++++++++++++++------ src/Text/Pandoc/Writers/OpenDocument.hs | 3 ++- tests/writer.opendocument | 2 +- 5 files changed, 18 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/data/reference.odt b/data/reference.odt index 6307119d3..29c1777d7 100644 Binary files a/data/reference.odt and b/data/reference.odt differ diff --git a/data/templates b/data/templates index c27f59c01..0cb55f228 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit c27f59c010b0468f01b710cdf3a3c04a450a03e7 +Subproject commit 0cb55f2289148b106ab78ce8f15efc8d0b8acda0 diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 751a323f5..cc0a06243 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -65,26 +65,30 @@ writeODT opts doc@(Pandoc meta _) = do doc' <- walkM (transformPic opts picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime - let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents + let contentEntry = toEntry "content.xml" epochtime + $ fromStringLazy newContents picEntries <- readIORef picEntriesRef - let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries + let archive = foldr addEntryToArchive refArchive + $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of Nothing -> empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp) + ,("manifest:version", "1.2") ] - let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] + let files = [ ent | ent <- filesInArchive archive, + not ("META-INF" `isPrefixOf` ent) ] let manifestEntry = toEntry "META-INF/manifest.xml" epochtime $ fromStringLazy $ render Nothing $ text "" $$ ( inTags True "manifest:manifest" - [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")] + [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0") + ,("manifest:version","1.2")] $ ( selfClosingTag "manifest:file-entry" [("manifest:media-type","application/vnd.oasis.opendocument.text") - ,("manifest:version","1.2") ,("manifest:full-path","/")] $$ vcat ( map toFileEntry $ files ) ) @@ -107,7 +111,11 @@ writeODT opts doc@(Pandoc meta _) = do ) ) ) - let archive'' = addEntryToArchive metaEntry archive' + -- make sure mimetype is first + let mimetypeEntry = toEntry "mimetype" epochtime + $ fromStringLazy "application/vnd.oasis.opendocument.text" + let archive'' = addEntryToArchive mimetypeEntry + $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 05c576c20..3ec5c2073 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -461,7 +461,8 @@ tableStyle :: Int -> [(Char,Double)] -> Doc tableStyle num wcs = let tableId = "Table" ++ show (num + 1) table = inTags True "style:style" - [("style:name", tableId)] $ + [("style:name", tableId) + ,("style:family", "table")] $ selfClosingTag "style:table-properties" [("table:align" , "center")] colStyle (c,0) = selfClosingTag "style:style" diff --git a/tests/writer.opendocument b/tests/writer.opendocument index 9e1661475..1cee01f76 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -1,5 +1,5 @@ - + -- cgit v1.2.3 From 3ebdc5b5f0f5bc88f727a36268d55921672899c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 Aug 2013 16:21:24 -0700 Subject: Text.Pandoc.Compat.Monoid: Small improvements to the (<>) definition. --- src/Text/Pandoc/Compat/Monoid.hs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Compat/Monoid.hs b/src/Text/Pandoc/Compat/Monoid.hs index 80ffcbbd6..cb7ea2527 100644 --- a/src/Text/Pandoc/Compat/Monoid.hs +++ b/src/Text/Pandoc/Compat/Monoid.hs @@ -11,6 +11,10 @@ import Data.Monoid (mappend, Monoid(..)) #if MIN_VERSION_base(4,5,0) #else +infixr 6 <> + +-- | An infix synonym for 'mappend'. (<>) :: Monoid m => m -> m -> m (<>) = mappend +{-# INLINE (<>) #-} #endif -- cgit v1.2.3 From 3e8bd8aa15a57c3dc87772049aabedeb1e0c7582 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 14 Aug 2013 23:24:45 -0700 Subject: Updated for removed unMeta, unFormat in pandoc-types. --- pandoc.cabal | 9 +++++++++ scripts/comments.py | 3 +-- scripts/myemph.py | 7 +++++-- scripts/tikz.py | 2 +- src/Text/Pandoc/Writers/RST.hs | 4 ++-- tests/docbook-reader.native | 2 +- tests/haddock-reader.native | 2 +- tests/html-reader.native | 2 +- tests/latex-reader.native | 8 ++++---- tests/markdown-reader-more.native | 10 +++++----- tests/mediawiki-reader.native | 22 +++++++++++----------- tests/opml-reader.native | 2 +- tests/rst-reader.native | 8 ++++---- tests/s5.native | 2 +- tests/testsuite.native | 38 +++++++++++++++++++------------------- tests/textile-reader.native | 18 +++++++++--------- tests/writer.native | 38 +++++++++++++++++++------------------- 17 files changed, 94 insertions(+), 83 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index e22908918..352da4988 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -112,6 +112,15 @@ Extra-Source-Files: -- generated man pages (produced post-build) man/man1/pandoc.1, man/man5/pandoc_markdown.5, + -- python library and sample python scripts + scripts/abc.py, + scripts/comments.py, + scripts/graphviz.py, + scripts/pandoc.py, + scripts/caps.py, + scripts/deemph.py, + scripts/myemph.py, + scripts/tikz.py, -- tests tests/bodybg.gif, tests/docbook-reader.docbook diff --git a/scripts/comments.py b/scripts/comments.py index 304af1a2d..ded21039c 100755 --- a/scripts/comments.py +++ b/scripts/comments.py @@ -15,8 +15,7 @@ incomment = False def comment(k,v,fmt): global incomment if k == 'RawBlock': - f, s = v - fmt = f['unFormat'] + fmt, s = v if fmt == "html": if re.search("", s): incomment = True diff --git a/scripts/myemph.py b/scripts/myemph.py index e527a0b2e..2a322b385 100755 --- a/scripts/myemph.py +++ b/scripts/myemph.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -from pandoc import toJSONFilter, rawInline +from pandoc import toJSONFilter """ Pandoc filter that causes emphasis to be rendered using @@ -7,9 +7,12 @@ the custom macro '\myemph{...}' rather than '\emph{...}' in latex. Other output formats are unaffected. """ +def latex(s): + return {'RawInline': ['latex', s]} + def myemph(k, v, f): if k == 'Emph' and f == 'latex': - return [rawInline("latex", "\\myemph{")] + v + [rawInline("latex","}")] + return [latex('\\myemph{')] + v + [latex('}')] if __name__ == "__main__": toJSONFilter(myemph) diff --git a/scripts/tikz.py b/scripts/tikz.py index 7e1ed7927..4ff8b2383 100755 --- a/scripts/tikz.py +++ b/scripts/tikz.py @@ -44,7 +44,7 @@ def tikz2image(tikz, filetype, outfile): def tikz(key, value, format): if key == 'RawBlock': [fmt, code] = value - if fmt['unFormat'] == "latex" and re.match("\\\\begin{tikzpicture}", code): + if fmt == "latex" and re.match("\\\\begin{tikzpicture}", code): outfile = imagedir + '/' + sha1(code) if format == "html": filetype = "png" diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 5fbbb6afc..557658bc8 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -176,10 +176,10 @@ blockToRST (Para inlines) | otherwise = do contents <- inlineListToRST inlines return $ contents <> blankline -blockToRST (RawBlock f str) +blockToRST (RawBlock f@(Format f') str) | f == "rst" = return $ text str | otherwise = return $ blankline <> ".. raw:: " <> - text (map toLower $ unFormat f) $+$ + text (map toLower f') $+$ (nest 3 $ text str) $$ blankline blockToRST HorizontalRule = return $ blankline $$ "--------------" $$ blankline diff --git a/tests/docbook-reader.native b/tests/docbook-reader.native index 2d29bb154..8c94fea3e 100644 --- a/tests/docbook-reader.native +++ b/tests/docbook-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 1 ("",[],[]) [Str "Headers"] ,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index 877719b50..c17c2ddf0 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList []}) +Pandoc (Meta (fromList [])) [Para [Str "This",Space,Str "file",Space,Str "tests",Space,Str "the",Space,Str "Pandoc",Space,Str "reader",Space,Str "for",Space,Str "Haddock.",Space,Str "We've",Space,Str "borrowed",Space,Str "examples",Space,Str "from",Space,Str "Haddock's",Space,Str "documentation:",Space,Link [Str "http://www.haskell.org/haddock/doc/html/ch03s08.html"] ("http://www.haskell.org/haddock/doc/html/ch03s08.html","http://www.haskell.org/haddock/doc/html/ch03s08.html"),Str "."] ,Para [Str "The",Space,Str "following",Space,Str "characters",Space,Str "have",Space,Str "special",Space,Str "meanings",Space,Str "in",Space,Str "Haddock,",Space,Str "/,",Space,Str "',",Space,Str "`,",Space,Str "\",",Space,Str "@,",Space,Str "<,",Space,Str "so",Space,Str "they",Space,Str "must",Space,Str "be",Space,Str "escaped."] ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] diff --git a/tests/html-reader.native b/tests/html-reader.native index 15937e594..8f60f040e 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] diff --git a/tests/latex-reader.native b/tests/latex-reader.native index 504e8b701..ddee17f9e 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -1,5 +1,5 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) -[RawBlock (Format {unFormat = "latex"}) "\\maketitle" +Pandoc (Meta (fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) +[RawBlock (Format "latex") "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] @@ -260,8 +260,8 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,HorizontalRule ,Header 1 ("",[],[]) [Str "LaTeX"] ,BulletList - [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline (Format {unFormat = "latex"}) "\\cite[22-23]{smith.1899}"]]] - ,[Para [RawInline (Format {unFormat = "latex"}) "\\doublespacing"]] + [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] + ,[Para [RawInline (Format "latex") "\\doublespacing"]] ,[Para [Math InlineMath "2+2=4"]] ,[Para [Math InlineMath "x \\in y"]] ,[Para [Math InlineMath "\\alpha \\wedge \\omega"]] diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index c88c0ed67..ca588571f 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -2,9 +2,9 @@ ,Header 2 ("blank-line-before-url-in-link-reference",[],[]) [Str "Blank",Space,Str "line",Space,Str "before",Space,Str "URL",Space,Str "in",Space,Str "link",Space,Str "reference"] ,Para [Link [Str "foo"] ("/url",""),Space,Str "and",Space,Link [Str "bar"] ("/url","title")] ,Header 2 ("raw-context-environments",[],[]) [Str "Raw",Space,Str "ConTeXt",Space,Str "environments"] -,Plain [RawInline (Format {unFormat = "tex"}) "\\placeformula "] -,RawBlock (Format {unFormat = "context"}) "\\startformula\n L_{1} = L_{2}\n \\stopformula" -,RawBlock (Format {unFormat = "context"}) "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" +,Plain [RawInline (Format "tex") "\\placeformula "] +,RawBlock (Format "context") "\\startformula\n L_{1} = L_{2}\n \\stopformula" +,RawBlock (Format "context") "\\start[a2]\n\\start[a2]\n\\stop[a2]\n\\stop[a2]" ,Header 2 ("urls-with-spaces",[],[]) [Str "URLs",Space,Str "with",Space,Str "spaces"] ,Para [Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("/bar%20and%20baz",""),Space,Link [Str "foo"] ("bar%20baz","title")] ,Para [Link [Str "baz"] ("/foo%20foo",""),Space,Link [Str "bam"] ("/foo%20fee",""),Space,Link [Str "bork"] ("/foo/zee%20zob","title")] @@ -12,13 +12,13 @@ ,HorizontalRule ,HorizontalRule ,Header 2 ("raw-html-before-header",[],[]) [Str "Raw",Space,Str "HTML",Space,Str "before",Space,Str "header"] -,Para [RawInline (Format {unFormat = "html"}) "
",RawInline (Format {unFormat = "html"}) ""] +,Para [RawInline (Format "html") "",RawInline (Format "html") ""] ,Header 3 ("my-header",[],[]) [Str "my",Space,Str "header"] ,Header 2 ("in-math",[],[]) [Str "$",Space,Str "in",Space,Str "math"] ,Para [Math InlineMath "\\$2 + \\$3"] ,Header 2 ("commented-out-list-item",[],[]) [Str "Commented-out",Space,Str "list",Space,Str "item"] ,BulletList - [[Plain [Str "one",Space,RawInline (Format {unFormat = "html"}) ""]] + [[Plain [Str "one",Space,RawInline (Format "html") ""]] ,[Plain [Str "three"]]] ,Header 2 ("backslash-newline",[],[]) [Str "Backslash",Space,Str "newline"] ,Para [Str "hi",LineBreak,Str "there"] diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index f6e09e45a..81596c7d7 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList []}) +Pandoc (Meta (fromList [])) [Header 1 ("",[],[]) [Str "header"] ,Header 2 ("",[],[]) [Str "header",Space,Str "level",Space,Str "two"] ,Header 3 ("",[],[]) [Str "header",Space,Str "level",Space,Str "3"] @@ -51,11 +51,11 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "bud"] ,Para [Str "another"] ,Header 2 ("",[],[]) [Str "raw",Space,Str "html"] -,Para [Str "hi",Space,RawInline (Format {unFormat = "html"}) "",Emph [Str "there"],RawInline (Format {unFormat = "html"}) "",Str "."] -,Para [RawInline (Format {unFormat = "html"}) "",Str "inserted",RawInline (Format {unFormat = "html"}) ""] -,RawBlock (Format {unFormat = "html"}) "
" +,Para [Str "hi",Space,RawInline (Format "html") "",Emph [Str "there"],RawInline (Format "html") "",Str "."] +,Para [RawInline (Format "html") "",Str "inserted",RawInline (Format "html") ""] +,RawBlock (Format "html") "
" ,Para [Str "hi",Space,Emph [Str "there"]] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"] ,Para [Str "H",Subscript [Str "2"],Str "O",Space,Str "base",Superscript [Emph [Str "exponent"]],Space,Strikeout [Str "hello"]] ,Header 2 ("",[],[]) [Str "inline",Space,Str "code"] @@ -140,7 +140,7 @@ Pandoc (Meta {unMeta = fromList []}) ,[Plain [Str "this",Space,Str "looks",Space,Str "like",Space,Str "a",Space,Str "continuation"]] ,[Plain [Str "and",Space,Str "is",Space,Str "often",Space,Str "used"]] ,[Plain [Str "instead",LineBreak,Str "of",Space,Str "
"]]])]] - ,[Plain [RawInline (Format {unFormat = "mediawiki"}) "{{{template\n|author=John\n|title=My Book\n}}}"] + ,[Plain [RawInline (Format "mediawiki") "{{{template\n|author=John\n|title=My Book\n}}}"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "five",Space,Str "sub",Space,Str "1"] ,OrderedList (1,DefaultStyle,DefaultDelim) @@ -168,16 +168,16 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"] ,Para [Code ("",[],[]) "Start\160with\160a\160space\160in\160the\160first\160column,",LineBreak,Code ("",[],[]) "(before\160the\160).",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "Then\160your\160block\160format\160will\160be",LineBreak,Code ("",[],[]) "\160\160\160\160maintained.",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "This\160is\160good\160for\160copying\160in\160code\160blocks:",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "def\160function():",LineBreak,Code ("",[],[]) "\160\160\160\160\"\"\"documentation\160string\"\"\"",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\160\160\160\160if\160True:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160True",LineBreak,Code ("",[],[]) "\160\160\160\160else:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160False"] ,Para [Str "Not"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "preformatted"] ,Para [Str "Don't",Space,Str "need"] ,Para [Code ("",[],[]) "a\160blank\160line"] ,Para [Str "around",Space,Str "a",Space,Str "preformatted",Space,Str "block."] ,Header 2 ("",[],[]) [Str "templates"] -,RawBlock (Format {unFormat = "mediawiki"}) "{{Welcome}}" -,RawBlock (Format {unFormat = "mediawiki"}) "{{Foo:Bar}}" -,RawBlock (Format {unFormat = "mediawiki"}) "{{Thankyou|all your effort|Me}}" -,Para [Str "Written",Space,RawInline (Format {unFormat = "mediawiki"}) "{{{date}}}",Space,Str "by",Space,RawInline (Format {unFormat = "mediawiki"}) "{{{name}}}",Str "."] +,RawBlock (Format "mediawiki") "{{Welcome}}" +,RawBlock (Format "mediawiki") "{{Foo:Bar}}" +,RawBlock (Format "mediawiki") "{{Thankyou|all your effort|Me}}" +,Para [Str "Written",Space,RawInline (Format "mediawiki") "{{{date}}}",Space,Str "by",Space,RawInline (Format "mediawiki") "{{{name}}}",Str "."] ,Header 2 ("",[],[]) [Str "tables"] ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] [[] diff --git a/tests/opml-reader.native b/tests/opml-reader.native index e71857680..237a16719 100644 --- a/tests/opml-reader.native +++ b/tests/opml-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Dave",Space,Str "Winer"]]),("date",MetaInlines [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]),("title",MetaInlines [Str "States"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "Dave",Space,Str "Winer"]]),("date",MetaInlines [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]),("title",MetaInlines [Str "States"])])) [Header 1 ("",[],[]) [Str "United",Space,Str "States"] ,Header 2 ("",[],[]) [Str "Far",Space,Str "West"] ,Header 3 ("",[],[]) [Str "Alaska"] diff --git a/tests/rst-reader.native b/tests/rst-reader.native index 69e73ae40..09da2d5ef 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Header 1 ("",[],[]) [Str "Level",Space,Str "one",Space,Str "header"] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 2 ("",[],[]) [Str "Level",Space,Str "two",Space,Str "header"] @@ -172,11 +172,11 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [[Para [Str "123-4567"]]])] ,Header 1 ("",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) "
foo
" +,RawBlock (Format "html") "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n foo\n
\n
\n
" +,RawBlock (Format "html") "
\n
\n
\n foo\n
\n
\n
" ,Header 1 ("",[],[]) [Str "LaTeX",Space,Str "Block"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ".",Space,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str "."] ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "",Str "."] diff --git a/tests/s5.native b/tests/s5.native index 5796b74a0..def09cf80 100644 --- a/tests/s5.native +++ b/tests/s5.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Sam",Space,Str "Smith"],MetaInlines [Str "Jen",Space,Str "Jones"]]),("date",MetaInlines [Str "July",Space,Str "15,",Space,Str "2006"]),("title",MetaInlines [Str "My",Space,Str "S5",Space,Str "Document"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "Sam",Space,Str "Smith"],MetaInlines [Str "Jen",Space,Str "Jones"]]),("date",MetaInlines [Str "July",Space,Str "15,",Space,Str "2006"]),("title",MetaInlines [Str "My",Space,Str "S5",Space,Str "Document"])])) [Header 1 ("first-slide",[],[]) [Str "First",Space,Str "slide"] ,BulletList [[Plain [Str "first",Space,Str "bullet"]] diff --git a/tests/testsuite.native b/tests/testsuite.native index f9cf606f3..503b3001e 100644 --- a/tests/testsuite.native +++ b/tests/testsuite.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] @@ -228,45 +228,45 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "bar"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n" +,RawBlock (Format "html") "
\n
\n" ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n\n
" +,RawBlock (Format "html") "\n\n\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] -,RawBlock (Format {unFormat = "html"}) "" +,RawBlock (Format "html") "" ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] -,RawBlock (Format {unFormat = "html"}) "
\n\n\n" +,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format {unFormat = "html"}) "
\n " +,RawBlock (Format "html") "
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n " +,RawBlock (Format "html") "
\n
\n
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n" +,RawBlock (Format "html") "
\n
\n
\n" ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] -,RawBlock (Format {unFormat = "html"}) "\n" +,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n" +,RawBlock (Format "html") "\n\n\n" ,Para [Str "Code",Space,Str "block:"] ,CodeBlock ("",[],[]) "" ,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) " \n" +,RawBlock (Format "html") " \n" ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] -,RawBlock (Format {unFormat = "html"}) "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" +,RawBlock (Format "html") "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" ,HorizontalRule ,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] @@ -294,7 +294,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,HorizontalRule ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList - [[Plain [RawInline (Format {unFormat = "tex"}) "\\cite[22-23]{smith.1899}"]] + [[Plain [RawInline (Format "tex") "\\cite[22-23]{smith.1899}"]] ,[Plain [Math InlineMath "2+2=4"]] ,[Plain [Math InlineMath "x \\in y"]] ,[Plain [Math InlineMath "\\alpha \\wedge \\omega"]] @@ -309,7 +309,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "Shoes",Space,Str "($20)",Space,Str "and",Space,Str "socks",Space,Str "($5)."]] ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$."]]] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,HorizontalRule ,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 70b33f31d..31ab558d7 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList []}) +Pandoc (Meta (fromList [])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "\8217",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] @@ -137,23 +137,23 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("",[],[]) [Str "Entities"] ,Para [Str "*",LineBreak,Str "&"] ,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] -,Para [Str "However",Str ",",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] -,RawBlock (Format {unFormat = "html"}) "
" +,Para [Str "However",Str ",",Space,RawInline (Format "html") "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format "html") "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] +,RawBlock (Format "html") "
" ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "inlined"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Para [Str "as",Space,Str "well",Str "."] ,BulletList [[Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "\8217",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"]] - ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format {unFormat = "html"}) "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format {unFormat = "html"}) ""]]] + ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") ""]]] ,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ,Header 1 ("",[],[]) [Str "Raw",Space,Str "LaTeX"] ,Para [Str "This",Space,Str "Textile",Space,Str "reader",Space,Str "also",Space,Str "accepts",Space,Str "raw",Space,Str "LaTeX",Space,Str "for",Space,Str "blocks",Space,Str ":"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" -,Para [Str "and",Space,Str "for",Space,RawInline (Format {unFormat = "latex"}) "\\emph{inlines}",Str "."] +,RawBlock (Format "latex") "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" +,Para [Str "and",Space,Str "for",Space,RawInline (Format "latex") "\\emph{inlines}",Str "."] ,Header 1 ("",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] ,Para [Str "PBS (Public Broadcasting System)"] ,Para [Str "Hi",Str "\8482"] diff --git a/tests/writer.native b/tests/writer.native index f9cf606f3..503b3001e 100644 --- a/tests/writer.native +++ b/tests/writer.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta (fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])])) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] @@ -228,45 +228,45 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) "
" +,RawBlock (Format "html") "
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
" +,RawBlock (Format "html") "
\n
\n
" ,Plain [Str "bar"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n" +,RawBlock (Format "html") "
\n
\n" ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n\n
" +,RawBlock (Format "html") "\n\n\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] -,RawBlock (Format {unFormat = "html"}) "" +,RawBlock (Format "html") "" ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] -,RawBlock (Format {unFormat = "html"}) "
\n\n\n" +,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format {unFormat = "html"}) "
\n " +,RawBlock (Format "html") "
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n" +,RawBlock (Format "html") "
\n" ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n " +,RawBlock (Format "html") "
\n
\n
\n " ,Plain [Str "foo"] -,RawBlock (Format {unFormat = "html"}) "
\n
\n
\n" +,RawBlock (Format "html") "
\n
\n
\n" ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] -,RawBlock (Format {unFormat = "html"}) "\n" +,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] -,RawBlock (Format {unFormat = "html"}) "\n\n\n" +,RawBlock (Format "html") "\n\n\n" ,Para [Str "Code",Space,Str "block:"] ,CodeBlock ("",[],[]) "" ,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"] -,RawBlock (Format {unFormat = "html"}) " \n" +,RawBlock (Format "html") " \n" ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] -,RawBlock (Format {unFormat = "html"}) "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" +,RawBlock (Format "html") "
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n\n
\n" ,HorizontalRule ,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] @@ -294,7 +294,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,HorizontalRule ,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList - [[Plain [RawInline (Format {unFormat = "tex"}) "\\cite[22-23]{smith.1899}"]] + [[Plain [RawInline (Format "tex") "\\cite[22-23]{smith.1899}"]] ,[Plain [Math InlineMath "2+2=4"]] ,[Plain [Math InlineMath "x \\in y"]] ,[Plain [Math InlineMath "\\alpha \\wedge \\omega"]] @@ -309,7 +309,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "Shoes",Space,Str "($20)",Space,Str "and",Space,Str "socks",Space,Str "($5)."]] ,[Plain [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",Space,Str "emphasized"],Space,Str "23$."]]] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] -,RawBlock (Format {unFormat = "latex"}) "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" +,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" ,HorizontalRule ,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] -- cgit v1.2.3 From c45bd6d468b272a2737dcc2a3c9f4afaebf37494 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Fri, 16 Aug 2013 10:03:54 +1000 Subject: adding support for breve accents via \u{} while reading LaTeX --- src/Text/Pandoc/Readers/LaTeX.hs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 71e1e0ac2..7c370dd47 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -416,6 +416,7 @@ inlineCommands = M.fromList $ , ("=", option (str "=") $ try $ tok >>= accent macron) , ("c", option (str "c") $ try $ tok >>= accent cedilla) , ("v", option (str "v") $ try $ tok >>= accent hacek) + , ("u", option (str "u") $ try $ tok >>= accent breve) , ("i", lit "i") , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) , (",", pure mempty) @@ -708,6 +709,21 @@ hacek 'Z' = 'Ž' hacek 'z' = 'ž' hacek c = c +breve :: Char -> Char +breve 'A' = 'Ă' +breve 'a' = 'ă' +breve 'E' = 'Ĕ' +breve 'e' = 'ĕ' +breve 'G' = 'Ğ' +breve 'g' = 'ğ' +breve 'I' = 'Ĭ' +breve 'i' = 'ĭ' +breve 'O' = 'Ŏ' +breve 'o' = 'ŏ' +breve 'U' = 'Ŭ' +breve 'u' = 'ŭ' +breve c = c + tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) -- cgit v1.2.3 From 172f020bc5b59950afd29411b7d80200d0b38e83 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 Aug 2013 17:21:56 -0700 Subject: Shared: Better error message when default data file not found. Listing the full path can confuse people who are using `--self-contained`: they might have intended the file to be found locally. So now we just list the data file name. --- src/Text/Pandoc/Shared.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d670a35bc..72b467da5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -583,8 +583,7 @@ readDefaultDataFile :: FilePath -> IO BS.ByteString readDefaultDataFile fname = #ifdef EMBED_DATA_FILES case lookup (makeCanonical fname) dataFiles of - Nothing -> ioError $ userError - $ "Data file `" ++ fname ++ "' does not exist" + Nothing -> err 97 $ "Could not find data file " ++ fname Just contents -> return contents where makeCanonical = joinPath . transformPathParts . splitDirectories transformPathParts = reverse . foldl go [] @@ -592,7 +591,12 @@ readDefaultDataFile fname = go (_:as) ".." = as go as x = x : as #else - getDataFileName ("data" fname) >>= BS.readFile + getDataFileName ("data" fname) >>= checkExistence >>= BS.readFile + where checkExistence fn = do + exists <- doesFileExist fn + if exists + then return fn + else err 97 ("Could not find data file " ++ fname) #endif -- | Read file from specified user data directory or, if not found there, from -- cgit v1.2.3 From d3ebca6f553efa37cb6795dbd72e84051edea356 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Fri, 16 Aug 2013 14:48:24 +1000 Subject: LaTeX reader missing \oe and \OE characters --- src/Text/Pandoc/Readers/LaTeX.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7c370dd47..414e50fc8 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -402,6 +402,8 @@ inlineCommands = M.fromList $ , ("l", lit "ł") , ("ae", lit "æ") , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") , ("pounds", lit "£") , ("euro", lit "€") , ("copyright", lit "©") -- cgit v1.2.3 From ab8c0dcd410282baaa9429f755ad55e6d01a2466 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 12:40:38 -0700 Subject: LaTeX reader: parse label after section command and set id. Closes #951. --- src/Text/Pandoc/Readers/LaTeX.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 414e50fc8..50a95c361 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -315,12 +315,14 @@ authors = try $ do addMeta "authors" (map trimInlines auths) section :: Attr -> Int -> LP Blocks -section attr lvl = do +section (ident, classes, kvs) lvl = do hasChapters <- stateHasChapters `fmap` getState let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - return $ headerWith attr lvl' contents + lab <- option ident $ try $ spaces >> controlSeq "label" >> + spaces >> braced + return $ headerWith (lab, classes, kvs) lvl' contents inlineCommand :: LP Inlines inlineCommand = try $ do -- cgit v1.2.3 From 441a7aebf8c141612203d1cab0032f8c55e536ed Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:02:55 -0700 Subject: LaTeX writer: Avoid problem with footnotes in unnumbered headers. Closes #940. Added test case. --- src/Text/Pandoc/Writers/LaTeX.hs | 13 +++++++------ tests/Tests/Writers/LaTeX.hs | 6 ++++++ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7f9a99801..98553c421 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -498,14 +498,15 @@ sectionHeader unnumbered ref level lst = do let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst + txtNoNotes <- inlineListToLaTeX lstNoNotes let star = if unnumbered then text "*" else empty - -- footnotes in sections don't work unless you specify an optional - -- argument: \section[mysec]{mysec\footnote{blah}} - optional <- if lstNoNotes == lst + -- footnotes in sections don't work (except for starred variants) + -- unless you specify an optional argument: + -- \section[mysec]{mysec\footnote{blah}} + optional <- if unnumbered || lstNoNotes == lst then return empty else do - res <- inlineListToLaTeX lstNoNotes - return $ char '[' <> res <> char ']' + return $ brackets txtNoNotes let stuffing = star <> optional <> braces txt book <- gets stBook opts <- gets stOptions @@ -536,7 +537,7 @@ sectionHeader unnumbered ref level lst = do $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> - braces txt + braces txtNoNotes else empty -- | Convert list of inline elements to LaTeX. diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index b1427d91f..ebde5b97c 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -36,4 +36,10 @@ tests = [ testGroup "code blocks" [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> "$\\sigma|_{\\{x\\}}$" ] + , testGroup "headers" + [ "unnumbered header" =: + headerWith ("foo",["unnumbered"],[]) 1 + (text "Header 1" <> note (plain $ text "note")) =?> + "\\section*{Header 1\\footnote{note}}\\label{foo}\n\\addcontentsline{toc}{section}{Header 1}\n" + ] ] -- cgit v1.2.3 From 19591df739a6c50a3d0a9af55ba90b883264b21d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:05:06 -0700 Subject: Shared: stringify now skips over footnotes. That is usually the right thing to do for section labels, etc. --- src/Text/Pandoc/Shared.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 72b467da5..bf92601ef 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -391,6 +391,7 @@ stringify = query go go (Str x) = x go (Code _ x) = x go (Math _ x) = x + go (Note _) = "" go LineBreak = " " go _ = "" -- cgit v1.2.3 From 399c75da448dc0f90855b43ee44e9d7cf8009f1c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:08:11 -0700 Subject: Revert "Shared: stringify now skips over footnotes." This reverts commit 19591df739a6c50a3d0a9af55ba90b883264b21d. This change didn't work; query has already written the contents of the note by the time it gets to Note. --- src/Text/Pandoc/Shared.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index bf92601ef..72b467da5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -391,7 +391,6 @@ stringify = query go go (Str x) = x go (Code _ x) = x go (Math _ x) = x - go (Note _) = "" go LineBreak = " " go _ = "" -- cgit v1.2.3 From 89a7703260703599a033be16e1581a0494326c2b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 16 Aug 2013 13:22:27 -0700 Subject: Shared: Changed stringify so it ignores notes. Also documented this in README. --- README | 1 + src/Text/Pandoc/Shared.hs | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/README b/README index e5de97556..c25d611d6 100644 --- a/README +++ b/README @@ -980,6 +980,7 @@ automatically assigned a unique identifier based on the header text. To derive the identifier from the header text, - Remove all formatting, links, etc. + - Remove all footnotes. - Remove all punctuation, except underscores, hyphens, and periods. - Replace all spaces and newlines with hyphens. - Convert all alphabetic characters to lowercase. diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 72b467da5..eef150351 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -384,8 +384,10 @@ consolidateInlines (x : xs) = x : consolidateInlines xs consolidateInlines [] = [] -- | Convert list of inlines to a string with formatting removed. +-- Footnotes are skipped (since we don't want their contents in link +-- labels). stringify :: [Inline] -> String -stringify = query go +stringify = query go . walk deNote where go :: Inline -> [Char] go Space = " " go (Str x) = x @@ -393,6 +395,8 @@ stringify = query go go (Math _ x) = x go LineBreak = " " go _ = "" + deNote (Note _) = Str "" + deNote x = x -- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. -- cgit v1.2.3 From 5a5a2522163d73c3b91db2cb2b73e697a5dcfb23 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 17 Aug 2013 10:29:12 -0700 Subject: Markdown reader: Don't generate blank title, author, date elements. --- src/Text/Pandoc/Readers/Markdown.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 251554de1..906dd10f2 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -221,9 +221,9 @@ pandocTitleBlock = try $ do title' <- title author' <- author date' <- date - return $ B.setMeta "title" title' - . B.setMeta "author" author' - . B.setMeta "date" date' + return $ if B.isNull title' then id else B.setMeta "title" title' + . if null author' then id else B.setMeta "author" author' + . if B.isNull date' then id else B.setMeta "date" date' yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) yamlTitleBlock = try $ do -- cgit v1.2.3 From 3117c668a7d245689bfc291d5d9a64cb3178b52c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 Aug 2013 22:39:14 -0700 Subject: Markdown reader: Parse span, div tags as Span, Div elements. Assuming markdown_in_html extension is set. --- src/Text/Pandoc/Readers/Markdown.hs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 906dd10f2..535fc02c6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -446,6 +446,7 @@ block = choice [ mempty <$ blanklines , header , lhsCodeBlock , rawTeXBlock + , divHtml , htmlBlock , table , lineBlock @@ -1355,6 +1356,7 @@ inline = choice [ whitespace , superscript , inlineNote -- after superscript because of ^[link](/foo)^ , autoLink + , spanHtml , rawHtmlInline , escapedChar , rawLaTeXInline' @@ -1755,6 +1757,26 @@ inBrackets parser = do char ']' return $ "[" ++ contents ++ "]" +spanHtml :: MarkdownParser (F Inlines) +spanHtml = try $ do + guardEnabled Ext_markdown_in_html_blocks + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) + contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) + let ident = maybe "" id $ lookup "id" attrs + let classes = maybe [] words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ B.spanWith (ident, classes, keyvals) <$> contents + +divHtml :: MarkdownParser (F Blocks) +divHtml = try $ do + guardEnabled Ext_markdown_in_html_blocks + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" []) + contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div")) + let ident = maybe "" id $ lookup "id" attrs + let classes = maybe [] words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ B.divWith (ident, classes, keyvals) <$> contents + rawHtmlInline :: MarkdownParser (F Inlines) rawHtmlInline = do guardEnabled Ext_raw_html -- cgit v1.2.3 From 8d441af3da4709fd48a44e860d5a0cd4d35792af Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 14:36:40 -0700 Subject: Adjusted writers and tests for change in parsing of div/span. Textile, MediaWiki, Markdown, Org, RST will emit raw HTML div tags for divs. Otherwise Div and Span are "transparent" block containers. --- src/Text/Pandoc/Writers/Docbook.hs | 2 +- src/Text/Pandoc/Writers/Markdown.hs | 17 ++++++++++----- src/Text/Pandoc/Writers/MediaWiki.hs | 12 +++++++---- src/Text/Pandoc/Writers/Org.hs | 9 +++++++- src/Text/Pandoc/Writers/RST.hs | 6 +++++- src/Text/Pandoc/Writers/Shared.hs | 18 ++++++++++++++++ src/Text/Pandoc/Writers/Textile.hs | 8 ++++++-- tests/testsuite.native | 18 ++++------------ tests/testsuite.txt | 12 +++++------ tests/writer.docbook | 40 ++++++++++++++---------------------- tests/writer.fb2 | 2 +- tests/writer.html | 12 +++-------- tests/writer.markdown | 27 +++++++++++++++++++----- tests/writer.mediawiki | 28 +++++++++++++++++-------- tests/writer.native | 18 ++++------------ tests/writer.opml | 2 +- tests/writer.org | 35 ++++++++++++++++++++++++++----- tests/writer.plain | 5 +++++ tests/writer.rst | 35 ++++++++++++++++++++++++++----- tests/writer.textile | 31 +++++++++++++++++++++++----- 20 files changed, 225 insertions(+), 112 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 3d150d19b..7c03c07dc 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -149,7 +149,7 @@ listItemToDocbook opts item = -- | Convert a Pandoc block element to Docbook. blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty -blockToDocbook opts (Div _ bs) = blocksToDocbook opts bs +blockToDocbook opts (Div _ bs) = blocksToDocbook opts $ map plainToPara bs blockToDocbook _ (Header _ _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst -- title beginning with fig: indicates that the image is a figure diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 3d0ed8702..623c445df 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2013 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2013 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -301,7 +301,13 @@ blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMarkdown _ Null = return empty -blockToMarkdown opts (Div _ bs) = blockListToMarkdown opts bs +blockToMarkdown opts (Div attrs ils) = do + isPlain <- gets stPlain + contents <- blockListToMarkdown opts ils + return $ if isPlain + then contents <> blankline + else tagWithAttrs "div" attrs <> blankline <> + contents <> blankline <> "
" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines return $ contents <> cr @@ -629,8 +635,9 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc -inlineToMarkdown opts (Span _ ils) = - inlineListToMarkdown opts ils +inlineToMarkdown opts (Span attrs ils) = do + contents <- inlineListToMarkdown opts ils + return $ tagWithAttrs "span" attrs <> contents <> text "" inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 4ffba1100..61741a61e 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -34,6 +34,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Writers.Shared +import Text.Pandoc.Pretty (render) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intersect, intercalate, intersperse ) @@ -83,8 +84,10 @@ blockToMediaWiki :: WriterOptions -- ^ Options blockToMediaWiki _ Null = return "" -blockToMediaWiki opts (Div _ bs) = - blockListToMediaWiki opts bs +blockToMediaWiki opts (Div attrs bs) = do + contents <- blockListToMediaWiki opts bs + return $ render Nothing (tagWithAttrs "div" attrs) ++ "\n\n" ++ + contents ++ "\n\n" ++ "" blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines @@ -332,8 +335,9 @@ inlineListToMediaWiki opts lst = -- | Convert Pandoc inline element to MediaWiki. inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String -inlineToMediaWiki opts (Span _ ils) = - inlineListToMediaWiki opts ils +inlineToMediaWiki opts (Span attrs ils) = do + contents <- inlineListToMediaWiki opts ils + return $ render Nothing (tagWithAttrs "span" attrs) ++ contents ++ "" inlineToMediaWiki opts (Emph lst) = do contents <- inlineListToMediaWiki opts lst diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 34ae532b0..51083f52b 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -106,7 +106,14 @@ escapeString = escapeStringUsing $ blockToOrg :: Block -- ^ Block element -> State WriterState Doc blockToOrg Null = return empty -blockToOrg (Div _ bs) = blockListToOrg bs +blockToOrg (Div attrs bs) = do + contents <- blockListToOrg bs + let startTag = tagWithAttrs "div" attrs + let endTag = text "" + return $ blankline $$ "#+BEGIN_HTML" $$ + nest 2 startTag $$ "#+END_HTML" $$ blankline $$ + contents $$ blankline $$ "#+BEGIN_HTML" $$ + nest 2 endTag $$ "#+END_HTML" $$ blankline blockToOrg (Plain inlines) = inlineListToOrg inlines -- title beginning with fig: indicates that the image is a figure blockToOrg (Para [Image txt (src,'f':'i':'g':':':tit)]) = do diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 557658bc8..70c6b4421 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -161,7 +161,11 @@ bordered contents c = blockToRST :: Block -- ^ Block element -> State WriterState Doc blockToRST Null = return empty -blockToRST (Div _ bs) = blockListToRST bs +blockToRST (Div attr bs) = do + contents <- blockListToRST bs + let startTag = ".. raw:: html" $+$ nest 3 (tagWithAttrs "div" attr) + let endTag = ".. raw:: html" $+$ nest 3 "" + return $ blankline <> startTag $+$ contents $+$ endTag $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines -- title beginning with fig: indicates that the image is a figure blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index e6ec853f8..89923822c 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2013 John MacFarlane @@ -32,9 +33,12 @@ module Text.Pandoc.Writers.Shared ( , getField , setField , defField + , tagWithAttrs ) where import Text.Pandoc.Definition +import Text.Pandoc.Pretty +import Text.Pandoc.XML (escapeStringForXML) import Control.Monad (liftM) import Text.Pandoc.Options (WriterOptions(..)) import qualified Data.HashMap.Strict as H @@ -120,3 +124,17 @@ defField field val (Object hashmap) = where f _newval oldval = oldval defField _ _ x = x +-- Produce an HTML tag with the given pandoc attributes. +tagWithAttrs :: String -> Attr -> Doc +tagWithAttrs tag (ident,classes,kvs) = hsep + ["<" <> text tag + ,if null ident + then empty + else "id=" <> doubleQuotes (text ident) + ,if null classes + then empty + else "class=" <> doubleQuotes (text (unwords classes)) + ] + <> hsep (map (\(k,v) -> text k <> "=" <> + doubleQuotes (text (escapeStringForXML v))) kvs) + <> ">" diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 27e8b60ec..7c102cc86 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Writers.Textile ( writeTextile ) where import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (render) import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.XML ( escapeStringForXML ) @@ -101,8 +102,11 @@ blockToTextile :: WriterOptions -- ^ Options blockToTextile _ Null = return "" -blockToTextile opts (Div _ bs) = - blockListToTextile opts bs +blockToTextile opts (Div attr bs) = do + let startTag = render Nothing $ tagWithAttrs "div" attr + let endTag = "" + contents <- blockListToTextile opts bs + return $ startTag ++ "\n\n" ++ contents ++ "\n\n" ++ endTag ++ "\n" blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines diff --git a/tests/testsuite.native b/tests/testsuite.native index d1b14b24e..678d7595f 100644 --- a/tests/testsuite.native +++ b/tests/testsuite.native @@ -228,15 +228,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format "html") "
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "bar"] -,RawBlock (Format "html") "
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]],Div ("",[],[]) [Plain [Str "bar"]]] ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] ,RawBlock (Format "html") "\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] @@ -244,17 +238,13 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] ,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format "html") "
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format "html") "
\n
\n
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]]] ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] ,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] diff --git a/tests/testsuite.txt b/tests/testsuite.txt index 3bb5d8cb5..4ddaae23f 100644 --- a/tests/testsuite.txt +++ b/tests/testsuite.txt @@ -377,7 +377,7 @@ Interpreted markdown in a table: Here's a simple block:
- foo +foo
This should be a code block, though: @@ -393,11 +393,11 @@ As should this: Now, nested:
-
-
- foo -
-
+
+
+ foo +
+
This should just be an HTML comment: diff --git a/tests/writer.docbook b/tests/writer.docbook index 1e77a61ed..e427d8ffc 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -862,22 +862,18 @@ These should not be escaped: \$ \\ \> \[ \{ Simple block on one line: -
- foo -
+ + foo + And nested without indentation: -
-
-
- foo -
-
-
- bar -
-
+ + foo + + + bar + Interpreted markdown in a table: @@ -896,10 +892,9 @@ These should not be escaped: \$ \\ \> \[ \{ Here’s a simple block: -
- - foo -
+ + foo + This should be a code block, though: @@ -917,14 +912,9 @@ These should not be escaped: \$ \\ \> \[ \{ Now, nested: -
-
-
- - foo -
-
-
+ + foo + This should just be an HTML comment: diff --git a/tests/writer.fb2 b/tests/writer.fb2 index 0bcbf1c2a..8106d2b91 100644 --- a/tests/writer.fb2 +++ b/tests/writer.fb2 @@ -1,2 +1,2 @@ -Pandoc Test SuiteJohnMacFarlaneAnonymousJuly 17, 2006pandoc<p>Pandoc Test Suite</p>

John MacFarlane

Anonymous

July 17, 2006

This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.

——————————

<p>Headers</p>
<p>Level 2 with an embedded link </url></p>
<p>Level 3 with emphasis</p>
<p>Level 4</p>
<p>Level 5</p>
<p>Level 1</p>
<p>Level 2 with emphasis</p>
<p>Level 3</p>

with no blank line

<p>Level 2</p>

with no blank line

——————————

<p>Paragraphs</p>

Here’s a regular paragraph.

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

Here’s one with a bullet. * criminey.

There should be a hard line breakhere.

——————————

<p>Block Quotes</p>

E-mail style:

This is a block quote. It is pretty short.

Code in a block quote:

sub status {

print "working";

}

A list:

 1. item one

 2. item two

Nested block quotes:

nested

nested

This should not be a block quote: 2 > 1.

And a following paragraph.

——————————

<p>Code Blocks</p>

Code:

---- (should be four hyphens)

sub status {

print "working";

}

this code block is indented by one tab

And:

this code block is indented by two tabs

These should not be escaped: \$ \\ \> \[ \{

——————————

<p>Lists</p>
<p>Unordered</p>

Asterisks tight:

• asterisk 1

• asterisk 2

• asterisk 3

Asterisks loose:

• asterisk 1

• asterisk 2

• asterisk 3

Pluses tight:

• Plus 1

• Plus 2

• Plus 3

Pluses loose:

• Plus 1

• Plus 2

• Plus 3

Minuses tight:

• Minus 1

• Minus 2

• Minus 3

Minuses loose:

• Minus 1

• Minus 2

• Minus 3

<p>Ordered</p>

Tight:

 1. First

 2. Second

 3. Third

and:

 1. One

 2. Two

 3. Three

Loose using tabs:

 1. First

 2. Second

 3. Third

and using spaces:

 1. One

 2. Two

 3. Three

Multiple paragraphs:

 1. Item 1, graf one.Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

 2. Item 2.

 3. Item 3.

<p>Nested</p>

• Tab

◦ Tab

* Tab

Here’s another:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

Same thing but with paragraphs:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

<p>Tabs and spaces</p>

• this is a list item indented with tabs

• this is a list item indented with spaces

◦ this is an example list item indented with tabs

◦ this is an example list item indented with spaces

<p>Fancy list markers</p>

 (2) begins with 2

 (3) and now 3with a continuation

 (3) iv. sublist with roman numerals, starting with 4

 (3) v. more items

 (3) v. (A) a subsublist

 (3) v. (B) a subsublist

Nesting:

 A. Upper Alpha

 A. I. Upper Roman.

 A. I. (6) Decimal start with 6

 A. I. (6) c) Lower alpha with paren

Autonumbering:

 1. Autonumber.

 2. More.

 2. 1. Nested.

Should not be a list item:

M.A. 2007

B. Williams

——————————

<p>Definition Lists</p>

Tight using spaces:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Tight using tabs:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Loose:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Multiple blocks with italics:

apple

    red fruit    contains seeds, crisp, pleasant to taste

orange

    orange fruit

    { orange code block }

    orange block quote

Multiple definitions, tight:

apple

    red fruit    computer

orange

    orange fruit    bank

Multiple definitions, loose:

apple

    red fruit    computer

orange

    orange fruit    bank

Blank line after term, indented marker, alternate markers:

apple

    red fruit    computer

orange

    orange fruit

 1. sublist

 2. sublist

<p>HTML Blocks</p>

Simple block on one line:

<div>

foo

</div>

And nested without indentation:

<div>

<div>

<div>

foo

</div>

</div>

<div>

bar

</div>

</div>

Interpreted markdown in a table:

<table>

<tr>

<td>

This is emphasized

</td>

<td>

And this is strong

</td>

</tr>

</table>

<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>

Here’s a simple block:

<div>

foo

</div>

This should be a code block, though:

<div>

foo

</div>

As should this:

<div>foo</div>

Now, nested:

<div>

<div>

<div>

foo

</div>

</div>

</div>

This should just be an HTML comment:

<!-- Comment -->

Multiline:

<!--

Blah

Blah

-->

<!--

This is another comment.

-->

Code block:

<!-- Comment -->

Just plain comment, with trailing spaces on the line:

<!-- foo -->

Code:

<hr />

Hr’s:

<hr>

<hr />

<hr />

<hr>

<hr />

<hr />

<hr class="foo" id="bar" />

<hr class="foo" id="bar" />

<hr class="foo" id="bar">

——————————

<p>Inline Markup</p>

This is emphasized, and so is this.

This is strong, and so is this.

An emphasized link[1].

This is strong and em.

So is this word.

This is strong and em.

So is this word.

This is code: >, $, \, \$, <html>.

This is strikeout.

Superscripts: abcd ahello ahello there.

Subscripts: H2O, H23O, Hmany of themO.

These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.

——————————

<p>Smart quotes, ellipses, dashes</p>

“Hello,” said the spider. “‘Shelob’ is my name.”

‘A’, ‘B’, and ‘C’ are letters.

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

‘He said, “I want to go.”’ Were you alive in the 70’s?

Here is some quoted ‘code’ and a “quoted link[2]”.

Some dashes: one—two — three—four — five.

Dashes between numbers: 5–7, 255–66, 1987–1999.

Ellipses…and…and….

——————————

<p>LaTeX</p>

• 

• 2+2=4

• x \in y

• \alpha \wedge \omega

• 223

• p-Tree

• Here’s some display math: \frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}

• Here’s one that has a line break in it: \alpha + \omega \times x^2.

These shouldn’t be math:

• To get the famous equation, write $e = mc^2$.

• $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.)

• Shoes ($20) and socks ($5).

• Escaped $: $73 this should be emphasized 23$.

Here’s a LaTeX table:

\begin{tabular}{|l|l|}\hline

Animal & Number \\ \hline

Dog & 2 \\

Cat & 1 \\ \hline

\end{tabular}

——————————

<p>Special Characters</p>

Here is some unicode:

• I hat: Î

• o umlaut: ö

• section: §

• set membership: ∈

• copyright: ©

AT&T has an ampersand in their name.

AT&T is another way to write it.

This & that.

4 < 5.

6 > 5.

Backslash: \

Backtick: `

Asterisk: *

Underscore: _

Left brace: {

Right brace: }

Left bracket: [

Right bracket: ]

Left paren: (

Right paren: )

Greater-than: >

Hash: #

Period: .

Bang: !

Plus: +

Minus: -

——————————

<p>Links</p>
<p>Explicit</p>

Just a URL[3].

URL and title[4].

URL and title[5].

URL and title[6].

URL and title[7]

URL and title[8]

with_underscore[9]

Email link[10]

Empty[11].

<p>Reference</p>

Foo bar[12].

Foo bar[13].

Foo bar[14].

With embedded [brackets][15].

b[16] by itself should be a link.

Indented once[17].

Indented twice[18].

Indented thrice[19].

This should [not][] be a link.

[not]: /url

Foo bar[20].

Foo biz[21].

<p>With ampersands</p>

Here’s a link with an ampersand in the URL[22].

Here’s a link with an amersand in the link text: AT&T[23].

Here’s an inline link[24].

Here’s an inline link in pointy braces[25].

<p>Autolinks</p>

With an ampersand: http://example.com/?foo=1&bar=2[26]

• In a list?

• http://example.com/[27]

• It should.

An e-mail address: nobody@nowhere.net[28]

Blockquoted: http://example.com/[29]

Auto-links should not occur here: <http://example.com/>

or here: <http://example.com/>

——————————

<p>Images</p>

From “Voyage dans la Lune” by Georges Melies (1902):

lalune

Here is a movie movie icon.

——————————

<p>Footnotes</p>

Here is a footnote reference,[30] and another.[31] This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.[32]

Notes can go in quotes.[33]

 1. And in list items.[34]

This paragraph should not be part of the note, as it is not indented.

<p>1</p>

/url

<p>2</p>

http://example.com/?foo=1&bar=2

<p>3</p>

/url/

<p>4</p>

title: /url/

<p>5</p>

title preceded by two spaces: /url/

<p>6</p>

title preceded by a tab: /url/

<p>7</p>

title with "quotes" in it: /url/

<p>8</p>

title with single quotes: /url/

<p>9</p>

/url/with_underscore

<p>10</p>

mailto:nobody@nowhere.net

<p>11</p>

<p>12</p>

/url/

<p>13</p>

/url/

<p>14</p>

/url/

<p>15</p>

/url/

<p>16</p>

/url/

<p>17</p>

/url

<p>18</p>

/url

<p>19</p>

/url

<p>20</p>

Title with "quotes" inside: /url/

<p>21</p>

Title with "quote" inside: /url/

<p>22</p>

http://example.com/?foo=1&bar=2

<p>23</p>

AT&T: http://att.com/

<p>24</p>

/script?foo=1&bar=2

<p>25</p>

/script?foo=1&bar=2

<p>26</p>

http://example.com/?foo=1&bar=2

<p>27</p>

http://example.com/

<p>28</p>

mailto:nobody@nowhere.net

<p>29</p>

http://example.com/

<p>30</p>

Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.

<p>31</p>

Here’s the long note. This one contains multiple blocks.

Subsequent blocks are indented to show that they belong to the footnote (as with list items).

{ <code> }

If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

<p>32</p>

This is easier to type. Inline notes may contain links[32] and ] verbatim characters, as well as [bracketed text].

<p>33</p>

In quote.

<p>34</p>

In list.

/9j/4AAQSkZJRgABAQEASABIAAD//gBQVGhpcyBhcnQgaXMgaW4gdGhlIHB1YmxpYyBkb21haW4uIEtldmluIEh1Z2hlcywga2V2aW5oQGVpdC5jb20sIFNlcHRlbWJlciAxOTk1/9sAQwABAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/9sAQwEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/8AAEQgAFgAUAwEiAAIRAQMRAf/EABoAAQACAwEAAAAAAAAAAAAAAAAICQUGCgf/xAAjEAABBQEAAwABBQAAAAAAAAAGAwQFBwgCAAEJChEVOXa3/8QAFgEBAQEAAAAAAAAAAAAAAAAABggA/8QAJhEBAAECBQEJAAAAAAAAAAAAAQIAAwQFBhEhszE0NlFUcXR1tP/aAAwDAQACEQMRAD8AqQzziPNmpiqnIO1q4H+WkB84MdlzRSuM82/jVw/JCORtRmQz5d2VTy6WmS2eSYx3U/qkSRbgFsqRzH2Is4/mCluXc33vy8xTnJjTNqV/T8LKmkhr8Hq1da2aOvTfIh2CFeNt+GxFBP8AJFdFUbPWh+4FdXV7OtZOMR7mK9lBWNN+JBmMQ5cwmfH8DEFhTZUCRlE6CBq/ds/nBh9oYygeY1L9FnCUnBSN1t+w0l9bNomx1cllsOrL9OCTKtKOIqua6UVjP0dEvTyM7gp/3whbkAD0ScX3r6MLg+C2/XsMhCnJRn/5cVNHyJHiX6JKIFhhqnFeagm9BIgjfcJyNBTZiROBUk6Mp8CJRmT4NWU2MatV7n495DPk/wAbMJSRJOTBDItq0KR5s/nJN7LPW8AJWtYAoKQaDp+u4XShxgXhYcbHoxNTllCwETGQ8ag2jmDVsk8w/wCOp/C/hn+mWV/utpePH+D5wmF39NY6UakjUYR1Dn0YgRM5zQAAAMdfAA4AOAOArjkMNQ3vgm7UKtBR+m9QHFD5tpnDtpy+t2R20gK/OsmFtuDpaL5mVyiT5qdEVAvZci5ch5VoSGKbwlWTBr0RPoZT07av9lHfrXo6yLApWMugKpPM9SV1cDm65s/wkOHZBojoqiM+6GpMSj4FhtayNAUi5H3LfQBG2KWssFoSPuJdKyMLKtpuLi+e3jwFICUg7CSHsNVlYlKdizOTvKdq3KTsG8pQirsAG6vAB5FdhP490U4gfjxi+DedoqO4YftmKdKNulO26jiOv+2Ga/bftVNFXpHtVHrpLpRFJTpP3z77T469++fTx48e4LueE+NY6UKk7UniLP8A7rNf3X6//9k=/9j/4AAQSkZJRgABAQEAeAB4AAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAD6APoDAREAAhEBAxEB/8QAHAAAAAcBAQAAAAAAAAAAAAAAAQIDBAUGBwAI/8QAPhAAAgEDAwIEBAQFAgUFAAMAAQIDAAQRBRIhBjETIkFRB2FxgRQykaEjQlKxwRXwFjNictEIJEPh8SZTgv/EABcBAQEBAQAAAAAAAAAAAAAAAAABAgT/xAAbEQEBAQEAAwEAAAAAAAAAAAAAARECEiExQf/aAAwDAQACEQMRAD8A2t0YoQpwT2qVzMV+N3UHgrDY2eoM0y58VEbgfp9K1yMRmnuJ5h40jyYHGSeKrWE8u2QAApOMdqGCsmT8h70TAJwMAZx249aKBy4c9vTNUC0zDCgmmmG7Ockjkj1PrUTAjcy5XP0ouCgHae4IomOJHhgIc55PHY0Uk5IXLMcUBQ27n96JYO2MYLebHtRBA7BcMx29sdxQJqwZRtIP+BQKpjHHc+xzigNGoAO/k+nPAoAYlee5oBiGeWySO9AJCgY5PHagFCADzj2GaA2N2TkjA/U0HMwbPPeiyBLDfkkj04FCl1cBMgn6URwYFGySR6D2oAeQDAxnHGKAhU4IbGc+tFwnwDj9aK7f8v2oNu+IHxNvJdXmt9EmKWSqArA/mPvxUxMZNe3Ml1dvNcMzSSEsxPOferJhht/OWyAPc0UfdgDcuM8n50AMCykZFARsngcY/egTcbjnJz9O9AB2kZGSQOcUCX8x83bntQCMruJ4B7D1oCyOGzxtJ9M80CAdg5UjFE0aFJrghLeNpHY4IRdx/QUNWCw6D6q1EZttEvirHAZ4ig/U4qw1b9H+CHVN3Mq6hJaWMJ5ZjJ4hA/7R3P3q3ET+pf8Ap/lWNm03XkkkA8qTW+3PHupP9qxopV78G+s7VSV0+OcAn/kzqSfscVvIKzqPTWu6XKE1LSL+Bhz5oDg/cd6lEZzGwLrtPqrA8frUCJfcw9gfegUjZsEAffNADyHt78UAjCjzDJxRcO5Pw3gwCGOVJQp8ZncMGOeNoxwMY96GCbQffFFcUXKjDDt2NEo+N3yyM5z3okKuqJgIzONoJyuMGi4QfGcqSfXBoYHJx659qKIRnnsfUGgJn/poJYoTIGLY+eDzQFlQK2G/KCTmgbspfO0qce/agPGcR7nHf9vnQFfBPlOc88Gg7uucc/M0Bd208YJJweKAYrea4kKQICRGW5IUYUZJ570DYqcknt3FE0VuVyDzj1oamOlulda6puvC0a0eZVIWSbtGn1Y1NNbX0x8ENH0qL8X1NdtqDoNxiQbIh8u+WpqL70Tc6fcxypouiRadbW8hhLFFXcB7Edz+tNFvEZxkmmgShbA9PlUA+Hgg/wBqDgmBkd6ArJuJBGR7VdEdqWgaVqMfh6hp9pcLj/5Ig2KaKJrvwW6S1EFoLaWwmPIe2fAz81ORTRm3UfwI1mzBbRL+K/ReyS/w3x/b+1Wexmev9O6xoE2zWdOubUDszr5T9G7H9auCJj2n3PPrUXTlGBB2kYx96GlQMjJJHuRRXBgDgk8DtRKH8w4OfYA0SUlIMsFXJ4oujHH8ufnRRGOSNoJNAeFC77F2jPucfvQFEqgY3nj/AKaCUY58wwq54AoCzOmVMke9QeRnGR7ZoEIF7pnaTk49KDpSSwQntQJsGKjgggZ9uDQc4OOe1Am2UCkHOR7dqA8t/cSW8MEkrGGEsUTPCk4zj9KJT3pzQtS6m1aPT9Jh8SVxlmJwqL/UfYURuuhfArR7f8NLrF1cXciKDJCrbI2b7c4+9NGtaRptrpdqltYW0VtAn5Y41wBUodvGjqUdQyn0YZqAIreOBFSFFRF7BQAKA1xcRwKplcJuOBn1NAR7y2ikWMzoZnGVQHJNAuQcD3oBKkD2FBy8jnvQFxnjjmg4rxwKBMqCBtPNA3vbCC+tngvYo54HGGSRQQR9DV0Y91n8DNOvFkuOmZmsrk5PgSNuiY98D1X+1XRhWu6DqWgX72er2j2069t/ZvmD2IoGG7jbnj1FFlB224PB+VClN4DYJHyAojmPGCck8cetCAxgjPp6UaAGKtx6+9ATAXO7nFBw8HHLN+goJhBuj2FeAcnmgNazW8U0vjweODGyqpYrsYjytx3x3oGa5LEEjH9XvQGlgmjjMmQq4HBPfPYgevagG5nhe3tkFuInQHxJQTmQntn0wKBKTlAeDx60DSY+U9zn+mgsnQvROr9Y3W2xi8KxV8SXUnCrjvj1Y/IUR6c6A6H03o6wMVgrSXMoBmuX/NIf8Djt/eiLfjJwO9ZBiOfmKDhktzQAzYBLZ8oyaDF+rOptVv8AUjNZL4tjA/lT+kr3wvqTQX/pi3Y+DqFxKXurmFWAaPaVzg4I/b0oHlxqV7penRTXFu93dPLsESYB2k8n7CgnradLq1WaIOFI/K42sPkRQCg3Kcd6Dgp3d6AdrGg5VxnjmgKWB8uQGxnFAUgKuSefSghuqNC0jXbAWGtxQyJKdsYc4YMf6T6GtDzR8S/hnqfSUz3NvuvNILYSZR5o+ezj/Pb6UGfLzyD/AJoFFySQVBHpQDJ5kGByPahAbWxn5+po0OF3D+XPtQJsNwOe+aAuygmMkebgHnHFALHYpJwSeGz2oGpOJWAI49BQEZlYAHkg4oARVOMvtBIJJ7AUAX6xxSOsUgmjViFcKRuHviiVfvhT8NZuqpk1LVFeHRkPlHZpznsP+n50qPS+mWVppdnFa2cEcFtGu1I41ChR8qyHVxK8cLPDD4kgGVQHBNAa0maaBJGTYzDJXOcUCy5JOaA2OMfoaArkheM7vlQNYNOtoWLJCgLHJwo5NApPKLaNpGRQB6j2oGmnRvcyNd3O/DkeErLhkWgklIdCyZOCRzxzQEeRxhdpUnncBkD5UCxXjJ7+tAlctMsIMLohz5mcZAH09aBQYdQwyAeaAuA7MAQxHH0oG1481nbGVInuWU5Kr+bHrgepoKB1u+o6jqlvBH05NevEBPBK0pQR4I4BHZj+1Bb9IS7lsFtNWtYwDGFYB/EXHqpJ7/WtQYx8VfhGbdZtV6Uh8gy81mpyR6koPb5UGKY4YkeYd88fbFAI5AC98c5oQBb+U9+9GnN5RgDgjOPWgAN3yMfWgAqc91/UUD2RSSRg9+49KCR6e0WfX9WS0icRwgb55WOFijH5nP0FBYNRi6dSR7HRNPmu0hOW1GaXaZMdwBwAP3oynE0XRYrFtV02wS4ECj8dp1wcsE7eJEf39qlFZ616ZttPu7Kbp9Zbi0vYzNCcgjHqoHuKsEp8LPh7P1PqjXerxywaXaviRSu1pWH8g+XuflQemIIY7S3SK3hVIo12pGoAAA7AClEL1N1RH0/oTalcwx+IACLaSQKx59Ppmshv0D1jH1ZbTubU27xkkAnKsuSMg/UUFluLlLaJXETyecKAg554zigXiubeRnSKeJ5FOGVXBIPsaBLULoWkIfw3kYsAqIOSTQJMbpm3oqlmwACeF9yfn+1A+Bx34oE5IY5P+YFbnPIzQKAckHuRQCAQOO1AL8r9KDhkZOT9M8UCcrxgAyYJzwD70CT3Itxm8kgi3fly+P7/AOKA9pskhEkZysnOfeg6RH8w3tgjAHtQRZ1uystSg0m5eRJ2UbHceV8fP3oJkBSAVII9xQFdSRwKDDvjN8L/AMSJte6chxcgFrm1QcSf9aj39x61YMH8Q+CkfhqpQncxBDH5H6VRwXJ/Ke1Am2QchuMYOaNFSAVznB9qAm8f10D2RmX8jDHP3oLbebtA6ej0m2LrfX6LcX7IMskf8kf6HcffIoG8yTadZxSTxCK3kRZUwSFfkruIJ78GhiS6Y1OS3160uZJFWO5bwZtxzuQ8bcfPNMZXvo2wsLnQ9R0q/maJNNv5Yo3bjCuMAHPzqA2jdUan0lF0/ZXcElxp9zE+5WVd/DE71IPPB7H2po1bSNXsdYthLp1ykyEcj+ZT817ioITrnoux6vs1gv5JYnjz4ckZ/Ln5etA+6N0BemdBttMina4WEFfFdQpIJJ7D60E5I4Vo9qnnsQO1A3k0yzeTxhCizZJ3qNpz9RQO449igMSxHGW5NAIwBtUAUAMORkfegMhG3jtQD8+fvQGXJz7UAHuRQA5YDI5FB0qCQA5yaCs2/SFit/Jd3AmvJ2bO64ctt5zwD2oLMilVAUDgcAelAJLbhgZz3oGN9HPIYmhtrWRw2czjt7Y+dA+h3mJS67W9gc0AvuLYANADpkZABHY85oPOnxy+Hx06Z+odGjC2jt/7qBRwjH+cY9D6/OrKMebcceHwfaqCYIyDgZ96GhHOFJI4/WjQpXnsaCz9J6fDqGvRC8OLO3Vri5PB/hqMkfc4H3oDT3UupapcXrKS9zISgDdhnAGPbsKC5aLLBHq9p01c6bbagPE23kpJYhmz5IySAAMj6nNGdRnT2lu3V9vaQQrJDHfCMFj5kAfufsMUFogu5H0jrLUYXK+Lq0aRse/lf/8AOKlFfudagvbnQpNQRmtILydCwPdCQcgMOMZFQanPoeiawBd9M6s9jeKPK1vKQp+RFA4septa6fuFtuqbRrmzx5b+BAdo927A+vsflQXfTr2z1O3W5025juIW/mjOR9KAZI914khaRNo4XdwT9KAl3b2+oWpjMoZWbOVfnI9sUCrXUNssUU8w3sQoJH5jQLvwQQC3NAKvuUPtK54waDg23v6UA7weBnNAIOBigMr+hoOjdZQdhBx3waAVG0Z7UBWfAOQSflQChyNxBAxQRutarb6bHALi9trSW4kEcJnGd7ewFA/j8QEK/IA/MBjmgWDDBB7igj9dupLTTbiaHZ4oQ7A7bQW9ATVgwXSNV6onl8azW6t45pWdxHIxWA/zNtz7A8Glg2S1u7fX+nt0J/H2c4MMhmQoW9GBUjj60g8sfEHpebpDqi4sHLG2Y77eQ486E8fcdvtVFekGW4UfegKVAAKgnFGhuDzxQXbpDTZF6a13UnUqrCOzQ5wGZmXIJ+lE0ppkEK6nJcRWcTW9hA08iKcjcowpye/mxQ0+6VRbC/jvLm48L8LG9y8pIOXxkDnuSTipqHXQMng3es9S3fhn8DbvcZI5Mr/lH700dc3Dad8NtPs4nU6jeXD6nMCwBRF5XOfU8YHrTNJFF1X8RawW1jc4GxTKNrZB385yPkBTFw1stSu7Ni9tPLGSQfK5Aphi8J8UNUm6fn0u72yvJ5fGbuF/39aYYtGgadp9/axXnRetzaXqnhqZI3bEcj4547Ak/X6UxFisPiXe6NMdO65057eQAr+LhUlHHbOPX07UwWXpQ6BqMo1LpgW0sioVI8Qgxk+684qC028M5890Y3kHKbUwF+lA4LDOzu2M4FAOG3DaoI9cntQdJxzQEyR259f/AKoGl5fSQRFo7ZpB/MhYIR9zxQdayyXKb7gqox5Yo2yB9WHc0DPUIWnhWKxkuYFRs5gcKWbPY59KBkx6isVeSGW31JNwHhyOUkA+o8v9qCfjkMo/LJFKqBmRvSgc2swnRyFcYODuXGfpQMtRsLK8vYJL+wjuGiUtHK6hghz6Z7H6UEmCsig84I9RigiruC9t0DaaVmIIHhTOQMeuGwT9qCJ1ywv9T0U29xFFiaVBJGHz5M5ODgYPY/arKJPTtLW1t44i7SKq48w8x+ZPrTRJoipGFQAAdgKgzX47dMJrXSrXkUe6807MykDkp/MP8/aro80FQyZ+tUJ7hvH0x270XQ7KGtXvIk0T4c9P2bIhkvpnvJVfjIxhf7qftRDXpu0/1DpzXltUlkvmWMBI+2zdnn64oYa6yX0XTm0i4jQ3t6wmuV53xov5UPpyeeKyLbpFtZ6Xpmn6TqNq7/ic6pqQRR/DVf8Alq2fTOP0FXBnXU+ox32o3lzeW+JrxlMXHKR9wfbJ/tVWK5f3AnaAjafCTwwcY4BOM/qKKSjA4Dg8j37UHZKkE5P0olSFlcLDdJPbTNBOigjxOVZu3+80Rbbnrq9l0t9I6isRd2rgKpPlZMdyre9An07oupoh1zo2+lea2fMlr+WZFx7ZwwqWDVug/ihDq7R6b1EPwmpMNokPlVj8xng1BqEUe1EAJOMDOc5oDSxq6YYeuaAJF4oCBUQ7mJ45zQHYB14wR86AVjBXyjge1AEcRTHlA9hQE8kbgEohJ5yQM0ETHNqMOr3IZQ9tIMQyEjKt7D3FBLqywRPJKTuxlj3zQI3Ut14e+yhWRj28Q7RjH60EfpF3rU/jLqFrHbS4/hqpJXH19aCRa8jgiVr1xGwXzYyf99qA9tc29/aRXFnKs1vINyOO2KByoxwe9AYocHGKBvdwLcWzxSLuR1KuD6gjBoPHXWujt071Nf6YSSkUnkJ4yp5H7f2rQgWAA3Y+1An4j/1t+tBrHxKuYS+gx24LRx6ZFtI/lz60FY0+/v8ASphNpd68EpXY5AGNvzFF1YOirZbzVrvX9dkNxZWH8eeaY5Lyj8qj7kcVlETqOqXd/HrPUNzcNE16Tbwxf1JkEgD2AA/etBte9R2Oq2cv+p6XHJfBFjgmjkMaRgAAEqO5o1FWfbgjsR8+9AlI5CgEggeoNAq0iug8uD7g80KKmCcZ7fPmjJzJfT/hWtjJvhOPK/OOe49u9A96X1W90/VrRtNkkSfxQF8I5yScdvX6UGidSLpfVFzcvbRiy6kgZBGysFW7B9T7HHNSjU+o9S1iz0e2uNLmX8RYxJ+KgYeVwVGTn5d6gjug/iU3UOt/6TewQpP59skL5B29x/8AYoNHPB78Ggb2l3bXO78PKsoyVyvIBHBFAoSkbfyhn4GfWgTnmWFN7ybAvc4Jz9hQRdx1dp0S3Dw+JJHbDdPIUZUjX3yRz9Bmrgzbqb4x9Oxho4bB751O5HPkXPsc80wQHT/xrJ1IHUbGKO0kdRiBiAgz+YjnsPpTKN/tLy3vLOK5t5klt5F3LKhyCPemAYLuK5XMDEj1OCP71ArGWLMPT0oIbU7h11u2t49OllWWNm/FIRsjI4AI/egfQ2ktpbww2XgxoDl9wJ49cUCHUGv2GixM13Mkcm0squwUH5/SrgxDW/jFcXOteHb3otrKEEiRISRM3zGc49v1qDT+gfiBpvV7y2unxTxywRhz4ozuHbOR2+9Bmf8A6kNIEWpaZqiooEqtBIR6kHI/atfRjDEt3AKjgVQjug9j+lQWh72e/htTOzyeCnhHPomeMYoJvQum7vVD47K9jpsQBkvZ5NoAHcgUAa7rKamE0Lp9Xh0G1OZZTwZSO8jn9cCsivdS38F9cJDZIY7G2URxKe5x/MT7nNaEKrENwAFPPlosFwS2cd/cc0UlIm3JOeKDo2LH+UA0SjgDk98URzPiJ2449e/NAbS7v8PdpKkpikQ5WQLkqccGgmYNQmXWLeQLG9wVRQVPlcj+Yn3xQa98OviAjz3WjdXSpFdliEuJCNjDtsJ7enepRdel+kdL0rqOTVdIsoYklV1dixO3nunpg9jUCnU3WMeka5b2EUcl3JInmigQs6ZPlJAHY8+vpQP9O1m3nthNo0cTwM2JDwoVj6H5gd6CbhtUiVn8TcXO4ktkZ+We1BAf8Z6fZ2uqXWpyxQrbStGseQzMB2IA961B59+IHXmodXal+HsPFh04HbHCo25+bY/zQWv4f/CCxvII73qC8iuXYb1tYZeF9txHf6U3BatX+DvSl86x6cr2dwjbnEUmcj6Enj6U8hLdJdEX/SmowJp2tTT6Oc+La3HO0442+3NBf1LmRUjjQAfmc+nyHvWQockYyQcY3CgaabaPZxGNnaUFi3mPb6f+KA2q3RstNurnBxDE0mPfCk1YPMemaP1L8RtYN9fJPc2aMUaVmCKg54H0z6VRYendf6Z6T1W56a6j6fgfwJyguhGJmPzbIzjHtSjTn0zSunbi01fSkt9Os5GAmWNCDOGxtXb6HnNZEZ8etOF90DPKFy1rKk3zAzg/3rXI8u7zvOTg4zVoTLDJ81QWDTb2SwuvFgcrkbXwM5H0PFGqsjpd6+kcT61Nc2ieb8OikFc/9PA+WfSjKA1nWBzpFlZ/hLWM4KH8zsPVj6mghN4IyQRk5NGo5BkFmyAfSgVjChdpGO/FAXYpOHLBe/FAQqoBJbA9sUBGxgtgEj/eaCf6DGjt1TZf8RNGumKS7mQZQkDIB+WaMrf8Ub/ovV7V20JIYL62K4khhCLOCcEcAdu9BmCuEQvxvyFUg42+v+/rQaj0zax/EXRY9Nns0t9TtM+BqCKAjEclXA98jn+1Si7Cz6u6O0tLjTrxLu2tQJJrDwcKE/m2M2SfeoLrpupDV9Mh1OytUS2vIN8m4BZQf6T7+vPpj50GfdK9L6rJqk1y1y0elRDKRqdjHHoyDhjx39e9BZr7fagW0j3kul3iETRqHkeF8ZBUjkZIxjtk5rQ86dW6r+O1OcW0UtvaRsY4oWfLKBxz7k/5NA46P6X1rqS6WPS7V9v88rAqi/f3oN46X6C1DSotkus+BIwKl8hn2+3PapROXPT2t20bPY6kJ5UGYmbIfIHGW5z68VBI6DrzzWSrrAjtrwFUbDja5OBlfuaCbluJLeNwIpLiVF3bVXAP0Pv8qBxLO8cYcW7vnuqkAigNFKs8CyxlwG/lcYI+1A31ayF/pt1auSFmiaM/LIxVgwfoO413o3qqfSLyUSwodogAyZVGcbPTPr71aNDvendJ6wtbu7Fi1lezK0bS4VZMjtnFZE0bC5u9Jh0qRAr2yw4uWx59vBI44PegN1tpbap0lqOk2sipLPB4aFsnHbBNOR5A1exFhqFxbeKkngyMhdOxIPcVuhiZFz/zBUEwcKvYnP6fWi0+6chjn6h062uATFLcRrIMnzAsO9EehNR+GvTV3GUh0+O2YsGaWHIf9amjIfib0no3S0VtFY3M000zMzLJtLKvvkenyx96oz0rwNjA8cj2osFLbVAbOc9jRQiXOAwxnj3oBlAxwDj37UDY+vHOQeTQBIdqjcPMfnQwJclWyBgCjJBFeefw4VaVycBUGST2wAKD0L8H9C1rSIILjWLSCytY1lZASVnlL4PI/wD8+vvUo1uwbxI5GkjdVc7isvOBjtj2qBWKFZiQ8CJCB5FHYj5jHFArDbQ20ey3RY1HOAOPsKCH1u61CPSLt9MtlXUHUrbCbJBbPdtvYetXR5T1y2udD6lni1ErJdJLvlK4wWOCePvVgsV/8Sr67UW1vA0NiowIonMe4+7FeT9ARQRmodWa9EYpPBhs1Tygw26rk9xknkn70EjonxZ17TXjAeKTkZ3L+YZ7N8vpSjX+lOpNM6umgkMG3EgBV1DYbG4kewz2NZGkC43CP8MPFBONysMAD50DaHVH8S6N1a+BaxMUjd280pA5wPb296DrXWLK9WNoJdtwybxDKPDcAnHIoJBifTBzzmgaz2UFzPFNNbwvLCcxuyglT7igdRRKg8qAZ5JAAzQEnuYoHiSWQIZW2ID/ADH2H6UERr12BY6hueIQJaO7SK/nHfnHtx3pyPGWoN4jynuCfU963RF+DL/UtQWTkjaWY/8ATnHFGql+j1VerdJY8r+KiJz/ANwoy9C/EjqSbpbRY723RJC8ojIcZ4IJ/wAVkecer9en1+9FzeLCCq4URjgDP7mtLhteadBY2kMczyHUpcO0YxtiUjgH/q9celAiLy1kjCX1ruyMLNGdrj0+h+lE0+t+kNQltJ7yKS3jgiTxUFw/hySp7qp70NV6YEBgWUNjBoaKeAODnHrRoVgDnBP0ozpxZ2f4y5trVeGuJFiBPpk4zQep9C0LTembS30fQbWP8ZsDyTugZgf6ix9fYZpbgmbXSmXULaa6kMzpltzcjJ//AGpaLCY1CDsF74PrUCgHY0HbSx7Z96BGUfxB2xjtQZ11t0Tb6jNfyw2wM18gV5AcBdpzyPnV0Yp1F0o/TEczXjXaTOQYpIk3QlT3B9Rj0zVl0VKbVppImheUSwbsgFfXHc0De0tri/ujFYQSSyfmKopPHqaDV/g9p+padr/gkSRTzKu0kZRlPLYPbOPf2pg9GWzRCMJAFxH5do4wayKX1z/G0CdzqLWRkiaTxQBLudclQvovbv3oMU/4Z67uwnUAt3u1Zw42yhmx3/Ln8v0oN86L1d00i3i1UiGQIocNnEbnkqT2xgiguEbI4DIysp7EHNAZnxQQ/Usksej3EsCl5EUthR5sY52/Mjigr6Qrp3R15LqEcIlmgdpFGAsY2navPJApyPJtwd8rnGBuJz6Gt0MzGSTyf0qCwSKA5ZsAjnn2otTXQYj/AOMNIDqCrXUZwf8AuGDRG6fF6Gyfo6+ub0CR4EPgIScLIeAcfc1keatN0661a+S3sYTPKzAbV9B7/StLrQ/iXp9pYLp8elWsUM11AzXMqt53I7g7j244oiB6W6Tn6j2TeAy2FspTeB+Z+/8AmgtnWlvpdl1Dp1pq07Ja20GFQpuDHHAwPf39KDHriVTKSPOCeBnHHtQGsrG5v5pfwcTOIlMjgEeVfck0XRIreS7uUigRpJXOEVe5PtRE/wBJ9HaxqvUcdhNFJp0lviaSWVcMgzxgdySeBipo9T6O8NppUJ1K4iW5KgSvIyqxb5jPH0paJm1NvKivE6Mp7MpyP1FQLRTwy58F0cjuAckfagOTtO3+Y8igMWCIWbOPlzQNhNBOWEbq5Q+YKc4+tAD7JEZgQfXj0oI/VtIttXsZLW5hRopByCP/ADVlGRa78Erae63aXK1tG3LAncM/Kmh10l8IZdBv4rxtTE0yggJsyoz6/P0po0zSNKEMdo9xGnjxuzkqMAEgjj7GmialjWQMgyCRyQcGoITqHT7q/a30+G2jFmwLSzl8GPBGFA9c5NBPRwJDbpHCipEi4CjtigqfWltqCaG8WhNbxyzOBIs8W8FcY4+dWQQ/wtuZdIGqadrknhy2u2QyOSEZOeRngY+XvTBZZevOmhC8janbqiZ53Zzj2FMFcs+sh1ZqsFrp8UkGkrlpbh+DNzhVX5Z5NQTfXyWUXSV2t+wW3EZ5I4HHt61eYPI0mA5C9snFaoLsPv8AvUEk5JcA8cZG480WnOlXX4PVLO4yQ0cyP244Yf8AiiPUfUump1B0/c2O8xfi4v8AmL3UcGpgw/SujNX0Trj8PpckimOMvHO/kEg9Rjs3PpV0aFq/Qqa1ZJ/qcrverEqNOwGM+uMfemiVtrKbQdMNjp9rvtkhPht6mU5yT8u1BkvXg1qXUtOvddgRY1R3j2YHiMvZSD27CgzSCyuNQ1KK0giL3Mz4VAOc/wDignoNNOnaHeiW8hgkku/Al2+Ziqgn09M0ETp0qpqSmGKOdFcEeLwMfPHag3JLuCRtPmQWsDhNphtVAcn1w3BPFSwDdWGpX1/OYdOtbbSrlQ80szHdn0GR24/c1AbWemdatLbTJdGvJIJypDQwMV3exwOPatSz9Ei/R+txy2mr3evyHV4miWIDhGwwyGA7nGRS2YNZAUBWYDdjGayEvxMYB3nYu4KCfU+woG93c2enWs1xcPFDCp8zEgDPzq4GGgz22saS1zZSZhkdsFePXt86YHWmySeLNDMYikZ4YNlvvUD+VARxQJqgwRQHUAAe2O1AWOFRM8mDlgB37fagb6reXFt4ItLZJnZsuWfaI0Hdjwcn2FAz0nWX1i4u4xY3VpFbv4eZ1x4vGdy/KgkriN2aMRlQoOW3DOR/5qwYr1P1tp2pdS3WnanKkGh24kRl2eaYgcb/AFxnnAqiv9Jno0dRLJPbtdQtkNPIALaMnODsPPpSjbdK0DTbWQXui+Gsco3BU5hPP5gPT7VkU74t6PZHpq/1N5ZZbwrtRnmOwDPOFJwPsK1xR5ybudw788VaC5X2WoJRULSBpAe5PA/aiinAZnHck4A70THq/Qr23/4Y0u4lmCpLBGA7epxjH60Du+WGOBvFlFuWOFcYyCfbPrUojri6k06xX8PFNfBUJ3ltzM3scVBjfVvVXVNit5dapNDZGQGK3shjeAe7YHIwAOT3zVggNTiu+orrR4p7m+upJFR7h3TPhggDaoBwRjnPH5hV0af0xotnoD3l5dWdrY2YjGLhwPEHoef996CC6m0HpuPpk3Wny2s9sJPHJ3AeI2D39T37UGU9QTDULuGPSLPwIyoVIYk2lj6/X70G2/DPp0hVudRuBLcwxhRGkeEjB9M+p96DSLprVHiieaAE+YxHkke4H1qUOIBawL4uAuc8nvj71AwjRtQ1eO78QNp9odyLju/qT8uf70Gb6r8SpLzryy0vp+4NxYSSCEswI2u2Rn3OOD9qC4dVamen9NlaC7tUaIFvCmnHiy4HJXJxnOeDVwed+rOvLnqSyWO4jZSru/kc7ck+30GKosXw2+KmqaDJDY3jR3OmqNoRhtZAP6SP7Ggtmt63qbTJ1XLazJpslwBFblypEOAA7L2OWANS+xrnR2vW3UmjJeWp8wJSRf6WHeoJdSPMCRmgMq8DmgBpNsgUIxBGSccD70DczmS8MDWoe28MN4+f588rj980CktuJZYpFdlKZ4B4OfegQ1hpIrVjbsRMBhBj8x9qsHnX/hm36y1O/u9V1ddPmS6aD8OkQdyxOSe4OMmqLUvwQsYY4Xjv7m4YEEhwFyMY7fXB5pRbvhp0jqfSMV7b6jqZvLGQAwxAEBDk54PuD6VkVb49a5DBpiaNaeF/FIaQDumOwpzMGDEZQZ5+VboR8In1I+9QWDY6gFn7jjHpQhtJEFbAGBnijT0P8H7qPVOh47a42yNaymPBOcDupoykep+m73V7g/8Av2itQowvJIx6j5/OpRjfUWrax051RPY6LqFy8YACkebO4Z7HjNWCY0ToW2utJbqPreW5na4O4R78cehY9+fQVKLX05p1ro97awC4kX8VFmJLeEKdoyfOxJPbHbHYVAz0rqKPWNauri9t1ktJgILYgEiNFJ/Op9STmrBBdeaFCo0y3jt444DI3jLE204Y5DD+9UPPhv0NaRtPq99mSLOy22nOfdh75oNC0vT7m1uJGvGiii3AW8UDbQAeDu9zUohLlhouqap1VciF4I1FtbxSthtobBIPuTn7VBJ2vUth1TYk2ULi9iALwyKQYz6Z9CM0Ft060/DWEcDHe2Mucdye9BFW3SekWt3LPb2cUTsd2UGCG9x7VYMzufhzdX/WmoXj+BPpx3I7XZMmXYckc8EVRKaP8I+nXikLQuxOQSTnBzj6UEjonw90XSrq3S3s7dplJcl1EhGDx396lFx1TQ4NVjaC7UNCU2lAO/8AvNWDCLp9X+E/WgWImXSp2LRq7eSRT3B9iP8AFSjd9P1+21TRodVsMS2rLmTbyUGOePXFQSltcLcW0c1vh4mXcjDswoDLdRm4FvISsgXeTghDzjAPbPyzmgVFxCzuiOC0WN3sM+5oDqySJmNg3rx7UFb60tNUubGJdFdEvhIdryflUFSM49TVgyTQenJemOorf/U4H1Fpp1edAh/hOQTuQ9375JA4q0bnbPBcxxT20wkjKkqYzlT9ayGWu38um2MbLEJ7iRtoUds+/wAgBzQeW/iHqi6j1PdzeL44HkL9txHtWhVUOVyvHNB2F9zQT8hUAhAdp5FCG0mSAzE4HGDRppvwL1bwOpJbEsFiuYyVX3deR98Zoy2ZtRgmjkSRZocEp51K7se3vUow/rfpFE124mVpfD4mk2MWdCc4A+VWCH1281/UmFnpklzPYRFBEG8uGC4yQfcn9alGgaJo95rRsbi53WaxwrHOm7BjYcHnvz/moJaw07pXSI5IW1K0CQnDhpAWB9R796CudY62msTRW+gadI8KnDXMkLLv9MA8HGOKC3dAXF1dRfh75f41moi4G0AdwcfTj7UFhv7RjqMBV5AJFZHkR8FRjIx6CgpXVNjJ1JqNn07p26CztSJLlpIydyj2J+fGaC+afplrazqLa3SKNIggx3IHYUEsBk4wQc4oK11L1z070/M9rql6wuVA3QopLcjNBDwfFboqeSO2W7kQNxuaEhQfnQLt8TuireVoV1UeXnckTFT9DigHRuv+mbu9ZV1W3Nyc4IRlVl9O47/KgtU+s6baw+JcX1umRkAuM/p3oK/1t0rYdX6cqXKESqN8Ug/Mp9P1qwZ702mo9GdUTWJsmOn3EY8CAORGXJAwScjJ5q0bJDNLb6YklxbKsgA3wwndg9sDtWQN3aw3ZKTwLLEQOGORn6ehoG1vYAw3FikRt7JSuH3Hc/GSc5P0oHn4aO2uGuYyiose044wBQIm6F1dwfh5ARs8R8L2BHGfnQHv9PS4PjxrGLtFKxysm4qD3oCxboIIo7e1jhQHzAYUJ8wP8VYM56j1ktaal1BMrS2sAaK1OQDD6eUepY9yfQVR5zv7hrmaSaRtzyHJY0DcE4BPIPb5UBwOO4oJYzFvzEYHAH9XvQhNZN7AEgDOSDRo/wBA1SXRdVtNQgb/AJUgfBHcZ/8AGaGPVlhPbarZ2t5CEeORBKje2RUrI1zYxTBhMinIwcjvUEcugWkO4AMisMEA8N69u3yoERrOhWNxNYy6hapcxAeJHM+D8u9ASLStLlm/EWdpZyxy+Z3RQ3I5B44oJKTT4blFWSNBEOeBg5+goFYbOK1TKhIxnIbGMH50Cpcyo6AMrIcM2OD68Ggb6Lbbllu5Cd88hYBu6rztWglSNkfm4P70Gaat8Rba96w0vp3R2mhufxyi4kO3YyDOV9+f8VYMw+P0cP8Ax4JVuEKzW8bEr5tuMj0+lUZ7Y2X4288GK+towRlZXYqv9uKCQi6YmbT2u11GzaJWKnYxbBB49KCFnhubdiwL+U8OhP60ElpXUFxY6nDdXQF0qYbZKxwT9vWg3npb42aHcmC11C3uLSQjEkpIdQfr3xUondP6x6e6tv7e101hczRzrNtaFiFC/wAxPYHtUF+lj8bYCTgMG59cUC4OBQQOo2eoXepFTeL/AKYQN1sEwWx6Fu+DQLX2kw3Ok3Vjas9qJ48Exd1PHb9KCE6R0G86Ut7mK71KK4gklM7TyKRIBjtjtjj96AOreudJsrMJbXksk8jBCbVdzRjONxBHP/3QQ9x1jcWGkERWWqXdpMPCt7x4wfFOOWPbA+fAqwZr8TJ9Qbp2ymvEjsrSTEVvawyHz45Lv6E4wPqaoyl8g91OKDlYEc8mgKW5PH7UE80f8PPHl7gDFAZkUjawUIQG8w5ouknAUbl9Dg59KK1X4Z9XXFvo8mlRXax3KHfBG8Rk3qe6jHOc54+dMZO7jr/qK8vWtba4tYu38TwvDOMc8N60wPLbrW10PS7pnvrnUtbAKobgBUUk9jg8f/lMC2rydMdRSaRqWoLEbx4UefwxkL2BVvvn9Klgv3Tp0lPxFno6wJ4IVmEOMEHsf2xUD2e4ks7n+NGDaCMu8q8lCMcEfPNA6tW/EwrMybEYZUHnI9Cf/FAzu7G4LXTWs38SRNqhs4Bz3z9KCO6x07UNT6altNMvEs7xkC7nPlI9R2/egwbrDT+r+kupLCeK9nu3KBYGRy+QvdWFWCU6avtA6h1iKDqLRhpmpvkxz2p8JGb3z7k557VRX77TdHteuPBut401CAouyXGcdvmASaC069030brYaay/CwPFwWspRErfUN7UGZX2hWSiY6ZrMc0CvhUlBUk/UcGgiLq2mtG8F54yDwfDfIoGkrRsSZXwOB2zmgsvw06XHVfUcFvI22xQ753Ze4H8v3OBUo9a6XodjpltHbabDHZIhVsQqBuA9DxznFQTQUe5oDYGMnn7UEbpV3JqDyz+BJFbBtsXiDBf/qx6CgDXL42cSRwGM3UzBQrHGFzy32oG2i2kKTSI80lzMow0rqQoyew+3tQO59KtJJRI9rEzgg7igz3z/egZ6paJdGGwW4eBXy7pGeXUdwT6A5qwebPjJrcOr9TvbWZQ2Onr+Gi2nIJHcj7+vyqigOuRk+vtxQAqEk4BU0ABj7j96CzzKxYD0GeM0CQG3OMAjkfOgbSZwzE4PJyfWi6caTdzaffW95akrPFIrLg/tRHpGzs9C6t0W31FrO3Y43MrcbH9c/eloZ6v0JpWoKlrHHBbScSFEHYc5Pz71NDXUoJrK1g0dvw9qsspW3nXaN6KMrHnH5ieSfan0U3Rr1uidaRbiwk8BUSS6naQkjc3ZcHaRnn70wbja6lZX+nw3NvMksM+FXnPJ9DUCeoXj2hSG2t2km7op4U/f/FAvHaNePb3N0jRzRA7VD8Akc9u/FAvcxnawZQ3HYtjJoKfDFAdeub6Vo1dSULIBtTbgYOfU8jNWUVvrm5ih0m1urixhlsI5HJliOwR5/LkkHHJPamjHdQ0HWdemlutN0+YWBYtC0rHDhjwVz3zWgx1n4e9U6QE8XT5Zd//APR5se9XBXbjS9S0zAvbO5tyWKAOhG4/KpQ3ZHXO5JFK98qRUGhfCbph77Uvx91pv463wVjR0LR7s483796WjW7rTrXpHWrSW2YK7lmXTbaIFpCRjjHOOSeeBipaNLtXuIre3R43lnkGXYADZnnmoH6Dkbzn6UDGWe9a/hKG3jsW8riQMJS3svp86B6zgMQmDtGT8qDNb6XUpOoPx72tzOkjFYowOduDwDjj70Gg6Wsq2KNeAJKRuKk52fIn5DvVkENrvW+iaSAsl0txI2Asdud5Yk4A4pgzv4l9ST6JZSXbyyprWpw+FFa5G21gz5icfzH3pgwCSQlh688+tUEwjjngZ7UHAHuWAHpn+1AXj5frQWXOGBZcKSe3c/rQIyAtnI27eBj/ADQIMAuH8vHBB9DQwVpPLjOckEZ/ahi9/Czqj/S9VhtLm6aG1uZFUsT5VOfX5Gpg9GiNJArxsrxsv1BHypYGF7pljeG3kvLZSlqzNGGxhSRjP6GoG1yLUWiWc2nSPA4KJGItyYHYHHarop3wu0jWYNUvzriNBp8ErraRMANxJ/N7nA7ZqDUHgSQLvAbacjI7H3oOuIFuYzGS68jJRyp4+lA0m0yDwGjiTw3bzBwTuDe+TQVbV+mLmW1NtbSok9weZiC2zPLEZ+/FBM6Xo40/TYdL8Jr21G4vJcuCck55HbFWUKQ/h4tR/DTz2o8TK21qmMgKOf8AfpmrokljG1i5BHI57D5VNorut9Lab1LA638W6H8sboNrqQckq3pntV0RWsfD6K7SGC3vTFahQJY2iVmkI9d+Mimie0Hp2DQ7AQacio3JZgqjcT3zxk1KHGldPWtnqMupS5uNTmGGnk5KjGNqf0r8qgmkhVGcquGblm96BDUZZYLGVrdN8+MIvux7UGKTdXdbaRrFvbaxbWN4d58BmwCjHPORycLx2q4LNe9S9TdN6I13qkWmzSXdwBCGlO4hiMKAB2A9ag0WySQwpLLtMjDcQBhVz6CgoHxF17XbnUYunulgsUsu4TTvjIUAEhR+x4qwZwtkOi7651PXJobm4tohHbQhdgecjnaPZeOfeqMy1vWLvWNQlvb+ZpJpWyT2A9gB6Cgjy5AO4A5oAGRgBR39aA7Z8MkFtxPY0AbV9UGfpQWhj5TkBQGxuBoEGG1iQCyHvj1oELgSkK6oRnIBIxzRdJ28Q3+fHiAds96LoH3AF1wCfyijNbF8JviI1rbQ6Pq/iSopxFOx5Uf0n3qUbWQk8II2SRyDPuCDUHMpSIiJQSBwDwKAgTxApnEbyIQ+APyn0+9A5B3AgfmoEWgcb3VlMpGFYjt+negSs7zxH/C3RWO9UElM8SAHG5fl/agNdXcEbJAZ1WadvDQDJO7Gf7c0ED1dqWv2enzw6Rb24nZfJd3EwREHqxyMZHzOKBbT7H8PZWTK5uZ9o3Xm1SzEry5PsT7UFF+JnUezSZ9LttRs2km2m5KSFWXDZI491AyBzVwK6J8T7CRtPjee0tbaGAtdNISdoXgLEo5JJxyfSmC6aF1fo2vELZXDJOxwkMybHYe4HtUFiJWJd8rKqjuTQcZV8SNI0dy43BlGVA+ZoBniE0RU7tp77Tg5zQUv4hNrU2tdNWOhylPEnaS5UHGYlAzn5cn74oHGt6l0z07k6nJC123HhKPFlbPptGTj9q0Kx0XMvU+ty6vqQtpWlZo4LOdGDWsak8Aflycgk+v2qC8a1q8OnaXLPOz2kCIWkZ+CqjgYx6n0xTBkmp9Sabp0KdRyI5vJkaGw08MVKIDw8jA557896QY1q2o3eqXr3N7O8skjnlnzgn5e1UMCGV/MOM0BJFOVwfX0oFtgZTjkg8UBtpOeO1B3hg85FBZXVMM+VJAA2+h96BO4IMJ/p7qvuKBKacmOKB5CYYx5UzgDPfHzoGwD5OApJHAHrQEAk8NWdNpx+XdkA0AIGhkGWOQcgg8UGw/DP4kmwhi07WCTZqAiSbstGc4+pFKNvs5o7q2Sa3kWaJxkOp4NZDOHUh/qL2k8LW78eG7kbZv+0+/yoEZp7fUpX/BXpgvYZTCSRtO7vtwe/vQdY6jeyatPp91FEPw8aSNMoYbi2cADt6Z70DS60KW7luJdV1JniJzBtURG2b0KN7+/vQKSWUWnLLqN3cSLP4ex5Y8jxiPykr23+nzzigpXUPVOu6Vqmmf8UWttb9OXDqkjRnfI3H849uRkDOKC0axLFr+nLB0rrUMM0bqCIGGGX1AH09qsGc9UfBiTV9Vhu9Pu5oPGLNeG6bczN7jHvVEr058Gre3sLeDVrmOdo3LmSFNjEH+XdntQXO51XQOl5YrCKGWa8SMYS3tzMyLjjJHb9alDqz1S5ktJ7nVdLmSVDiOONfEMiE4Xy+h9xUDm11CaTxEOn3VmpHFxKFCr9s5GPmKAus6jb9M6RJf3c88yxpjcxL7uM5OO3HrQVTSupoOuYdZMTSpptriNFtXKXDA/Pjhs9h2xQOJ7Xpnpa2S91WK2swqjbGw3ysfcnuxrQsGmapYvpwvra1FtDL52Mi+Gx49sZoMb+LXV0t+jWl3OYLGTO2zjx4h2nyszHsG4/Sgxt5ZJmEsjl3PB3GgSlyXBxkDnNADseB2PsaA8SoXQyFgmQCV70B5R/EIjYmPJxnvigEKcYDfrQF2/X9aCwqC7l2zwfT/FAJJ5747E5oGTqZArKOfccftQHZWwmOD23Z4NAEkTEBmwR7g5AoG7KSSE5HyFAMTyQsMHa/cEcEc5oL58P+v7rppdryPNAXx+GfsQe7Z9D2/Wg3zSdX0fqzT08F433eYwscOpHt9PcVkQPW632mX9vfWdrbXiRgrIdu2eHIwGD57+nIoKdc/ELVdC0u5afp27SUtta5vJMMzk+UDjzYHtQWDpj4gxXmif/wAitXZkGZzFGW8MehZO+PmM0Fibr/poWQmF6xXA2xmFg59sKRk0C2nQP1KFvdb0vwIUJNtDKcsVP8zD0Jx2oJDTNA0vR5p7qzs44pJOXkUc/SgNfa/pNvbF5L2JgTsCo2WJzjGKA1jcNcxOF/m/I0zbt4+gxgenNA5s7CCxWWYQxpNLgyMiY3nt2oHajcuexAoEL1C1uyFkVHO1ixHb17/KgwT4rdXWep6oul2OpywaTYqVLxDyySDjaM9x6Z+tWDPdB1TW7Wa9sumpGlursqMQRlpXwd3BA4571RcdN0i41G7h/wBSmNxqdkwn1O6u5/4cAXlYgcnngE/pQNvih8S211obHRyYbWInfJG/Ex9MD2+tBmNzcTXTtJcSSSSHH5jngDAH0oEdxbg5A9wKAzPwO+fegSLfLJoFoR5fUfegXOcHGAT8qAM+XaBzQCCwGDuyPlQWJpV37Q5TIweM7u/f9qA9vNYpFML2KaR8YiaJgAh55PvQRvnYoqA+MThcDnPai4PKWQlH3K6tggjnPaiEmz4ZznBPfFAVWG0AZDH1z2HzoE7lhv8AJIHI/mGRQI8AEeuOMGgndN6pu7V4RJLKY4WDLhypXj0oNL6d+MMS4ttetDdQgDNwAPEwPRh2NZGkabrvTXVZiexvba5aI+ILWZQG3Y4IDdvtQScvTdjK8chtFjkQ+VlYgqPXGKA17daV07apJrV9CsZbELTgbu3YY78UEHf9evJ4K9P6JqGoiR1XxjCUjAJ5OT3NBM9YdSWPTenwy3t7bWbSuBunUthfUhRyT+1BA2vXnS/jGdeo9LnIHljeLwSCe5zgmgejrOz1S3kGhazoaXYGAJ5Sw3e38uaCsJfX02rPD1XfXtvcCXdBJGjLbOO+EK88Y7nIoLB1H1Bb6WkN1ddSQ29io/5MZEjzt7DGTjj2FXNGUfEH4wRaxCtnp2kwGGM7llvBvIOO4XOAe/fNMwZbqusalfLBHfzSvFH+SIgKo+igYqiwWfWV30rpp03p6exJnUPJexQnxuR+Ulu2PkKCrT6jcSiTxZnbxCWcFidxPJJ96BBDlQ35fbFAbahBHJIGO+KAIwg5YnB455oDkKcbW7UAOowSMjOORQcCVXPp+9AqDkHcDmgMNpHPcUBTuzQWCJXcFvKR2Y0CMiqjnz4UcH/6oG8gKluwxyAfSjQ6ylEJBJfv37/OiYQMjbjkZXOSP/FEELAEkNn1waBCXdjORnPoc80CZeTb5wQe2BQAm4y7jlhjvQcWO3cW4GB7ZpgGC5a3uUeNyGQ5GCR+45pgt+l/EzqLTgxj1O5Zc4CSOXCj70wOE+Jd/Pq0V7qVvb3bISAWUBhnuc+/2pgvkHx0soLaNIdKkEiqR5yDg47cYpgresfELSNc1n8VrFtAw2ZBiiywwcgeb14x2xTBYNA13ozUo/GOqWVizDc1nf6crIh/7wOf1rOURvVupdE6ncpFeakiSWsZdbjSLfw1Zs+VVyMHA75xWsFDHU0idTJdf65rT28YKpPvHjKp9Bk49qYK/rGpXF/qU9zPdyzyyMSJJAAx9ifnVlwNZ7vxYEh8GAEHO8DDH5H5U0IPK8jHxSzFQAMnOKgJkFwPT6UBlAII5z3zQCjnA5OKBXeuAT9KABMA208DtxQHRhzgUC3KjJx9z2oAP6/L3NAHC8Hg/XNAcNtGe4oEy5yeaCdLk7yx2qQCAO1AmXZSSexHbPNAhJuLJkgjvzRonuAJy2cd8UCbyOi453Dj7GiYTZyVPPl74oYLJIyq68EH3Gf3oYTDEjLbiP1FEDHOUOdgOfXIBFAm77j5Mnng/WgLI5UE8Eg9iO9AnuJbuQx5wOBQFRxuIbOc54PrQK28ws76F722EyI4d4HJUOPb35yKBm8oeQsi4BPbPb71RyYIHmPGRg00GRsbsHIPY0Bg52AEEseBUCQJyOPXtQDtcdvvjtQCQ/GBwKAuXU4PrQBvZE5IGeO1AffjBJ57UBvEO045HagFWAAzktQKIzBeMg0C5kz5mOG/WgMrDJJ7jmgEnIyOccYoA3cEMRj05oC7/wDeBQTduzEoNxwcZGaAJOWfPNAlISVOT60aIQfkj+amgJ3bnnigKeFGPQUCf/x0Smw4V8exogX/AOY3+/SgKxKxeU459KAgJOckntQJkkcgkGgAAFFz/XQEmJaY7jnk96BM9yPQelAf/wCX7UBv/jagGP8AKB6ZoDf00HMfO/0oAH5TQA/5moECSMDPFAvGASMjPP8AigVX+b60BW7/AHoHEJJD55oDd4snv70CsSjCcDmgVAAbgYoGYJ3nk9qBUAYHAoP/2Q==
\ No newline at end of file +Pandoc Test SuiteJohnMacFarlaneAnonymousJuly 17, 2006pandoc<p>Pandoc Test Suite</p>

John MacFarlane

Anonymous

July 17, 2006

This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.

——————————

<p>Headers</p>
<p>Level 2 with an embedded link </url></p>
<p>Level 3 with emphasis</p>
<p>Level 4</p>
<p>Level 5</p>
<p>Level 1</p>
<p>Level 2 with emphasis</p>
<p>Level 3</p>

with no blank line

<p>Level 2</p>

with no blank line

——————————

<p>Paragraphs</p>

Here’s a regular paragraph.

In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.

Here’s one with a bullet. * criminey.

There should be a hard line breakhere.

——————————

<p>Block Quotes</p>

E-mail style:

This is a block quote. It is pretty short.

Code in a block quote:

sub status {

print "working";

}

A list:

 1. item one

 2. item two

Nested block quotes:

nested

nested

This should not be a block quote: 2 > 1.

And a following paragraph.

——————————

<p>Code Blocks</p>

Code:

---- (should be four hyphens)

sub status {

print "working";

}

this code block is indented by one tab

And:

this code block is indented by two tabs

These should not be escaped: \$ \\ \> \[ \{

——————————

<p>Lists</p>
<p>Unordered</p>

Asterisks tight:

• asterisk 1

• asterisk 2

• asterisk 3

Asterisks loose:

• asterisk 1

• asterisk 2

• asterisk 3

Pluses tight:

• Plus 1

• Plus 2

• Plus 3

Pluses loose:

• Plus 1

• Plus 2

• Plus 3

Minuses tight:

• Minus 1

• Minus 2

• Minus 3

Minuses loose:

• Minus 1

• Minus 2

• Minus 3

<p>Ordered</p>

Tight:

 1. First

 2. Second

 3. Third

and:

 1. One

 2. Two

 3. Three

Loose using tabs:

 1. First

 2. Second

 3. Third

and using spaces:

 1. One

 2. Two

 3. Three

Multiple paragraphs:

 1. Item 1, graf one.Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.

 2. Item 2.

 3. Item 3.

<p>Nested</p>

• Tab

◦ Tab

* Tab

Here’s another:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

Same thing but with paragraphs:

 1. First

 2. Second:

   • Fee

   • Fie

   • Foe

 3. Third

<p>Tabs and spaces</p>

• this is a list item indented with tabs

• this is a list item indented with spaces

◦ this is an example list item indented with tabs

◦ this is an example list item indented with spaces

<p>Fancy list markers</p>

 (2) begins with 2

 (3) and now 3with a continuation

 (3) iv. sublist with roman numerals, starting with 4

 (3) v. more items

 (3) v. (A) a subsublist

 (3) v. (B) a subsublist

Nesting:

 A. Upper Alpha

 A. I. Upper Roman.

 A. I. (6) Decimal start with 6

 A. I. (6) c) Lower alpha with paren

Autonumbering:

 1. Autonumber.

 2. More.

 2. 1. Nested.

Should not be a list item:

M.A. 2007

B. Williams

——————————

<p>Definition Lists</p>

Tight using spaces:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Tight using tabs:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Loose:

apple

    red fruit

orange

    orange fruit

banana

    yellow fruit

Multiple blocks with italics:

apple

    red fruit    contains seeds, crisp, pleasant to taste

orange

    orange fruit

    { orange code block }

    orange block quote

Multiple definitions, tight:

apple

    red fruit    computer

orange

    orange fruit    bank

Multiple definitions, loose:

apple

    red fruit    computer

orange

    orange fruit    bank

Blank line after term, indented marker, alternate markers:

apple

    red fruit    computer

orange

    orange fruit

 1. sublist

 2. sublist

<p>HTML Blocks</p>

Simple block on one line:

foo

And nested without indentation:

foobar

Interpreted markdown in a table:

<table>

<tr>

<td>

This is emphasized

</td>

<td>

And this is strong

</td>

</tr>

</table>

<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script>

Here’s a simple block:

foo

This should be a code block, though:

<div>

foo

</div>

As should this:

<div>foo</div>

Now, nested:

foo

This should just be an HTML comment:

<!-- Comment -->

Multiline:

<!--

Blah

Blah

-->

<!--

This is another comment.

-->

Code block:

<!-- Comment -->

Just plain comment, with trailing spaces on the line:

<!-- foo -->

Code:

<hr />

Hr’s:

<hr>

<hr />

<hr />

<hr>

<hr />

<hr />

<hr class="foo" id="bar" />

<hr class="foo" id="bar" />

<hr class="foo" id="bar">

——————————

<p>Inline Markup</p>

This is emphasized, and so is this.

This is strong, and so is this.

An emphasized link[1].

This is strong and em.

So is this word.

This is strong and em.

So is this word.

This is code: >, $, \, \$, <html>.

This is strikeout.

Superscripts: abcd ahello ahello there.

Subscripts: H2O, H23O, Hmany of themO.

These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.

——————————

<p>Smart quotes, ellipses, dashes</p>

“Hello,” said the spider. “‘Shelob’ is my name.”

‘A’, ‘B’, and ‘C’ are letters.

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’

‘He said, “I want to go.”’ Were you alive in the 70’s?

Here is some quoted ‘code’ and a “quoted link[2]”.

Some dashes: one—two — three—four — five.

Dashes between numbers: 5–7, 255–66, 1987–1999.

Ellipses…and…and….

——————————

<p>LaTeX</p>

• 

• 2+2=4

• x \in y

• \alpha \wedge \omega

• 223

• p-Tree

• Here’s some display math: \frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}

• Here’s one that has a line break in it: \alpha + \omega \times x^2.

These shouldn’t be math:

• To get the famous equation, write $e = mc^2$.

• $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.)

• Shoes ($20) and socks ($5).

• Escaped $: $73 this should be emphasized 23$.

Here’s a LaTeX table:

\begin{tabular}{|l|l|}\hline

Animal & Number \\ \hline

Dog & 2 \\

Cat & 1 \\ \hline

\end{tabular}

——————————

<p>Special Characters</p>

Here is some unicode:

• I hat: Î

• o umlaut: ö

• section: §

• set membership: ∈

• copyright: ©

AT&T has an ampersand in their name.

AT&T is another way to write it.

This & that.

4 < 5.

6 > 5.

Backslash: \

Backtick: `

Asterisk: *

Underscore: _

Left brace: {

Right brace: }

Left bracket: [

Right bracket: ]

Left paren: (

Right paren: )

Greater-than: >

Hash: #

Period: .

Bang: !

Plus: +

Minus: -

——————————

<p>Links</p>
<p>Explicit</p>

Just a URL[3].

URL and title[4].

URL and title[5].

URL and title[6].

URL and title[7]

URL and title[8]

with_underscore[9]

Email link[10]

Empty[11].

<p>Reference</p>

Foo bar[12].

Foo bar[13].

Foo bar[14].

With embedded [brackets][15].

b[16] by itself should be a link.

Indented once[17].

Indented twice[18].

Indented thrice[19].

This should [not][] be a link.

[not]: /url

Foo bar[20].

Foo biz[21].

<p>With ampersands</p>

Here’s a link with an ampersand in the URL[22].

Here’s a link with an amersand in the link text: AT&T[23].

Here’s an inline link[24].

Here’s an inline link in pointy braces[25].

<p>Autolinks</p>

With an ampersand: http://example.com/?foo=1&bar=2[26]

• In a list?

• http://example.com/[27]

• It should.

An e-mail address: nobody@nowhere.net[28]

Blockquoted: http://example.com/[29]

Auto-links should not occur here: <http://example.com/>

or here: <http://example.com/>

——————————

<p>Images</p>

From “Voyage dans la Lune” by Georges Melies (1902):

lalune

Here is a movie movie icon.

——————————

<p>Footnotes</p>

Here is a footnote reference,[30] and another.[31] This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.[32]

Notes can go in quotes.[33]

 1. And in list items.[34]

This paragraph should not be part of the note, as it is not indented.

<p>1</p>

/url

<p>2</p>

http://example.com/?foo=1&bar=2

<p>3</p>

/url/

<p>4</p>

title: /url/

<p>5</p>

title preceded by two spaces: /url/

<p>6</p>

title preceded by a tab: /url/

<p>7</p>

title with "quotes" in it: /url/

<p>8</p>

title with single quotes: /url/

<p>9</p>

/url/with_underscore

<p>10</p>

mailto:nobody@nowhere.net

<p>11</p>

<p>12</p>

/url/

<p>13</p>

/url/

<p>14</p>

/url/

<p>15</p>

/url/

<p>16</p>

/url/

<p>17</p>

/url

<p>18</p>

/url

<p>19</p>

/url

<p>20</p>

Title with "quotes" inside: /url/

<p>21</p>

Title with "quote" inside: /url/

<p>22</p>

http://example.com/?foo=1&bar=2

<p>23</p>

AT&T: http://att.com/

<p>24</p>

/script?foo=1&bar=2

<p>25</p>

/script?foo=1&bar=2

<p>26</p>

http://example.com/?foo=1&bar=2

<p>27</p>

http://example.com/

<p>28</p>

mailto:nobody@nowhere.net

<p>29</p>

http://example.com/

<p>30</p>

Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.

<p>31</p>

Here’s the long note. This one contains multiple blocks.

Subsequent blocks are indented to show that they belong to the footnote (as with list items).

{ <code> }

If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.

<p>32</p>

This is easier to type. Inline notes may contain links[32] and ] verbatim characters, as well as [bracketed text].

<p>33</p>

In quote.

<p>34</p>

In list.

/9j/4AAQSkZJRgABAQEASABIAAD//gBQVGhpcyBhcnQgaXMgaW4gdGhlIHB1YmxpYyBkb21haW4uIEtldmluIEh1Z2hlcywga2V2aW5oQGVpdC5jb20sIFNlcHRlbWJlciAxOTk1/9sAQwABAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/9sAQwEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEB/8AAEQgAFgAUAwEiAAIRAQMRAf/EABoAAQACAwEAAAAAAAAAAAAAAAAICQUGCgf/xAAjEAABBQEAAwABBQAAAAAAAAAGAwQFBwgCAAEJChEVOXa3/8QAFgEBAQEAAAAAAAAAAAAAAAAABggA/8QAJhEBAAECBQEJAAAAAAAAAAAAAQIAAwQFBhEhszE0NlFUcXR1tP/aAAwDAQACEQMRAD8AqQzziPNmpiqnIO1q4H+WkB84MdlzRSuM82/jVw/JCORtRmQz5d2VTy6WmS2eSYx3U/qkSRbgFsqRzH2Is4/mCluXc33vy8xTnJjTNqV/T8LKmkhr8Hq1da2aOvTfIh2CFeNt+GxFBP8AJFdFUbPWh+4FdXV7OtZOMR7mK9lBWNN+JBmMQ5cwmfH8DEFhTZUCRlE6CBq/ds/nBh9oYygeY1L9FnCUnBSN1t+w0l9bNomx1cllsOrL9OCTKtKOIqua6UVjP0dEvTyM7gp/3whbkAD0ScX3r6MLg+C2/XsMhCnJRn/5cVNHyJHiX6JKIFhhqnFeagm9BIgjfcJyNBTZiROBUk6Mp8CJRmT4NWU2MatV7n495DPk/wAbMJSRJOTBDItq0KR5s/nJN7LPW8AJWtYAoKQaDp+u4XShxgXhYcbHoxNTllCwETGQ8ag2jmDVsk8w/wCOp/C/hn+mWV/utpePH+D5wmF39NY6UakjUYR1Dn0YgRM5zQAAAMdfAA4AOAOArjkMNQ3vgm7UKtBR+m9QHFD5tpnDtpy+t2R20gK/OsmFtuDpaL5mVyiT5qdEVAvZci5ch5VoSGKbwlWTBr0RPoZT07av9lHfrXo6yLApWMugKpPM9SV1cDm65s/wkOHZBojoqiM+6GpMSj4FhtayNAUi5H3LfQBG2KWssFoSPuJdKyMLKtpuLi+e3jwFICUg7CSHsNVlYlKdizOTvKdq3KTsG8pQirsAG6vAB5FdhP490U4gfjxi+DedoqO4YftmKdKNulO26jiOv+2Ga/bftVNFXpHtVHrpLpRFJTpP3z77T469++fTx48e4LueE+NY6UKk7UniLP8A7rNf3X6//9k=/9j/4AAQSkZJRgABAQEAeAB4AAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAD6APoDAREAAhEBAxEB/8QAHAAAAAcBAQAAAAAAAAAAAAAAAQIDBAUGBwAI/8QAPhAAAgEDAwIEBAQFAgUFAAMAAQIDAAQRBRIhBjETIkFRB2FxgRQykaEjQlKxwRXwFjNictEIJEPh8SZTgv/EABcBAQEBAQAAAAAAAAAAAAAAAAABAgT/xAAbEQEBAQEAAwEAAAAAAAAAAAAAARECEiExQf/aAAwDAQACEQMRAD8A2t0YoQpwT2qVzMV+N3UHgrDY2eoM0y58VEbgfp9K1yMRmnuJ5h40jyYHGSeKrWE8u2QAApOMdqGCsmT8h70TAJwMAZx249aKBy4c9vTNUC0zDCgmmmG7Ockjkj1PrUTAjcy5XP0ouCgHae4IomOJHhgIc55PHY0Uk5IXLMcUBQ27n96JYO2MYLebHtRBA7BcMx29sdxQJqwZRtIP+BQKpjHHc+xzigNGoAO/k+nPAoAYlee5oBiGeWySO9AJCgY5PHagFCADzj2GaA2N2TkjA/U0HMwbPPeiyBLDfkkj04FCl1cBMgn6URwYFGySR6D2oAeQDAxnHGKAhU4IbGc+tFwnwDj9aK7f8v2oNu+IHxNvJdXmt9EmKWSqArA/mPvxUxMZNe3Ml1dvNcMzSSEsxPOferJhht/OWyAPc0UfdgDcuM8n50AMCykZFARsngcY/egTcbjnJz9O9AB2kZGSQOcUCX8x83bntQCMruJ4B7D1oCyOGzxtJ9M80CAdg5UjFE0aFJrghLeNpHY4IRdx/QUNWCw6D6q1EZttEvirHAZ4ig/U4qw1b9H+CHVN3Mq6hJaWMJ5ZjJ4hA/7R3P3q3ET+pf8Ap/lWNm03XkkkA8qTW+3PHupP9qxopV78G+s7VSV0+OcAn/kzqSfscVvIKzqPTWu6XKE1LSL+Bhz5oDg/cd6lEZzGwLrtPqrA8frUCJfcw9gfegUjZsEAffNADyHt78UAjCjzDJxRcO5Pw3gwCGOVJQp8ZncMGOeNoxwMY96GCbQffFFcUXKjDDt2NEo+N3yyM5z3okKuqJgIzONoJyuMGi4QfGcqSfXBoYHJx659qKIRnnsfUGgJn/poJYoTIGLY+eDzQFlQK2G/KCTmgbspfO0qce/agPGcR7nHf9vnQFfBPlOc88Gg7uucc/M0Bd208YJJweKAYrea4kKQICRGW5IUYUZJ570DYqcknt3FE0VuVyDzj1oamOlulda6puvC0a0eZVIWSbtGn1Y1NNbX0x8ENH0qL8X1NdtqDoNxiQbIh8u+WpqL70Tc6fcxypouiRadbW8hhLFFXcB7Edz+tNFvEZxkmmgShbA9PlUA+Hgg/wBqDgmBkd6ArJuJBGR7VdEdqWgaVqMfh6hp9pcLj/5Ig2KaKJrvwW6S1EFoLaWwmPIe2fAz81ORTRm3UfwI1mzBbRL+K/ReyS/w3x/b+1Wexmev9O6xoE2zWdOubUDszr5T9G7H9auCJj2n3PPrUXTlGBB2kYx96GlQMjJJHuRRXBgDgk8DtRKH8w4OfYA0SUlIMsFXJ4oujHH8ufnRRGOSNoJNAeFC77F2jPucfvQFEqgY3nj/AKaCUY58wwq54AoCzOmVMke9QeRnGR7ZoEIF7pnaTk49KDpSSwQntQJsGKjgggZ9uDQc4OOe1Am2UCkHOR7dqA8t/cSW8MEkrGGEsUTPCk4zj9KJT3pzQtS6m1aPT9Jh8SVxlmJwqL/UfYURuuhfArR7f8NLrF1cXciKDJCrbI2b7c4+9NGtaRptrpdqltYW0VtAn5Y41wBUodvGjqUdQyn0YZqAIreOBFSFFRF7BQAKA1xcRwKplcJuOBn1NAR7y2ikWMzoZnGVQHJNAuQcD3oBKkD2FBy8jnvQFxnjjmg4rxwKBMqCBtPNA3vbCC+tngvYo54HGGSRQQR9DV0Y91n8DNOvFkuOmZmsrk5PgSNuiY98D1X+1XRhWu6DqWgX72er2j2069t/ZvmD2IoGG7jbnj1FFlB224PB+VClN4DYJHyAojmPGCck8cetCAxgjPp6UaAGKtx6+9ATAXO7nFBw8HHLN+goJhBuj2FeAcnmgNazW8U0vjweODGyqpYrsYjytx3x3oGa5LEEjH9XvQGlgmjjMmQq4HBPfPYgevagG5nhe3tkFuInQHxJQTmQntn0wKBKTlAeDx60DSY+U9zn+mgsnQvROr9Y3W2xi8KxV8SXUnCrjvj1Y/IUR6c6A6H03o6wMVgrSXMoBmuX/NIf8Djt/eiLfjJwO9ZBiOfmKDhktzQAzYBLZ8oyaDF+rOptVv8AUjNZL4tjA/lT+kr3wvqTQX/pi3Y+DqFxKXurmFWAaPaVzg4I/b0oHlxqV7penRTXFu93dPLsESYB2k8n7CgnradLq1WaIOFI/K42sPkRQCg3Kcd6Dgp3d6AdrGg5VxnjmgKWB8uQGxnFAUgKuSefSghuqNC0jXbAWGtxQyJKdsYc4YMf6T6GtDzR8S/hnqfSUz3NvuvNILYSZR5o+ezj/Pb6UGfLzyD/AJoFFySQVBHpQDJ5kGByPahAbWxn5+po0OF3D+XPtQJsNwOe+aAuygmMkebgHnHFALHYpJwSeGz2oGpOJWAI49BQEZlYAHkg4oARVOMvtBIJJ7AUAX6xxSOsUgmjViFcKRuHviiVfvhT8NZuqpk1LVFeHRkPlHZpznsP+n50qPS+mWVppdnFa2cEcFtGu1I41ChR8qyHVxK8cLPDD4kgGVQHBNAa0maaBJGTYzDJXOcUCy5JOaA2OMfoaArkheM7vlQNYNOtoWLJCgLHJwo5NApPKLaNpGRQB6j2oGmnRvcyNd3O/DkeErLhkWgklIdCyZOCRzxzQEeRxhdpUnncBkD5UCxXjJ7+tAlctMsIMLohz5mcZAH09aBQYdQwyAeaAuA7MAQxHH0oG1481nbGVInuWU5Kr+bHrgepoKB1u+o6jqlvBH05NevEBPBK0pQR4I4BHZj+1Bb9IS7lsFtNWtYwDGFYB/EXHqpJ7/WtQYx8VfhGbdZtV6Uh8gy81mpyR6koPb5UGKY4YkeYd88fbFAI5AC98c5oQBb+U9+9GnN5RgDgjOPWgAN3yMfWgAqc91/UUD2RSSRg9+49KCR6e0WfX9WS0icRwgb55WOFijH5nP0FBYNRi6dSR7HRNPmu0hOW1GaXaZMdwBwAP3oynE0XRYrFtV02wS4ECj8dp1wcsE7eJEf39qlFZ616ZttPu7Kbp9Zbi0vYzNCcgjHqoHuKsEp8LPh7P1PqjXerxywaXaviRSu1pWH8g+XuflQemIIY7S3SK3hVIo12pGoAAA7AClEL1N1RH0/oTalcwx+IACLaSQKx59Ppmshv0D1jH1ZbTubU27xkkAnKsuSMg/UUFluLlLaJXETyecKAg554zigXiubeRnSKeJ5FOGVXBIPsaBLULoWkIfw3kYsAqIOSTQJMbpm3oqlmwACeF9yfn+1A+Bx34oE5IY5P+YFbnPIzQKAckHuRQCAQOO1AL8r9KDhkZOT9M8UCcrxgAyYJzwD70CT3Itxm8kgi3fly+P7/AOKA9pskhEkZysnOfeg6RH8w3tgjAHtQRZ1uystSg0m5eRJ2UbHceV8fP3oJkBSAVII9xQFdSRwKDDvjN8L/AMSJte6chxcgFrm1QcSf9aj39x61YMH8Q+CkfhqpQncxBDH5H6VRwXJ/Ke1Am2QchuMYOaNFSAVznB9qAm8f10D2RmX8jDHP3oLbebtA6ej0m2LrfX6LcX7IMskf8kf6HcffIoG8yTadZxSTxCK3kRZUwSFfkruIJ78GhiS6Y1OS3160uZJFWO5bwZtxzuQ8bcfPNMZXvo2wsLnQ9R0q/maJNNv5Yo3bjCuMAHPzqA2jdUan0lF0/ZXcElxp9zE+5WVd/DE71IPPB7H2po1bSNXsdYthLp1ykyEcj+ZT817ioITrnoux6vs1gv5JYnjz4ckZ/Ln5etA+6N0BemdBttMina4WEFfFdQpIJJ7D60E5I4Vo9qnnsQO1A3k0yzeTxhCizZJ3qNpz9RQO449igMSxHGW5NAIwBtUAUAMORkfegMhG3jtQD8+fvQGXJz7UAHuRQA5YDI5FB0qCQA5yaCs2/SFit/Jd3AmvJ2bO64ctt5zwD2oLMilVAUDgcAelAJLbhgZz3oGN9HPIYmhtrWRw2czjt7Y+dA+h3mJS67W9gc0AvuLYANADpkZABHY85oPOnxy+Hx06Z+odGjC2jt/7qBRwjH+cY9D6/OrKMebcceHwfaqCYIyDgZ96GhHOFJI4/WjQpXnsaCz9J6fDqGvRC8OLO3Vri5PB/hqMkfc4H3oDT3UupapcXrKS9zISgDdhnAGPbsKC5aLLBHq9p01c6bbagPE23kpJYhmz5IySAAMj6nNGdRnT2lu3V9vaQQrJDHfCMFj5kAfufsMUFogu5H0jrLUYXK+Lq0aRse/lf/8AOKlFfudagvbnQpNQRmtILydCwPdCQcgMOMZFQanPoeiawBd9M6s9jeKPK1vKQp+RFA4septa6fuFtuqbRrmzx5b+BAdo927A+vsflQXfTr2z1O3W5025juIW/mjOR9KAZI914khaRNo4XdwT9KAl3b2+oWpjMoZWbOVfnI9sUCrXUNssUU8w3sQoJH5jQLvwQQC3NAKvuUPtK54waDg23v6UA7weBnNAIOBigMr+hoOjdZQdhBx3waAVG0Z7UBWfAOQSflQChyNxBAxQRutarb6bHALi9trSW4kEcJnGd7ewFA/j8QEK/IA/MBjmgWDDBB7igj9dupLTTbiaHZ4oQ7A7bQW9ATVgwXSNV6onl8azW6t45pWdxHIxWA/zNtz7A8Glg2S1u7fX+nt0J/H2c4MMhmQoW9GBUjj60g8sfEHpebpDqi4sHLG2Y77eQ486E8fcdvtVFekGW4UfegKVAAKgnFGhuDzxQXbpDTZF6a13UnUqrCOzQ5wGZmXIJ+lE0ppkEK6nJcRWcTW9hA08iKcjcowpye/mxQ0+6VRbC/jvLm48L8LG9y8pIOXxkDnuSTipqHXQMng3es9S3fhn8DbvcZI5Mr/lH700dc3Dad8NtPs4nU6jeXD6nMCwBRF5XOfU8YHrTNJFF1X8RawW1jc4GxTKNrZB385yPkBTFw1stSu7Ni9tPLGSQfK5Aphi8J8UNUm6fn0u72yvJ5fGbuF/39aYYtGgadp9/axXnRetzaXqnhqZI3bEcj4547Ak/X6UxFisPiXe6NMdO65057eQAr+LhUlHHbOPX07UwWXpQ6BqMo1LpgW0sioVI8Qgxk+684qC028M5890Y3kHKbUwF+lA4LDOzu2M4FAOG3DaoI9cntQdJxzQEyR259f/AKoGl5fSQRFo7ZpB/MhYIR9zxQdayyXKb7gqox5Yo2yB9WHc0DPUIWnhWKxkuYFRs5gcKWbPY59KBkx6isVeSGW31JNwHhyOUkA+o8v9qCfjkMo/LJFKqBmRvSgc2swnRyFcYODuXGfpQMtRsLK8vYJL+wjuGiUtHK6hghz6Z7H6UEmCsig84I9RigiruC9t0DaaVmIIHhTOQMeuGwT9qCJ1ywv9T0U29xFFiaVBJGHz5M5ODgYPY/arKJPTtLW1t44i7SKq48w8x+ZPrTRJoipGFQAAdgKgzX47dMJrXSrXkUe6807MykDkp/MP8/aro80FQyZ+tUJ7hvH0x270XQ7KGtXvIk0T4c9P2bIhkvpnvJVfjIxhf7qftRDXpu0/1DpzXltUlkvmWMBI+2zdnn64oYa6yX0XTm0i4jQ3t6wmuV53xov5UPpyeeKyLbpFtZ6Xpmn6TqNq7/ic6pqQRR/DVf8Alq2fTOP0FXBnXU+ox32o3lzeW+JrxlMXHKR9wfbJ/tVWK5f3AnaAjafCTwwcY4BOM/qKKSjA4Dg8j37UHZKkE5P0olSFlcLDdJPbTNBOigjxOVZu3+80Rbbnrq9l0t9I6isRd2rgKpPlZMdyre9An07oupoh1zo2+lea2fMlr+WZFx7ZwwqWDVug/ihDq7R6b1EPwmpMNokPlVj8xng1BqEUe1EAJOMDOc5oDSxq6YYeuaAJF4oCBUQ7mJ45zQHYB14wR86AVjBXyjge1AEcRTHlA9hQE8kbgEohJ5yQM0ETHNqMOr3IZQ9tIMQyEjKt7D3FBLqywRPJKTuxlj3zQI3Ut14e+yhWRj28Q7RjH60EfpF3rU/jLqFrHbS4/hqpJXH19aCRa8jgiVr1xGwXzYyf99qA9tc29/aRXFnKs1vINyOO2KByoxwe9AYocHGKBvdwLcWzxSLuR1KuD6gjBoPHXWujt071Nf6YSSkUnkJ4yp5H7f2rQgWAA3Y+1An4j/1t+tBrHxKuYS+gx24LRx6ZFtI/lz60FY0+/v8ASphNpd68EpXY5AGNvzFF1YOirZbzVrvX9dkNxZWH8eeaY5Lyj8qj7kcVlETqOqXd/HrPUNzcNE16Tbwxf1JkEgD2AA/etBte9R2Oq2cv+p6XHJfBFjgmjkMaRgAAEqO5o1FWfbgjsR8+9AlI5CgEggeoNAq0iug8uD7g80KKmCcZ7fPmjJzJfT/hWtjJvhOPK/OOe49u9A96X1W90/VrRtNkkSfxQF8I5yScdvX6UGidSLpfVFzcvbRiy6kgZBGysFW7B9T7HHNSjU+o9S1iz0e2uNLmX8RYxJ+KgYeVwVGTn5d6gjug/iU3UOt/6TewQpP59skL5B29x/8AYoNHPB78Ggb2l3bXO78PKsoyVyvIBHBFAoSkbfyhn4GfWgTnmWFN7ybAvc4Jz9hQRdx1dp0S3Dw+JJHbDdPIUZUjX3yRz9Bmrgzbqb4x9Oxho4bB751O5HPkXPsc80wQHT/xrJ1IHUbGKO0kdRiBiAgz+YjnsPpTKN/tLy3vLOK5t5klt5F3LKhyCPemAYLuK5XMDEj1OCP71ArGWLMPT0oIbU7h11u2t49OllWWNm/FIRsjI4AI/egfQ2ktpbww2XgxoDl9wJ49cUCHUGv2GixM13Mkcm0squwUH5/SrgxDW/jFcXOteHb3otrKEEiRISRM3zGc49v1qDT+gfiBpvV7y2unxTxywRhz4ozuHbOR2+9Bmf8A6kNIEWpaZqiooEqtBIR6kHI/atfRjDEt3AKjgVQjug9j+lQWh72e/htTOzyeCnhHPomeMYoJvQum7vVD47K9jpsQBkvZ5NoAHcgUAa7rKamE0Lp9Xh0G1OZZTwZSO8jn9cCsivdS38F9cJDZIY7G2URxKe5x/MT7nNaEKrENwAFPPlosFwS2cd/cc0UlIm3JOeKDo2LH+UA0SjgDk98URzPiJ2449e/NAbS7v8PdpKkpikQ5WQLkqccGgmYNQmXWLeQLG9wVRQVPlcj+Yn3xQa98OviAjz3WjdXSpFdliEuJCNjDtsJ7enepRdel+kdL0rqOTVdIsoYklV1dixO3nunpg9jUCnU3WMeka5b2EUcl3JInmigQs6ZPlJAHY8+vpQP9O1m3nthNo0cTwM2JDwoVj6H5gd6CbhtUiVn8TcXO4ktkZ+We1BAf8Z6fZ2uqXWpyxQrbStGseQzMB2IA961B59+IHXmodXal+HsPFh04HbHCo25+bY/zQWv4f/CCxvII73qC8iuXYb1tYZeF9txHf6U3BatX+DvSl86x6cr2dwjbnEUmcj6Enj6U8hLdJdEX/SmowJp2tTT6Oc+La3HO0442+3NBf1LmRUjjQAfmc+nyHvWQockYyQcY3CgaabaPZxGNnaUFi3mPb6f+KA2q3RstNurnBxDE0mPfCk1YPMemaP1L8RtYN9fJPc2aMUaVmCKg54H0z6VRYendf6Z6T1W56a6j6fgfwJyguhGJmPzbIzjHtSjTn0zSunbi01fSkt9Os5GAmWNCDOGxtXb6HnNZEZ8etOF90DPKFy1rKk3zAzg/3rXI8u7zvOTg4zVoTLDJ81QWDTb2SwuvFgcrkbXwM5H0PFGqsjpd6+kcT61Nc2ieb8OikFc/9PA+WfSjKA1nWBzpFlZ/hLWM4KH8zsPVj6mghN4IyQRk5NGo5BkFmyAfSgVjChdpGO/FAXYpOHLBe/FAQqoBJbA9sUBGxgtgEj/eaCf6DGjt1TZf8RNGumKS7mQZQkDIB+WaMrf8Ub/ovV7V20JIYL62K4khhCLOCcEcAdu9BmCuEQvxvyFUg42+v+/rQaj0zax/EXRY9Nns0t9TtM+BqCKAjEclXA98jn+1Si7Cz6u6O0tLjTrxLu2tQJJrDwcKE/m2M2SfeoLrpupDV9Mh1OytUS2vIN8m4BZQf6T7+vPpj50GfdK9L6rJqk1y1y0elRDKRqdjHHoyDhjx39e9BZr7fagW0j3kul3iETRqHkeF8ZBUjkZIxjtk5rQ86dW6r+O1OcW0UtvaRsY4oWfLKBxz7k/5NA46P6X1rqS6WPS7V9v88rAqi/f3oN46X6C1DSotkus+BIwKl8hn2+3PapROXPT2t20bPY6kJ5UGYmbIfIHGW5z68VBI6DrzzWSrrAjtrwFUbDja5OBlfuaCbluJLeNwIpLiVF3bVXAP0Pv8qBxLO8cYcW7vnuqkAigNFKs8CyxlwG/lcYI+1A31ayF/pt1auSFmiaM/LIxVgwfoO413o3qqfSLyUSwodogAyZVGcbPTPr71aNDvendJ6wtbu7Fi1lezK0bS4VZMjtnFZE0bC5u9Jh0qRAr2yw4uWx59vBI44PegN1tpbap0lqOk2sipLPB4aFsnHbBNOR5A1exFhqFxbeKkngyMhdOxIPcVuhiZFz/zBUEwcKvYnP6fWi0+6chjn6h062uATFLcRrIMnzAsO9EehNR+GvTV3GUh0+O2YsGaWHIf9amjIfib0no3S0VtFY3M000zMzLJtLKvvkenyx96oz0rwNjA8cj2osFLbVAbOc9jRQiXOAwxnj3oBlAxwDj37UDY+vHOQeTQBIdqjcPMfnQwJclWyBgCjJBFeefw4VaVycBUGST2wAKD0L8H9C1rSIILjWLSCytY1lZASVnlL4PI/wD8+vvUo1uwbxI5GkjdVc7isvOBjtj2qBWKFZiQ8CJCB5FHYj5jHFArDbQ20ey3RY1HOAOPsKCH1u61CPSLt9MtlXUHUrbCbJBbPdtvYetXR5T1y2udD6lni1ErJdJLvlK4wWOCePvVgsV/8Sr67UW1vA0NiowIonMe4+7FeT9ARQRmodWa9EYpPBhs1Tygw26rk9xknkn70EjonxZ17TXjAeKTkZ3L+YZ7N8vpSjX+lOpNM6umgkMG3EgBV1DYbG4kewz2NZGkC43CP8MPFBONysMAD50DaHVH8S6N1a+BaxMUjd280pA5wPb296DrXWLK9WNoJdtwybxDKPDcAnHIoJBifTBzzmgaz2UFzPFNNbwvLCcxuyglT7igdRRKg8qAZ5JAAzQEnuYoHiSWQIZW2ID/ADH2H6UERr12BY6hueIQJaO7SK/nHfnHtx3pyPGWoN4jynuCfU963RF+DL/UtQWTkjaWY/8ATnHFGql+j1VerdJY8r+KiJz/ANwoy9C/EjqSbpbRY723RJC8ojIcZ4IJ/wAVkecer9en1+9FzeLCCq4URjgDP7mtLhteadBY2kMczyHUpcO0YxtiUjgH/q9celAiLy1kjCX1ruyMLNGdrj0+h+lE0+t+kNQltJ7yKS3jgiTxUFw/hySp7qp70NV6YEBgWUNjBoaKeAODnHrRoVgDnBP0ozpxZ2f4y5trVeGuJFiBPpk4zQep9C0LTembS30fQbWP8ZsDyTugZgf6ix9fYZpbgmbXSmXULaa6kMzpltzcjJ//AGpaLCY1CDsF74PrUCgHY0HbSx7Z96BGUfxB2xjtQZ11t0Tb6jNfyw2wM18gV5AcBdpzyPnV0Yp1F0o/TEczXjXaTOQYpIk3QlT3B9Rj0zVl0VKbVppImheUSwbsgFfXHc0De0tri/ujFYQSSyfmKopPHqaDV/g9p+padr/gkSRTzKu0kZRlPLYPbOPf2pg9GWzRCMJAFxH5do4wayKX1z/G0CdzqLWRkiaTxQBLudclQvovbv3oMU/4Z67uwnUAt3u1Zw42yhmx3/Ln8v0oN86L1d00i3i1UiGQIocNnEbnkqT2xgiguEbI4DIysp7EHNAZnxQQ/Usksej3EsCl5EUthR5sY52/Mjigr6Qrp3R15LqEcIlmgdpFGAsY2navPJApyPJtwd8rnGBuJz6Gt0MzGSTyf0qCwSKA5ZsAjnn2otTXQYj/AOMNIDqCrXUZwf8AuGDRG6fF6Gyfo6+ub0CR4EPgIScLIeAcfc1keatN0661a+S3sYTPKzAbV9B7/StLrQ/iXp9pYLp8elWsUM11AzXMqt53I7g7j244oiB6W6Tn6j2TeAy2FspTeB+Z+/8AmgtnWlvpdl1Dp1pq07Ja20GFQpuDHHAwPf39KDHriVTKSPOCeBnHHtQGsrG5v5pfwcTOIlMjgEeVfck0XRIreS7uUigRpJXOEVe5PtRE/wBJ9HaxqvUcdhNFJp0lviaSWVcMgzxgdySeBipo9T6O8NppUJ1K4iW5KgSvIyqxb5jPH0paJm1NvKivE6Mp7MpyP1FQLRTwy58F0cjuAckfagOTtO3+Y8igMWCIWbOPlzQNhNBOWEbq5Q+YKc4+tAD7JEZgQfXj0oI/VtIttXsZLW5hRopByCP/ADVlGRa78Erae63aXK1tG3LAncM/Kmh10l8IZdBv4rxtTE0yggJsyoz6/P0po0zSNKEMdo9xGnjxuzkqMAEgjj7GmialjWQMgyCRyQcGoITqHT7q/a30+G2jFmwLSzl8GPBGFA9c5NBPRwJDbpHCipEi4CjtigqfWltqCaG8WhNbxyzOBIs8W8FcY4+dWQQ/wtuZdIGqadrknhy2u2QyOSEZOeRngY+XvTBZZevOmhC8janbqiZ53Zzj2FMFcs+sh1ZqsFrp8UkGkrlpbh+DNzhVX5Z5NQTfXyWUXSV2t+wW3EZ5I4HHt61eYPI0mA5C9snFaoLsPv8AvUEk5JcA8cZG480WnOlXX4PVLO4yQ0cyP244Yf8AiiPUfUump1B0/c2O8xfi4v8AmL3UcGpgw/SujNX0Trj8PpckimOMvHO/kEg9Rjs3PpV0aFq/Qqa1ZJ/qcrverEqNOwGM+uMfemiVtrKbQdMNjp9rvtkhPht6mU5yT8u1BkvXg1qXUtOvddgRY1R3j2YHiMvZSD27CgzSCyuNQ1KK0giL3Mz4VAOc/wDignoNNOnaHeiW8hgkku/Al2+Ziqgn09M0ETp0qpqSmGKOdFcEeLwMfPHag3JLuCRtPmQWsDhNphtVAcn1w3BPFSwDdWGpX1/OYdOtbbSrlQ80szHdn0GR24/c1AbWemdatLbTJdGvJIJypDQwMV3exwOPatSz9Ei/R+txy2mr3evyHV4miWIDhGwwyGA7nGRS2YNZAUBWYDdjGayEvxMYB3nYu4KCfU+woG93c2enWs1xcPFDCp8zEgDPzq4GGgz22saS1zZSZhkdsFePXt86YHWmySeLNDMYikZ4YNlvvUD+VARxQJqgwRQHUAAe2O1AWOFRM8mDlgB37fagb6reXFt4ItLZJnZsuWfaI0Hdjwcn2FAz0nWX1i4u4xY3VpFbv4eZ1x4vGdy/KgkriN2aMRlQoOW3DOR/5qwYr1P1tp2pdS3WnanKkGh24kRl2eaYgcb/AFxnnAqiv9Jno0dRLJPbtdQtkNPIALaMnODsPPpSjbdK0DTbWQXui+Gsco3BU5hPP5gPT7VkU74t6PZHpq/1N5ZZbwrtRnmOwDPOFJwPsK1xR5ybudw788VaC5X2WoJRULSBpAe5PA/aiinAZnHck4A70THq/Qr23/4Y0u4lmCpLBGA7epxjH60Du+WGOBvFlFuWOFcYyCfbPrUojri6k06xX8PFNfBUJ3ltzM3scVBjfVvVXVNit5dapNDZGQGK3shjeAe7YHIwAOT3zVggNTiu+orrR4p7m+upJFR7h3TPhggDaoBwRjnPH5hV0af0xotnoD3l5dWdrY2YjGLhwPEHoef996CC6m0HpuPpk3Wny2s9sJPHJ3AeI2D39T37UGU9QTDULuGPSLPwIyoVIYk2lj6/X70G2/DPp0hVudRuBLcwxhRGkeEjB9M+p96DSLprVHiieaAE+YxHkke4H1qUOIBawL4uAuc8nvj71AwjRtQ1eO78QNp9odyLju/qT8uf70Gb6r8SpLzryy0vp+4NxYSSCEswI2u2Rn3OOD9qC4dVamen9NlaC7tUaIFvCmnHiy4HJXJxnOeDVwed+rOvLnqSyWO4jZSru/kc7ck+30GKosXw2+KmqaDJDY3jR3OmqNoRhtZAP6SP7Ggtmt63qbTJ1XLazJpslwBFblypEOAA7L2OWANS+xrnR2vW3UmjJeWp8wJSRf6WHeoJdSPMCRmgMq8DmgBpNsgUIxBGSccD70DczmS8MDWoe28MN4+f588rj980CktuJZYpFdlKZ4B4OfegQ1hpIrVjbsRMBhBj8x9qsHnX/hm36y1O/u9V1ddPmS6aD8OkQdyxOSe4OMmqLUvwQsYY4Xjv7m4YEEhwFyMY7fXB5pRbvhp0jqfSMV7b6jqZvLGQAwxAEBDk54PuD6VkVb49a5DBpiaNaeF/FIaQDumOwpzMGDEZQZ5+VboR8In1I+9QWDY6gFn7jjHpQhtJEFbAGBnijT0P8H7qPVOh47a42yNaymPBOcDupoykep+m73V7g/8Av2itQowvJIx6j5/OpRjfUWrax051RPY6LqFy8YACkebO4Z7HjNWCY0ToW2utJbqPreW5na4O4R78cehY9+fQVKLX05p1ro97awC4kX8VFmJLeEKdoyfOxJPbHbHYVAz0rqKPWNauri9t1ktJgILYgEiNFJ/Op9STmrBBdeaFCo0y3jt444DI3jLE204Y5DD+9UPPhv0NaRtPq99mSLOy22nOfdh75oNC0vT7m1uJGvGiii3AW8UDbQAeDu9zUohLlhouqap1VciF4I1FtbxSthtobBIPuTn7VBJ2vUth1TYk2ULi9iALwyKQYz6Z9CM0Ft060/DWEcDHe2Mucdye9BFW3SekWt3LPb2cUTsd2UGCG9x7VYMzufhzdX/WmoXj+BPpx3I7XZMmXYckc8EVRKaP8I+nXikLQuxOQSTnBzj6UEjonw90XSrq3S3s7dplJcl1EhGDx396lFx1TQ4NVjaC7UNCU2lAO/8AvNWDCLp9X+E/WgWImXSp2LRq7eSRT3B9iP8AFSjd9P1+21TRodVsMS2rLmTbyUGOePXFQSltcLcW0c1vh4mXcjDswoDLdRm4FvISsgXeTghDzjAPbPyzmgVFxCzuiOC0WN3sM+5oDqySJmNg3rx7UFb60tNUubGJdFdEvhIdryflUFSM49TVgyTQenJemOorf/U4H1Fpp1edAh/hOQTuQ9375JA4q0bnbPBcxxT20wkjKkqYzlT9ayGWu38um2MbLEJ7iRtoUds+/wAgBzQeW/iHqi6j1PdzeL44HkL9txHtWhVUOVyvHNB2F9zQT8hUAhAdp5FCG0mSAzE4HGDRppvwL1bwOpJbEsFiuYyVX3deR98Zoy2ZtRgmjkSRZocEp51K7se3vUow/rfpFE124mVpfD4mk2MWdCc4A+VWCH1281/UmFnpklzPYRFBEG8uGC4yQfcn9alGgaJo95rRsbi53WaxwrHOm7BjYcHnvz/moJaw07pXSI5IW1K0CQnDhpAWB9R796CudY62msTRW+gadI8KnDXMkLLv9MA8HGOKC3dAXF1dRfh75f41moi4G0AdwcfTj7UFhv7RjqMBV5AJFZHkR8FRjIx6CgpXVNjJ1JqNn07p26CztSJLlpIydyj2J+fGaC+afplrazqLa3SKNIggx3IHYUEsBk4wQc4oK11L1z070/M9rql6wuVA3QopLcjNBDwfFboqeSO2W7kQNxuaEhQfnQLt8TuireVoV1UeXnckTFT9DigHRuv+mbu9ZV1W3Nyc4IRlVl9O47/KgtU+s6baw+JcX1umRkAuM/p3oK/1t0rYdX6cqXKESqN8Ug/Mp9P1qwZ702mo9GdUTWJsmOn3EY8CAORGXJAwScjJ5q0bJDNLb6YklxbKsgA3wwndg9sDtWQN3aw3ZKTwLLEQOGORn6ehoG1vYAw3FikRt7JSuH3Hc/GSc5P0oHn4aO2uGuYyiose044wBQIm6F1dwfh5ARs8R8L2BHGfnQHv9PS4PjxrGLtFKxysm4qD3oCxboIIo7e1jhQHzAYUJ8wP8VYM56j1ktaal1BMrS2sAaK1OQDD6eUepY9yfQVR5zv7hrmaSaRtzyHJY0DcE4BPIPb5UBwOO4oJYzFvzEYHAH9XvQhNZN7AEgDOSDRo/wBA1SXRdVtNQgb/AJUgfBHcZ/8AGaGPVlhPbarZ2t5CEeORBKje2RUrI1zYxTBhMinIwcjvUEcugWkO4AMisMEA8N69u3yoERrOhWNxNYy6hapcxAeJHM+D8u9ASLStLlm/EWdpZyxy+Z3RQ3I5B44oJKTT4blFWSNBEOeBg5+goFYbOK1TKhIxnIbGMH50Cpcyo6AMrIcM2OD68Ggb6Lbbllu5Cd88hYBu6rztWglSNkfm4P70Gaat8Rba96w0vp3R2mhufxyi4kO3YyDOV9+f8VYMw+P0cP8Ax4JVuEKzW8bEr5tuMj0+lUZ7Y2X4288GK+towRlZXYqv9uKCQi6YmbT2u11GzaJWKnYxbBB49KCFnhubdiwL+U8OhP60ElpXUFxY6nDdXQF0qYbZKxwT9vWg3npb42aHcmC11C3uLSQjEkpIdQfr3xUondP6x6e6tv7e101hczRzrNtaFiFC/wAxPYHtUF+lj8bYCTgMG59cUC4OBQQOo2eoXepFTeL/AKYQN1sEwWx6Fu+DQLX2kw3Ok3Vjas9qJ48Exd1PHb9KCE6R0G86Ut7mK71KK4gklM7TyKRIBjtjtjj96AOreudJsrMJbXksk8jBCbVdzRjONxBHP/3QQ9x1jcWGkERWWqXdpMPCt7x4wfFOOWPbA+fAqwZr8TJ9Qbp2ymvEjsrSTEVvawyHz45Lv6E4wPqaoyl8g91OKDlYEc8mgKW5PH7UE80f8PPHl7gDFAZkUjawUIQG8w5ouknAUbl9Dg59KK1X4Z9XXFvo8mlRXax3KHfBG8Rk3qe6jHOc54+dMZO7jr/qK8vWtba4tYu38TwvDOMc8N60wPLbrW10PS7pnvrnUtbAKobgBUUk9jg8f/lMC2rydMdRSaRqWoLEbx4UefwxkL2BVvvn9Klgv3Tp0lPxFno6wJ4IVmEOMEHsf2xUD2e4ks7n+NGDaCMu8q8lCMcEfPNA6tW/EwrMybEYZUHnI9Cf/FAzu7G4LXTWs38SRNqhs4Bz3z9KCO6x07UNT6altNMvEs7xkC7nPlI9R2/egwbrDT+r+kupLCeK9nu3KBYGRy+QvdWFWCU6avtA6h1iKDqLRhpmpvkxz2p8JGb3z7k557VRX77TdHteuPBut401CAouyXGcdvmASaC069030brYaay/CwPFwWspRErfUN7UGZX2hWSiY6ZrMc0CvhUlBUk/UcGgiLq2mtG8F54yDwfDfIoGkrRsSZXwOB2zmgsvw06XHVfUcFvI22xQ753Ze4H8v3OBUo9a6XodjpltHbabDHZIhVsQqBuA9DxznFQTQUe5oDYGMnn7UEbpV3JqDyz+BJFbBtsXiDBf/qx6CgDXL42cSRwGM3UzBQrHGFzy32oG2i2kKTSI80lzMow0rqQoyew+3tQO59KtJJRI9rEzgg7igz3z/egZ6paJdGGwW4eBXy7pGeXUdwT6A5qwebPjJrcOr9TvbWZQ2Onr+Gi2nIJHcj7+vyqigOuRk+vtxQAqEk4BU0ABj7j96CzzKxYD0GeM0CQG3OMAjkfOgbSZwzE4PJyfWi6caTdzaffW95akrPFIrLg/tRHpGzs9C6t0W31FrO3Y43MrcbH9c/eloZ6v0JpWoKlrHHBbScSFEHYc5Pz71NDXUoJrK1g0dvw9qsspW3nXaN6KMrHnH5ieSfan0U3Rr1uidaRbiwk8BUSS6naQkjc3ZcHaRnn70wbja6lZX+nw3NvMksM+FXnPJ9DUCeoXj2hSG2t2km7op4U/f/FAvHaNePb3N0jRzRA7VD8Akc9u/FAvcxnawZQ3HYtjJoKfDFAdeub6Vo1dSULIBtTbgYOfU8jNWUVvrm5ih0m1urixhlsI5HJliOwR5/LkkHHJPamjHdQ0HWdemlutN0+YWBYtC0rHDhjwVz3zWgx1n4e9U6QE8XT5Zd//APR5se9XBXbjS9S0zAvbO5tyWKAOhG4/KpQ3ZHXO5JFK98qRUGhfCbph77Uvx91pv463wVjR0LR7s483796WjW7rTrXpHWrSW2YK7lmXTbaIFpCRjjHOOSeeBipaNLtXuIre3R43lnkGXYADZnnmoH6Dkbzn6UDGWe9a/hKG3jsW8riQMJS3svp86B6zgMQmDtGT8qDNb6XUpOoPx72tzOkjFYowOduDwDjj70Gg6Wsq2KNeAJKRuKk52fIn5DvVkENrvW+iaSAsl0txI2Asdud5Yk4A4pgzv4l9ST6JZSXbyyprWpw+FFa5G21gz5icfzH3pgwCSQlh688+tUEwjjngZ7UHAHuWAHpn+1AXj5frQWXOGBZcKSe3c/rQIyAtnI27eBj/ADQIMAuH8vHBB9DQwVpPLjOckEZ/ahi9/Czqj/S9VhtLm6aG1uZFUsT5VOfX5Gpg9GiNJArxsrxsv1BHypYGF7pljeG3kvLZSlqzNGGxhSRjP6GoG1yLUWiWc2nSPA4KJGItyYHYHHarop3wu0jWYNUvzriNBp8ErraRMANxJ/N7nA7ZqDUHgSQLvAbacjI7H3oOuIFuYzGS68jJRyp4+lA0m0yDwGjiTw3bzBwTuDe+TQVbV+mLmW1NtbSok9weZiC2zPLEZ+/FBM6Xo40/TYdL8Jr21G4vJcuCck55HbFWUKQ/h4tR/DTz2o8TK21qmMgKOf8AfpmrokljG1i5BHI57D5VNorut9Lab1LA638W6H8sboNrqQckq3pntV0RWsfD6K7SGC3vTFahQJY2iVmkI9d+Mimie0Hp2DQ7AQacio3JZgqjcT3zxk1KHGldPWtnqMupS5uNTmGGnk5KjGNqf0r8qgmkhVGcquGblm96BDUZZYLGVrdN8+MIvux7UGKTdXdbaRrFvbaxbWN4d58BmwCjHPORycLx2q4LNe9S9TdN6I13qkWmzSXdwBCGlO4hiMKAB2A9ag0WySQwpLLtMjDcQBhVz6CgoHxF17XbnUYunulgsUsu4TTvjIUAEhR+x4qwZwtkOi7651PXJobm4tohHbQhdgecjnaPZeOfeqMy1vWLvWNQlvb+ZpJpWyT2A9gB6Cgjy5AO4A5oAGRgBR39aA7Z8MkFtxPY0AbV9UGfpQWhj5TkBQGxuBoEGG1iQCyHvj1oELgSkK6oRnIBIxzRdJ28Q3+fHiAds96LoH3AF1wCfyijNbF8JviI1rbQ6Pq/iSopxFOx5Uf0n3qUbWQk8II2SRyDPuCDUHMpSIiJQSBwDwKAgTxApnEbyIQ+APyn0+9A5B3AgfmoEWgcb3VlMpGFYjt+negSs7zxH/C3RWO9UElM8SAHG5fl/agNdXcEbJAZ1WadvDQDJO7Gf7c0ED1dqWv2enzw6Rb24nZfJd3EwREHqxyMZHzOKBbT7H8PZWTK5uZ9o3Xm1SzEry5PsT7UFF+JnUezSZ9LttRs2km2m5KSFWXDZI491AyBzVwK6J8T7CRtPjee0tbaGAtdNISdoXgLEo5JJxyfSmC6aF1fo2vELZXDJOxwkMybHYe4HtUFiJWJd8rKqjuTQcZV8SNI0dy43BlGVA+ZoBniE0RU7tp77Tg5zQUv4hNrU2tdNWOhylPEnaS5UHGYlAzn5cn74oHGt6l0z07k6nJC123HhKPFlbPptGTj9q0Kx0XMvU+ty6vqQtpWlZo4LOdGDWsak8Aflycgk+v2qC8a1q8OnaXLPOz2kCIWkZ+CqjgYx6n0xTBkmp9Sabp0KdRyI5vJkaGw08MVKIDw8jA557896QY1q2o3eqXr3N7O8skjnlnzgn5e1UMCGV/MOM0BJFOVwfX0oFtgZTjkg8UBtpOeO1B3hg85FBZXVMM+VJAA2+h96BO4IMJ/p7qvuKBKacmOKB5CYYx5UzgDPfHzoGwD5OApJHAHrQEAk8NWdNpx+XdkA0AIGhkGWOQcgg8UGw/DP4kmwhi07WCTZqAiSbstGc4+pFKNvs5o7q2Sa3kWaJxkOp4NZDOHUh/qL2k8LW78eG7kbZv+0+/yoEZp7fUpX/BXpgvYZTCSRtO7vtwe/vQdY6jeyatPp91FEPw8aSNMoYbi2cADt6Z70DS60KW7luJdV1JniJzBtURG2b0KN7+/vQKSWUWnLLqN3cSLP4ex5Y8jxiPykr23+nzzigpXUPVOu6Vqmmf8UWttb9OXDqkjRnfI3H849uRkDOKC0axLFr+nLB0rrUMM0bqCIGGGX1AH09qsGc9UfBiTV9Vhu9Pu5oPGLNeG6bczN7jHvVEr058Gre3sLeDVrmOdo3LmSFNjEH+XdntQXO51XQOl5YrCKGWa8SMYS3tzMyLjjJHb9alDqz1S5ktJ7nVdLmSVDiOONfEMiE4Xy+h9xUDm11CaTxEOn3VmpHFxKFCr9s5GPmKAus6jb9M6RJf3c88yxpjcxL7uM5OO3HrQVTSupoOuYdZMTSpptriNFtXKXDA/Pjhs9h2xQOJ7Xpnpa2S91WK2swqjbGw3ysfcnuxrQsGmapYvpwvra1FtDL52Mi+Gx49sZoMb+LXV0t+jWl3OYLGTO2zjx4h2nyszHsG4/Sgxt5ZJmEsjl3PB3GgSlyXBxkDnNADseB2PsaA8SoXQyFgmQCV70B5R/EIjYmPJxnvigEKcYDfrQF2/X9aCwqC7l2zwfT/FAJJ5747E5oGTqZArKOfccftQHZWwmOD23Z4NAEkTEBmwR7g5AoG7KSSE5HyFAMTyQsMHa/cEcEc5oL58P+v7rppdryPNAXx+GfsQe7Z9D2/Wg3zSdX0fqzT08F433eYwscOpHt9PcVkQPW632mX9vfWdrbXiRgrIdu2eHIwGD57+nIoKdc/ELVdC0u5afp27SUtta5vJMMzk+UDjzYHtQWDpj4gxXmif/wAitXZkGZzFGW8MehZO+PmM0Fibr/poWQmF6xXA2xmFg59sKRk0C2nQP1KFvdb0vwIUJNtDKcsVP8zD0Jx2oJDTNA0vR5p7qzs44pJOXkUc/SgNfa/pNvbF5L2JgTsCo2WJzjGKA1jcNcxOF/m/I0zbt4+gxgenNA5s7CCxWWYQxpNLgyMiY3nt2oHajcuexAoEL1C1uyFkVHO1ixHb17/KgwT4rdXWep6oul2OpywaTYqVLxDyySDjaM9x6Z+tWDPdB1TW7Wa9sumpGlursqMQRlpXwd3BA4571RcdN0i41G7h/wBSmNxqdkwn1O6u5/4cAXlYgcnngE/pQNvih8S211obHRyYbWInfJG/Ex9MD2+tBmNzcTXTtJcSSSSHH5jngDAH0oEdxbg5A9wKAzPwO+fegSLfLJoFoR5fUfegXOcHGAT8qAM+XaBzQCCwGDuyPlQWJpV37Q5TIweM7u/f9qA9vNYpFML2KaR8YiaJgAh55PvQRvnYoqA+MThcDnPai4PKWQlH3K6tggjnPaiEmz4ZznBPfFAVWG0AZDH1z2HzoE7lhv8AJIHI/mGRQI8AEeuOMGgndN6pu7V4RJLKY4WDLhypXj0oNL6d+MMS4ttetDdQgDNwAPEwPRh2NZGkabrvTXVZiexvba5aI+ILWZQG3Y4IDdvtQScvTdjK8chtFjkQ+VlYgqPXGKA17daV07apJrV9CsZbELTgbu3YY78UEHf9evJ4K9P6JqGoiR1XxjCUjAJ5OT3NBM9YdSWPTenwy3t7bWbSuBunUthfUhRyT+1BA2vXnS/jGdeo9LnIHljeLwSCe5zgmgejrOz1S3kGhazoaXYGAJ5Sw3e38uaCsJfX02rPD1XfXtvcCXdBJGjLbOO+EK88Y7nIoLB1H1Bb6WkN1ddSQ29io/5MZEjzt7DGTjj2FXNGUfEH4wRaxCtnp2kwGGM7llvBvIOO4XOAe/fNMwZbqusalfLBHfzSvFH+SIgKo+igYqiwWfWV30rpp03p6exJnUPJexQnxuR+Ulu2PkKCrT6jcSiTxZnbxCWcFidxPJJ96BBDlQ35fbFAbahBHJIGO+KAIwg5YnB455oDkKcbW7UAOowSMjOORQcCVXPp+9AqDkHcDmgMNpHPcUBTuzQWCJXcFvKR2Y0CMiqjnz4UcH/6oG8gKluwxyAfSjQ6ylEJBJfv37/OiYQMjbjkZXOSP/FEELAEkNn1waBCXdjORnPoc80CZeTb5wQe2BQAm4y7jlhjvQcWO3cW4GB7ZpgGC5a3uUeNyGQ5GCR+45pgt+l/EzqLTgxj1O5Zc4CSOXCj70wOE+Jd/Pq0V7qVvb3bISAWUBhnuc+/2pgvkHx0soLaNIdKkEiqR5yDg47cYpgresfELSNc1n8VrFtAw2ZBiiywwcgeb14x2xTBYNA13ozUo/GOqWVizDc1nf6crIh/7wOf1rOURvVupdE6ncpFeakiSWsZdbjSLfw1Zs+VVyMHA75xWsFDHU0idTJdf65rT28YKpPvHjKp9Bk49qYK/rGpXF/qU9zPdyzyyMSJJAAx9ifnVlwNZ7vxYEh8GAEHO8DDH5H5U0IPK8jHxSzFQAMnOKgJkFwPT6UBlAII5z3zQCjnA5OKBXeuAT9KABMA208DtxQHRhzgUC3KjJx9z2oAP6/L3NAHC8Hg/XNAcNtGe4oEy5yeaCdLk7yx2qQCAO1AmXZSSexHbPNAhJuLJkgjvzRonuAJy2cd8UCbyOi453Dj7GiYTZyVPPl74oYLJIyq68EH3Gf3oYTDEjLbiP1FEDHOUOdgOfXIBFAm77j5Mnng/WgLI5UE8Eg9iO9AnuJbuQx5wOBQFRxuIbOc54PrQK28ws76F722EyI4d4HJUOPb35yKBm8oeQsi4BPbPb71RyYIHmPGRg00GRsbsHIPY0Bg52AEEseBUCQJyOPXtQDtcdvvjtQCQ/GBwKAuXU4PrQBvZE5IGeO1AffjBJ57UBvEO045HagFWAAzktQKIzBeMg0C5kz5mOG/WgMrDJJ7jmgEnIyOccYoA3cEMRj05oC7/wDeBQTduzEoNxwcZGaAJOWfPNAlISVOT60aIQfkj+amgJ3bnnigKeFGPQUCf/x0Smw4V8exogX/AOY3+/SgKxKxeU459KAgJOckntQJkkcgkGgAAFFz/XQEmJaY7jnk96BM9yPQelAf/wCX7UBv/jagGP8AKB6ZoDf00HMfO/0oAH5TQA/5moECSMDPFAvGASMjPP8AigVX+b60BW7/AHoHEJJD55oDd4snv70CsSjCcDmgVAAbgYoGYJ3nk9qBUAYHAoP/2Q==
\ No newline at end of file diff --git a/tests/writer.html b/tests/writer.html index b0227e21b..e8e619f44 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -324,7 +324,6 @@ These should not be escaped: \$ \\ \> \[ \{
foo
-

And nested without indentation:

@@ -336,7 +335,6 @@ foo bar
-

Interpreted markdown in a table:

@@ -353,10 +351,8 @@ And this is strong

Here’s a simple block:

- foo
-

This should be a code block, though:

<div>
     foo
@@ -365,14 +361,12 @@ foo
 
<div>foo</div>

Now, nested:

-
-
- +
+
foo
-
- +

This should just be an HTML comment:

diff --git a/tests/writer.markdown b/tests/writer.markdown index 2201ac8d1..7d67e4e87 100644 --- a/tests/writer.markdown +++ b/tests/writer.markdown @@ -356,20 +356,31 @@ HTML Blocks Simple block on one line:
+ foo +
And nested without indentation:
+
+
+ foo +
+
+
+ bar +
+
Interpreted markdown in a table: @@ -390,8 +401,9 @@ And this is **strong** Here’s a simple block:
- + foo +
This should be a code block, though: @@ -407,12 +419,17 @@ As should this: Now, nested:
-
-
- + +
+ +
+ foo +
-
+ +
+
This should just be an HTML comment: diff --git a/tests/writer.mediawiki b/tests/writer.mediawiki index 7eccc44e8..2f3726285 100644 --- a/tests/writer.mediawiki +++ b/tests/writer.mediawiki @@ -311,22 +311,30 @@ Blank line after term, indented marker, alternate markers: Simple block on one line:
+ foo -
+
And nested without indentation:
+
+
+ foo +
+
+ bar -
+
+
Interpreted markdown in a table:
@@ -345,10 +353,10 @@ And this is '''strong''' Here’s a simple block:
- + foo -
+ This should be a code block, though:
<div>
@@ -360,14 +368,18 @@ As should this:
 Now, nested:
 
 
-
-
- + +
+ +
+ foo +
-
+
+
This should just be an HTML comment: diff --git a/tests/writer.native b/tests/writer.native index d1b14b24e..678d7595f 100644 --- a/tests/writer.native +++ b/tests/writer.native @@ -228,15 +228,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,[Plain [Str "sublist"]]]]])] ,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,RawBlock (Format "html") "
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
" -,Plain [Str "bar"] -,RawBlock (Format "html") "
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]],Div ("",[],[]) [Plain [Str "bar"]]] ,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] ,RawBlock (Format "html") "
\n\n\n\n
" ,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"]] @@ -244,17 +238,13 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,Plain [Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] ,RawBlock (Format "html") "
\n\n\n" ,Para [Str "Here\8217s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,RawBlock (Format "html") "
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n" +,Div ("",[],[]) [Plain [Str "foo"]] ,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] ,CodeBlock ("",[],[]) "
\n foo\n
" ,Para [Str "As",Space,Str "should",Space,Str "this:"] ,CodeBlock ("",[],[]) "
foo
" ,Para [Str "Now,",Space,Str "nested:"] -,RawBlock (Format "html") "
\n
\n
\n " -,Plain [Str "foo"] -,RawBlock (Format "html") "
\n
\n
\n" +,Div ("",[],[]) [Div ("",[],[]) [Div ("",[],[]) [Plain [Str "foo"]]]] ,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] ,RawBlock (Format "html") "\n" ,Para [Str "Multiline:"] diff --git a/tests/writer.opml b/tests/writer.opml index b0954a439..228cad247 100644 --- a/tests/writer.opml +++ b/tests/writer.opml @@ -44,7 +44,7 @@ - + diff --git a/tests/writer.org b/tests/writer.org index b8058a406..85016f352 100644 --- a/tests/writer.org +++ b/tests/writer.org @@ -359,7 +359,13 @@ And nested without indentation: #+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML @@ -367,7 +373,13 @@ foo #+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML @@ -375,6 +387,9 @@ bar #+BEGIN_HTML
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML @@ -407,7 +422,6 @@ Here's a simple block: #+BEGIN_HTML
- #+END_HTML foo @@ -434,16 +448,27 @@ Now, nested: #+BEGIN_HTML
-
-
- +#+END_HTML + +#+BEGIN_HTML +
+#+END_HTML + +#+BEGIN_HTML +
#+END_HTML foo #+BEGIN_HTML
-
+#+END_HTML + +#+BEGIN_HTML +
+#+END_HTML + +#+BEGIN_HTML
#+END_HTML diff --git a/tests/writer.plain b/tests/writer.plain index cc61916d2..60e7bb329 100644 --- a/tests/writer.plain +++ b/tests/writer.plain @@ -352,10 +352,13 @@ HTML Blocks Simple block on one line: foo + And nested without indentation: foo + bar + Interpreted markdown in a table: This is emphasized @@ -363,6 +366,7 @@ And this is strong Here’s a simple block: foo + This should be a code block, though:
@@ -376,6 +380,7 @@ As should this: Now, nested: foo + This should just be an HTML comment: Multiline: diff --git a/tests/writer.rst b/tests/writer.rst index 41da5bc73..68bc4a06c 100644 --- a/tests/writer.rst +++ b/tests/writer.rst @@ -394,7 +394,13 @@ And nested without indentation: .. raw:: html
+ +.. raw:: html +
+ +.. raw:: html +
foo @@ -402,7 +408,13 @@ foo .. raw:: html
+ +.. raw:: html +
+ +.. raw:: html +
bar @@ -410,6 +422,9 @@ bar .. raw:: html
+ +.. raw:: html +
Interpreted markdown in a table: @@ -442,7 +457,6 @@ Here’s a simple block: .. raw:: html
- foo @@ -469,16 +483,27 @@ Now, nested: .. raw:: html
-
-
- + +.. raw:: html + +
+ +.. raw:: html + +
foo .. raw:: html
-
+ +.. raw:: html + +
+ +.. raw:: html +
This should just be an HTML comment: diff --git a/tests/writer.textile b/tests/writer.textile index 31789a2b0..5042f79cb 100644 --- a/tests/writer.textile +++ b/tests/writer.textile @@ -352,20 +352,33 @@ h1(#html-blocks). HTML Blocks Simple block on one line:
+ foo +
And nested without indentation:
+
+
+ foo +
+ +
+
+ bar +
+ +
Interpreted markdown in a table: @@ -386,8 +399,9 @@ And this is *strong* Here's a simple block:
- + foo +
This should be a code block, though: @@ -405,12 +419,19 @@ bc.
foo
Now, nested:
-
-
- + +
+ +
+ foo + +
+ +
-
+ +
This should just be an HTML comment: -- cgit v1.2.3 From af786829a0d64e373218f4c84c105796e9663b6f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 16:22:56 -0700 Subject: Parsing: Added stateMeta' to ParserState. --- src/Text/Pandoc/Parsing.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 2f42aba41..c16d5bb1d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -801,6 +801,7 @@ data ParserState = ParserState stateNotes :: NoteTable, -- ^ List of notes (raw bodies) stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateMeta :: Meta, -- ^ Document metadata + stateMeta' :: F Meta, -- ^ Document metadata stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links) stateIdentifiers :: [String], -- ^ List of header identifiers used @@ -834,6 +835,7 @@ defaultParserState = stateNotes = [], stateNotes' = [], stateMeta = nullMeta, + stateMeta' = return nullMeta, stateHeaderTable = [], stateHeaders = M.empty, stateIdentifiers = [], -- cgit v1.2.3 From 0e2605ffdf69b7a6a7c942a986dec4283a886e82 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 18:39:04 -0700 Subject: Allow multiple YAML metadata blocks in document. --- README | 27 +++++---- src/Text/Pandoc/Readers/Markdown.hs | 107 +++++++++++++++++++----------------- 2 files changed, 73 insertions(+), 61 deletions(-) (limited to 'src/Text') diff --git a/README b/README index c1429aec3..7a2b01f49 100644 --- a/README +++ b/README @@ -1815,14 +1815,21 @@ YAML metadata block **Extension: `yaml_metadata_block`** -If the file begins with a YAML object, delimited by a line of three -hyphens (`---`) at the top and a line of three hyphens (`---`) or three -dots (`...`) at the bottom, metadata will be taken from the fields -of the YAML object. Metadata can contain lists and objects (nested -arbitrarily), but all string scalars will be interpreted as markdown. - -Fields with names ending in an underscore will be ignored by -pandoc. (They may be given a role by external processors.) +A YAML metadata block is a valid YAML object, delimited by a line of three +hyphens (`---`) at the top and a line of three hyphens (`---`) or three dots +(`...`) at the bottom. A YAML metadata block may occur anywhere in the +document, but if it is not at the beginning, it must be preceded by a blank +line. + +Metadata will be taken from the fields of the YAML object and added to any +existing document metadata. Metadata can contain lists and objects (nested +arbitrarily), but all string scalars will be interpreted as markdown. Fields +with names ending in an underscore will be ignored by pandoc. (They may be +given a role by external processors.) + +A document may contain multiple metadata blocks. The metadata fields will +be combined through a *left-biased union*: if two metadata blocks attempt +to set the same field, the value from the first block will be taken. Note that YAML escaping rules must be followed. Thus, for example, if a title contains a colon, it must be quoted. The pipe character @@ -1844,8 +1851,8 @@ when the field contains blank lines: It consists of two paragraphs. ... -Template variables will be set from the metadata. Thus, for example, -in writing HTML, the variable `abstract` will be set to the HTML +Template variables will be set automatically from the metadata. Thus, for +example, in writing HTML, the variable `abstract` will be set to the HTML equivalent of the markdown in the `abstract` field:

This is the abstract.

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 535fc02c6..a653c2e98 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -203,13 +203,10 @@ dateLine = try $ do skipSpaces trimInlinesF . mconcat <$> manyTill inline newline -titleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) -titleBlock = pandocTitleBlock - <|> yamlTitleBlock - <|> mmdTitleBlock - <|> return (return id) +titleBlock :: MarkdownParser () +titleBlock = pandocTitleBlock <|> mmdTitleBlock -pandocTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) +pandocTitleBlock :: MarkdownParser () pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block lookAhead (char '%') @@ -217,16 +214,18 @@ pandocTitleBlock = try $ do author <- option (return []) authorsLine date <- option mempty dateLine optional blanklines - return $ do - title' <- title - author' <- author - date' <- date - return $ if B.isNull title' then id else B.setMeta "title" title' - . if null author' then id else B.setMeta "author" author' - . if B.isNull date' then id else B.setMeta "date" date' - -yamlTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) -yamlTitleBlock = try $ do + let meta' = do title' <- title + author' <- author + date' <- date + return $ + ( if B.isNull title' then id else B.setMeta "title" title' + . if null author' then id else B.setMeta "author" author' + . if B.isNull date' then id else B.setMeta "date" date' ) + nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + +yamlMetaBlock :: MarkdownParser (F Blocks) +yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block pos <- getPosition string "---" @@ -236,33 +235,39 @@ yamlTitleBlock = try $ do let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines opts <- stateOptions <$> getState - case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> return $ return $ - H.foldrWithKey (\k v f -> - if ignorable k - then f - else B.setMeta (T.unpack k) (yamlToMeta opts v) . f) - id hashmap - Right Yaml.Null -> return $ return id - Right _ -> do - addWarning (Just pos) "YAML header is not an object" - return $ return id - Left err' -> do - case err' of - InvalidYaml (Just YamlParseException{ - yamlProblem = problem - , yamlContext = _ctxt - , yamlProblemMark = Yaml.YamlMark { - yamlLine = yline - , yamlColumn = ycol - }}) -> - addWarning (Just $ setSourceLine - (setSourceColumn pos (sourceColumn pos + ycol)) - (sourceLine pos + 1 + yline)) - $ "Could not parse YAML header: " ++ problem - _ -> addWarning (Just pos) - $ "Could not parse YAML header: " ++ show err' - return $ return id + meta' <- case Yaml.decodeEither' $ UTF8.fromString rawYaml of + Right (Yaml.Object hashmap) -> return $ return $ + H.foldrWithKey (\k v m -> + if ignorable k + then m + else B.setMeta (T.unpack k) + (yamlToMeta opts v) m) + nullMeta hashmap + Right Yaml.Null -> return $ return nullMeta + Right _ -> do + addWarning (Just pos) "YAML header is not an object" + return $ return nullMeta + Left err' -> do + case err' of + InvalidYaml (Just YamlParseException{ + yamlProblem = problem + , yamlContext = _ctxt + , yamlProblemMark = Yaml.YamlMark { + yamlLine = yline + , yamlColumn = ycol + }}) -> + addWarning (Just $ setSourceLine + (setSourceColumn pos + (sourceColumn pos + ycol)) + (sourceLine pos + 1 + yline)) + $ "Could not parse YAML header: " ++ + problem + _ -> addWarning (Just pos) + $ "Could not parse YAML header: " ++ + show err' + return $ return nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + return mempty -- ignore fields ending with _ ignorable :: Text -> Bool @@ -295,13 +300,13 @@ yamlToMeta _ _ = MetaString "" stopLine :: MarkdownParser () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () -mmdTitleBlock :: MarkdownParser (F (Pandoc -> Pandoc)) +mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair blanklines - return $ return $ \(Pandoc m bs) -> - Pandoc (foldl (\m' (k,v) -> addMetaField k v m') m kvPairs) bs + updateState $ \st -> st{ stateMeta' = stateMeta' st <> + return (Meta $ M.fromList kvPairs) } kvPair :: MarkdownParser (String, MetaValue) kvPair = try $ do @@ -318,15 +323,14 @@ parseMarkdown = do updateState $ \state -> state { stateOptions = let oldOpts = stateOptions state in oldOpts{ readerParseRaw = True } } - titleTrans <- option (return id) titleBlock + optional titleBlock blocks <- parseBlocks st <- getState + let meta = runF (stateMeta' st) st + let Pandoc _ bs = B.doc $ runF blocks st mbsty <- getOption readerCitationStyle refs <- getOption readerReferences - return $ processBiblio mbsty refs - $ runF titleTrans st - $ B.doc - $ runF blocks st + return $ processBiblio mbsty refs $ Pandoc meta bs addWarning :: Maybe SourcePos -> String -> MarkdownParser () addWarning mbpos msg = @@ -442,6 +446,7 @@ parseBlocks = mconcat <$> manyTill block eof block :: MarkdownParser (F Blocks) block = choice [ mempty <$ blanklines , codeBlockFenced + , yamlMetaBlock , guardEnabled Ext_latex_macros *> (macro >>= return . return) , header , lhsCodeBlock -- cgit v1.2.3 From e8ddcfd997bd1733b715a4321f0e57c7860071d2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Aug 2013 16:03:22 -0700 Subject: Scale LaTeX tables so they don't exceed columnwidth. --- src/Text/Pandoc/Writers/LaTeX.hs | 6 +++- tests/tables.latex | 64 ++++++++++++++++++++-------------------- 2 files changed, 37 insertions(+), 33 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 98553c421..ab579a326 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -470,9 +470,13 @@ tableRowToLaTeX header aligns widths cols = do AlignRight -> "\\raggedleft" AlignCenter -> "\\centering" AlignDefault -> "\\raggedright" + -- scale factor compensates for extra space between columns + -- so the whole table isn't larger than columnwidth + let scaleFactor = 0.97 ** fromIntegral (length aligns) let toCell 0 _ c = c toCell w a c = "\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" w)) <> + braces (text (printf "%.2f\\columnwidth" + (w * scaleFactor))) <> (halign a <> cr <> c <> cr) <> "\\end{minipage}" let cells = zipWith3 toCell widths aligns renderedCells return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" diff --git a/tests/tables.latex b/tests/tables.latex index 82abeb9a5..c27e10461 100644 --- a/tests/tables.latex +++ b/tests/tables.latex @@ -54,34 +54,34 @@ Multiline table with caption: \begin{longtable}[c]{@{}clrl@{}} \hline\noalign{\medskip} -\begin{minipage}[b]{0.15\columnwidth}\centering +\begin{minipage}[b]{0.13\columnwidth}\centering Centered Header -\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright Left Aligned -\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedleft Right Aligned -\end{minipage} & \begin{minipage}[b]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} \\\noalign{\medskip} \hline\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering First -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 12.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} \\\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering Second -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 5.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} \\\noalign{\medskip} @@ -94,34 +94,34 @@ Multiline table without caption: \begin{longtable}[c]{@{}clrl@{}} \hline\noalign{\medskip} -\begin{minipage}[b]{0.15\columnwidth}\centering +\begin{minipage}[b]{0.13\columnwidth}\centering Centered Header -\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright Left Aligned -\end{minipage} & \begin{minipage}[b]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[b]{0.14\columnwidth}\raggedleft Right Aligned -\end{minipage} & \begin{minipage}[b]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} \\\noalign{\medskip} \hline\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering First -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 12.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} \\\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering Second -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 5.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} \\\noalign{\medskip} @@ -145,23 +145,23 @@ Multiline table without column headers: \begin{longtable}[c]{@{}clrl@{}} \hline\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering First -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 12.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} \\\noalign{\medskip} -\begin{minipage}[t]{0.15\columnwidth}\centering +\begin{minipage}[t]{0.13\columnwidth}\centering Second -\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright row -\end{minipage} & \begin{minipage}[t]{0.16\columnwidth}\raggedleft +\end{minipage} & \begin{minipage}[t]{0.14\columnwidth}\raggedleft 5.0 -\end{minipage} & \begin{minipage}[t]{0.34\columnwidth}\raggedright +\end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} \\\noalign{\medskip} -- cgit v1.2.3 From 7048c130ec9d128dd1c9d1ddf8e7ce3c15eaf435 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Aug 2013 23:01:23 -0700 Subject: Create Cite element even if no matching reference in the biblio. * Add ??? as fallback text for non-resolved citations. * Biblio: Put references (including a header at the end of the document, if one exists) inside a Div with class "references". This gives some control over styling of references, and allows scripts to manipulate them. * Markdown writer: Print markdown citation codes, and disable printing of references, if `citations` extension is enabled. NOTE: It would be good to improve what citeproc-hs does for a nonexistent key. --- src/Text/Pandoc/Biblio.hs | 5 +- src/Text/Pandoc/Readers/Markdown.hs | 21 ++++---- src/Text/Pandoc/Writers/Markdown.hs | 35 +++++++------ tests/Tests/Old.hs | 2 +- tests/markdown-citations.chicago-author-date.txt | 10 +++- tests/markdown-citations.ieee.txt | 40 ++++++++------- tests/markdown-citations.mhra.txt | 62 ++++++++++++++---------- 7 files changed, 105 insertions(+), 70 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 206b38530..1c0975f11 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -55,7 +55,10 @@ processBiblio (Just style) r p = cits_map = M.fromList $ zip grps (citations result) biblioList = map (renderPandoc' style) (bibliography result) Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p' - in Pandoc m $ b ++ biblioList + (bs, lastb) = case reverse b of + x@(Header _ _ _) : xs -> (reverse xs, [x]) + _ -> (b, []) + in Pandoc m $ bs ++ [Div ("",["references"],[]) (lastb ++ biblioList)] -- | Substitute 'Cite' elements with formatted citations. processCite :: Style -> M.Map [Citation] [FormattedOutput] -> Inline -> Inline diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a653c2e98..05662d9b5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -55,7 +55,6 @@ import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Text.Pandoc.Biblio (processBiblio) -import qualified Text.CSL as CSL import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad @@ -1797,11 +1796,13 @@ rawHtmlInline = do cite :: MarkdownParser (F Inlines) cite = do guardEnabled Ext_citations - getOption readerReferences >>= guard . not . null - citations <- textualCite <|> normalCite - return $ flip B.cite mempty <$> citations + citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite) + return citations + +unknownC :: Inlines +unknownC = B.str "???" -textualCite :: MarkdownParser (F [Citation]) +textualCite :: MarkdownParser (F Inlines) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1813,8 +1814,12 @@ textualCite = try $ do } mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite case mbrest of - Just rest -> return $ (first:) <$> rest - Nothing -> option (return [first]) $ bareloc first + Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest + Nothing -> (fmap (flip B.cite unknownC) <$> bareloc first) <|> + return (do st <- askF + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] unknownC) bareloc :: Citation -> MarkdownParser (F [Citation]) bareloc c = try $ do @@ -1846,8 +1851,6 @@ citeKey = try $ do let internal p = try $ p >>~ lookAhead (letter <|> digit) rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/") let key = first:rest - citations' <- map CSL.refId <$> getOption readerReferences - guard $ key `elem` citations' return (suppress_author, key) suffix :: MarkdownParser (F Inlines) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 623c445df..d617954dd 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -186,7 +186,12 @@ pandocToMarkdown opts (Pandoc meta blocks) = do let toc = if writerTableOfContents opts then tableOfContents opts headerBlocks else empty - body <- blockListToMarkdown opts blocks + -- Strip off final 'references' header if markdown citations enabled + let blocks' = case reverse blocks of + (Div (_,["references"],_) _):xs + | isEnabled Ext_citations opts -> reverse xs + _ -> blocks + body <- blockListToMarkdown opts blocks' st <- get notes' <- notesToMarkdown opts (reverse $ stNotes st) st' <- get -- note that the notes may contain refs @@ -304,10 +309,10 @@ blockToMarkdown _ Null = return empty blockToMarkdown opts (Div attrs ils) = do isPlain <- gets stPlain contents <- blockListToMarkdown opts ils - return $ if isPlain + return $ if isPlain || not (isEnabled Ext_markdown_in_html_blocks opts) then contents <> blankline else tagWithAttrs "div" attrs <> blankline <> - contents <> blankline <> "
" <> blankline + contents <> blankline <> "
" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines return $ contents <> cr @@ -711,17 +716,20 @@ inlineToMarkdown opts (LineBreak) | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr | otherwise = return $ " " <> cr inlineToMarkdown _ Space = return space -inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _]) +inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (Cite (c:cs) lst) | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst - | citationMode c == AuthorInText = do - suffs <- inlineListToMarkdown opts $ citationSuffix c - rest <- mapM convertOne cs - let inbr = suffs <+> joincits rest - br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ text ("@" ++ citationId c) <+> br - | otherwise = do - cits <- mapM convertOne (c:cs) - return $ text "[" <> joincits cits <> text "]" + | otherwise = + if citationMode c == AuthorInText + then do + suffs <- inlineListToMarkdown opts $ citationSuffix c + rest <- mapM convertOne cs + let inbr = suffs <+> joincits rest + br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' + return $ text ("@" ++ citationId c) <+> br + else do + cits <- mapM convertOne (c:cs) + return $ text "[" <> joincits cits <> text "]" where joincits = hcat . intersperse (text "; ") . filter (not . isEmpty) convertOne Citation { citationId = k @@ -738,7 +746,6 @@ inlineToMarkdown opts (Cite (c:cs) lst@[RawInline "latex" _]) return $ pdoc <+> r modekey SuppressAuthor = "-" modekey _ = "" -inlineToMarkdown opts (Cite _ lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Link txt (src, tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 0ba240084..8609781d0 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -196,7 +196,7 @@ markdownCitationTests ++ [test "natbib" wopts "markdown-citations.txt" "markdown-citations.txt"] where - ropts = ["-r", "markdown", "-w", "markdown", "--bibliography", + ropts = ["-r", "markdown", "-w", "markdown-citations", "--bibliography", "biblio.bib", "--no-wrap"] wopts = ["-r", "markdown", "-w", "markdown", "--no-wrap", "--natbib"] styleToTest style = test style (ropts ++ ["--csl", style ++ ".csl"]) diff --git a/tests/markdown-citations.chicago-author-date.txt b/tests/markdown-citations.chicago-author-date.txt index de242300d..81d7482cb 100644 --- a/tests/markdown-citations.chicago-author-date.txt +++ b/tests/markdown-citations.chicago-author-date.txt @@ -1,9 +1,9 @@ Pandoc with citeproc-hs ======================= -- [@nonexistent] +- ([CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]) -- @nonexistent +- ([CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]) - Doe (2005) says blah. @@ -29,15 +29,21 @@ Pandoc with citeproc-hs - With some markup (*see* Doe 2005, 32). +
+ References ========== +“Nonexistent Not Found!” + Doe, John. 2005. *First Book*. Cambridge: Cambridge University Press. ———. 2006. “Article.” *Journal of Generic Studies* 6: 33–34. Doe, John, and Jenny Roe. 2007. “Why Water Is Wet.” In *Third Book*, edited by Sam Smith. Oxford: Oxford University Press. +
+ [^1]: Doe and Roe (2007, 12) and a citation without locators (Doe and Roe 2007). [^2]: Some citations (see Doe 2005, chap. 3; Doe and Roe 2007; Doe 2006). diff --git a/tests/markdown-citations.ieee.txt b/tests/markdown-citations.ieee.txt index a397e3f38..4085a7c63 100644 --- a/tests/markdown-citations.ieee.txt +++ b/tests/markdown-citations.ieee.txt @@ -1,45 +1,51 @@ Pandoc with citeproc-hs ======================= -- [@nonexistent] +- [] -- @nonexistent +- -- Reference 1 says blah. +- Reference 2 says blah. -- Reference 1 says blah. +- Reference 2 says blah. -- Reference 1 says blah. +- Reference 2 says blah. -- Reference 1 [3] says blah. +- Reference 2 [4] says blah. - In a note.[^1] -- A citation group [1], [3]. +- A citation group [2], [4]. -- Another one [1]. +- Another one [2]. - And another one in a note.[^2] -- Citation with a suffix and locator [1]. +- Citation with a suffix and locator [2]. -- Citation with suffix only [1]. +- Citation with suffix only [2]. - Now some modifiers.[^3] -- With some markup [1]. +- With some markup [2]. + +
References ========== -[1] J. Doe, *First Book*. Cambridge: Cambridge University Press, 2005. +[1]“nonexistent not found!” . + +[2] J. Doe, *First Book*. Cambridge: Cambridge University Press, 2005. + +[3] J. Doe, “Article,” *Journal of Generic Studies*, vol. 6, pp. 33–34, 2006. -[2] J. Doe, “Article,” *Journal of Generic Studies*, vol. 6, pp. 33–34, 2006. +[4] J. Doe and J. Roe, “Why Water Is Wet,” in *Third Book*, S. Smith, Ed. Oxford: Oxford University Press, 2007. -[3] J. Doe and J. Roe, “Why Water Is Wet,” in *Third Book*, S. Smith, Ed. Oxford: Oxford University Press, 2007. +
-[^1]: Reference 3 and a citation without locators [3]. +[^1]: Reference 4 and a citation without locators [4]. -[^2]: Some citations [1–3]. +[^2]: Some citations [2–4]. -[^3]: Like a citation without author: [1], and now Doe with a locator [2]. +[^3]: Like a citation without author: [2], and now Doe with a locator [3]. diff --git a/tests/markdown-citations.mhra.txt b/tests/markdown-citations.mhra.txt index d33a1b94b..01d9c45ca 100644 --- a/tests/markdown-citations.mhra.txt +++ b/tests/markdown-citations.mhra.txt @@ -1,33 +1,35 @@ Pandoc with citeproc-hs ======================= -- [@nonexistent] +- [^1] -- @nonexistent +- [^2] -- John Doe[^1] says blah. +- John Doe[^3] says blah. -- Doe[^2] says blah. +- Doe[^4] says blah. -- Doe[^3] says blah. +- Doe[^5] says blah. -- Doe[^4] says blah. +- Doe[^6] says blah. -- In a note.[^5] +- In a note.[^7] -- A citation group.[^6] +- A citation group.[^8] -- Another one.[^7] +- Another one.[^9] -- And another one in a note.[^8] +- And another one in a note.[^10] -- Citation with a suffix and locator.[^9] +- Citation with a suffix and locator.[^11] -- Citation with suffix only.[^10] +- Citation with suffix only.[^12] -- Now some modifiers.[^11] +- Now some modifiers.[^13] -- With some markup.[^12] +- With some markup.[^14] + +
References ========== @@ -38,26 +40,34 @@ Doe, John, ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34. Doe, John, and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). -[^1]: *First Book* (Cambridge: Cambridge University Press, 2005). +‘Nonexistent Not Found!’. + +
+ +[^1]: [CSL BIBLIOGRAPHIC DATA ERROR: reference "nonexistent" not found.]. + +[^2]: [CSL STYLE ERROR: reference with no printed form.]. + +[^3]: *First Book* (Cambridge: Cambridge University Press, 2005). -[^2]: *First Book*, p. 30. +[^4]: *First Book*, p. 30. -[^3]: *First Book*, p. 30, with suffix. +[^5]: *First Book*, p. 30, with suffix. -[^4]: *First Book*; ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34 (p. 30); see also John Doe and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). +[^6]: *First Book*; ‘Article’, *Journal of Generic Studies*, 6 (2006), 33–34 (p. 30); see also John Doe and Jenny Roe, ‘Why Water Is Wet’, in *Third Book*, ed. by Sam Smith (Oxford: Oxford University Press, 2007). -[^5]: Doe and Roe, p. 12 and a citation without locators Doe and Roe. +[^7]: Doe and Roe, p. 12 and a citation without locators Doe and Roe. -[^6]: See Doe, *First Book*, chap. 3; also Doe and Roe, pp. 34–35. +[^8]: See Doe, *First Book*, chap. 3; also Doe and Roe, pp. 34–35. -[^7]: See Doe, *First Book*, pp. 34–35. +[^9]: See Doe, *First Book*, pp. 34–35. -[^8]: Some citations see Doe, *First Book*, chap. 3; Doe and Roe; Doe, ‘Article’, 33–34. +[^10]: Some citations see Doe, *First Book*, chap. 3; Doe and Roe; Doe, ‘Article’, 33–34. -[^9]: Doe, *First Book*, pp. 33, 35–37, and nowhere else. +[^11]: Doe, *First Book*, pp. 33, 35–37, and nowhere else. -[^10]: Doe, *First Book* and nowhere else. +[^12]: Doe, *First Book* and nowhere else. -[^11]: Like a citation without author: *First Book*, and now Doe with a locator ‘Article’, 33–34 (p. 44). +[^13]: Like a citation without author: *First Book*, and now Doe with a locator ‘Article’, 33–34 (p. 44). -[^12]: *See* Doe, *First Book*, p. 32. +[^14]: *See* Doe, *First Book*, p. 32. -- cgit v1.2.3 From 0b5156cc7e6e77bb072e6f4e09f6468f6d8a8f60 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Wed, 21 Aug 2013 16:04:06 +1000 Subject: adding some cedilla characters to the LaTeX reader --- src/Text/Pandoc/Readers/LaTeX.hs | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 414e50fc8..37cec697b 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -673,6 +673,14 @@ cedilla 'c' = 'ç' cedilla 'C' = 'Ç' cedilla 's' = 'ş' cedilla 'S' = 'Ş' +cedilla 't' = 'ţ' +cedilla 'T' = 'Ţ' +cedilla 'e' = 'ȩ' +cedilla 'E' = 'Ȩ' +cedilla 'h' = 'ḩ' +cedilla 'H' = 'Ḩ' +cedilla 'o' = 'o̧' +cedilla 'O' = ''O̧ cedilla c = c hacek :: Char -> Char -- cgit v1.2.3 From 5b97b150cc63a5cab48a544e2b15881409702781 Mon Sep 17 00:00:00 2001 From: Scott Morrison Date: Wed, 21 Aug 2013 16:10:42 +1000 Subject: cedilla-o breaks the compile, removing again --- src/Text/Pandoc/Readers/LaTeX.hs | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 37cec697b..20ed88717 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -679,8 +679,6 @@ cedilla 'e' = 'ȩ' cedilla 'E' = 'Ȩ' cedilla 'h' = 'ḩ' cedilla 'H' = 'Ḩ' -cedilla 'o' = 'o̧' -cedilla 'O' = ''O̧ cedilla c = c hacek :: Char -> Char -- cgit v1.2.3 From 1d91e2cdb380a22b8d988291d726dd1612318b80 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 21 Aug 2013 20:07:36 -0700 Subject: LaTeX reader: Added o-cedilla. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ded57df5a..028d83e24 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -681,6 +681,8 @@ cedilla 'e' = 'ȩ' cedilla 'E' = 'Ȩ' cedilla 'h' = 'ḩ' cedilla 'H' = 'Ḩ' +cedilla 'o' = 'o̧' +cedilla 'O' = 'O̧' cedilla c = c hacek :: Char -> Char -- cgit v1.2.3 From 5f09cf7ff033ae11c5094fe39f8cd2ac11657229 Mon Sep 17 00:00:00 2001 From: Florian Eitel Date: Thu, 22 Aug 2013 20:15:36 +0200 Subject: Write id for code block to label attr in latex when listing is used The code: ~~~{#test} asdf ~~~ gets compiled to html:
    asdf
    
So it is possible to link to the identifier `test` But this doesn't happen on latex When using the listings package (`--listings`) it is possible to set the identifier using the `label=test` property: \begin{lstlisting}[label=id] hi \end{lstlisting} And this is exactly what this patch is doing. Modified LaTeX Reader/Writer and added tests for this. --- src/Text/Pandoc/Readers/LaTeX.hs | 3 ++- src/Text/Pandoc/Writers/LaTeX.hs | 8 ++++++-- tests/Tests/Readers/LaTeX.hs | 7 +++++++ tests/Tests/Writers/LaTeX.hs | 7 +++++++ 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ded57df5a..b785a9852 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -47,6 +47,7 @@ import Text.Pandoc.Builder import Data.Char (isLetter) import Control.Applicative import Data.Monoid +import Data.Maybe (fromMaybe) import System.Environment (getEnv) import System.FilePath (replaceExtension, ()) import Data.List (intercalate, intersperse) @@ -901,7 +902,7 @@ environments = M.fromList lookup "numbers" options == Just "left" ] ++ maybe [] (:[]) (lookup "language" options >>= fromListingsLanguage) - let attr = ("",classes,kvs) + let attr = (fromMaybe "" (lookup "label" options),classes,kvs) codeBlockWith attr <$> (verbEnv "lstlisting")) , ("minted", do options <- option [] keyvals lang <- grouped (many1 $ satisfy (/='}')) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index ab579a326..bf056001f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -313,7 +313,7 @@ blockToLaTeX (BlockQuote lst) = do _ -> do contents <- blockListToLaTeX lst return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" -blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do +blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && @@ -344,7 +344,11 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do [ (if key == "startFrom" then "firstnumber" else key) ++ "=" ++ attr | - (key,attr) <- keyvalAttr ] + (key,attr) <- keyvalAttr ] ++ + (if identifier == "" + then [] + else [ "label=" ++ identifier ]) + else [] printParams | null params = empty diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index 88029b7c2..dff6e4537 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -55,6 +55,13 @@ tests = [ testGroup "basic" "hi % this is a comment\nthere\n" =?> para "hi there" ] + , testGroup "code blocks" + [ "identifier" =: + "\\begin{lstlisting}[label=test]\\end{lstlisting}" =?> codeBlockWith ("test", [], [("label","test")]) "" + , "no identifier" =: + "\\begin{lstlisting}\\end{lstlisting}" =?> codeBlock "" + ] + , testGroup "citations" [ natbibCitations , biblatexCitations diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index ebde5b97c..5f702a85d 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -10,6 +10,9 @@ import Tests.Arbitrary() latex :: (ToString a, ToPandoc a) => a -> String latex = writeLaTeX def . toPandoc +latexListing :: (ToString a, ToPandoc a) => a -> String +latexListing = writeLaTeX def{ writerListings = True } . toPandoc + {- "my test" =: X =?> Y @@ -31,6 +34,10 @@ tests :: [Test] tests = [ testGroup "code blocks" [ "in footnotes" =: note (para "hi" <> codeBlock "hi") =?> "\\footnote{hi\n\n\\begin{Verbatim}\nhi\n\\end{Verbatim}\n}" + , test latexListing "identifier" $ codeBlockWith ("id",[],[]) "hi" =?> + ("\\begin{lstlisting}[label=id]\nhi\n\\end{lstlisting}" :: String) + , test latexListing "no identifier" $ codeBlock "hi" =?> + ("\\begin{lstlisting}\nhi\n\\end{lstlisting}" :: String) ] , testGroup "math" [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> -- cgit v1.2.3 From 74250b6c351180cb350150b8069824111193b913 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Aug 2013 16:10:13 -0700 Subject: Moved most of Text.Pandoc.Readers.TeXMath to texmath 0.6.4. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/TeXMath.hs | 84 +------------------------------------- 2 files changed, 3 insertions(+), 83 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index e22908918..ac28ad068 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -246,7 +246,7 @@ Library old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, - texmath >= 0.6.3 && < 0.7, + texmath >= 0.6.4 && < 0.7, xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index fe49a992e..1f7088f72 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -30,93 +30,13 @@ Conversion of TeX math to a list of 'Pandoc' inline elements. module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where import Text.Pandoc.Definition -import Text.TeXMath.Types -import Text.TeXMath.Parser +import Text.TeXMath -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. -- Defaults to raw formula between @$@ characters if entire formula -- can't be converted. readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings) -> [Inline] -readTeXMath inp = case texMathToPandoc inp of +readTeXMath inp = case texMathToPandoc DisplayInline inp of Left _ -> [Str ("$" ++ inp ++ "$")] Right res -> res - -texMathToPandoc :: String -> Either String [Inline] -texMathToPandoc inp = inp `seq` - case parseFormula inp of - Left err -> Left err - Right exps -> case expsToInlines exps of - Nothing -> Left "Formula too complex for [Inline]" - Just r -> Right r - -expsToInlines :: [Exp] -> Maybe [Inline] -expsToInlines xs = do - res <- mapM expToInlines xs - return (concat res) - -expToInlines :: Exp -> Maybe [Inline] -expToInlines (ENumber s) = Just [Str s] -expToInlines (EIdentifier s) = Just [Emph [Str s]] -expToInlines (EMathOperator s) = Just [Str s] -expToInlines (ESymbol t s) = Just $ addSpace t (Str s) - where addSpace Op x = [x, thinspace] - addSpace Bin x = [medspace, x, medspace] - addSpace Rel x = [widespace, x, widespace] - addSpace Pun x = [x, thinspace] - addSpace _ x = [x] - thinspace = Str "\x2006" - medspace = Str "\x2005" - widespace = Str "\x2004" -expToInlines (EStretchy x) = expToInlines x -expToInlines (EDelimited start end xs) = do - xs' <- mapM expToInlines xs - return $ [Str start] ++ concat xs' ++ [Str end] -expToInlines (EGrouped xs) = expsToInlines xs -expToInlines (ESpace "0.167em") = Just [Str "\x2009"] -expToInlines (ESpace "0.222em") = Just [Str "\x2005"] -expToInlines (ESpace "0.278em") = Just [Str "\x2004"] -expToInlines (ESpace "0.333em") = Just [Str "\x2004"] -expToInlines (ESpace "1em") = Just [Str "\x2001"] -expToInlines (ESpace "2em") = Just [Str "\x2001\x2001"] -expToInlines (ESpace _) = Just [Str " "] -expToInlines (EBinary _ _ _) = Nothing -expToInlines (ESub x y) = do - x' <- expToInlines x - y' <- expToInlines y - return $ x' ++ [Subscript y'] -expToInlines (ESuper x y) = do - x' <- expToInlines x - y' <- expToInlines y - return $ x' ++ [Superscript y'] -expToInlines (ESubsup x y z) = do - x' <- expToInlines x - y' <- expToInlines y - z' <- expToInlines z - return $ x' ++ [Subscript y'] ++ [Superscript z'] -expToInlines (EDown x y) = expToInlines (ESub x y) -expToInlines (EUp x y) = expToInlines (ESuper x y) -expToInlines (EDownup x y z) = expToInlines (ESubsup x y z) -expToInlines (EText TextNormal x) = Just [Str x] -expToInlines (EText TextBold x) = Just [Strong [Str x]] -expToInlines (EText TextMonospace x) = Just [Code nullAttr x] -expToInlines (EText TextItalic x) = Just [Emph [Str x]] -expToInlines (EText _ x) = Just [Str x] -expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) = - case accent of - '\x203E' -> Just [Emph [Str [c,'\x0304']]] -- bar - '\x00B4' -> Just [Emph [Str [c,'\x0301']]] -- acute - '\x0060' -> Just [Emph [Str [c,'\x0300']]] -- grave - '\x02D8' -> Just [Emph [Str [c,'\x0306']]] -- breve - '\x02C7' -> Just [Emph [Str [c,'\x030C']]] -- check - '.' -> Just [Emph [Str [c,'\x0307']]] -- dot - '\x00B0' -> Just [Emph [Str [c,'\x030A']]] -- ring - '\x20D7' -> Just [Emph [Str [c,'\x20D7']]] -- arrow right - '\x20D6' -> Just [Emph [Str [c,'\x20D6']]] -- arrow left - '\x005E' -> Just [Emph [Str [c,'\x0302']]] -- hat - '\x0302' -> Just [Emph [Str [c,'\x0302']]] -- hat - '~' -> Just [Emph [Str [c,'\x0303']]] -- tilde - _ -> Nothing -expToInlines _ = Nothing - - -- cgit v1.2.3 From deb59b62354e38df9c85ce6985e5c28dd2301ee7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 24 Aug 2013 22:27:08 -0700 Subject: Removed dependency on citeproc-hs. Going forward we'll use pandoc-citeproc, as an external filter. The `--bibliography`, `--csl`, and `--citation-abbreviation` fields have been removed. Instead one must include `bibliography`, `csl`, or `csl-abbrevs` fields in the document's YAML metadata. The filter can then be used as follows: pandoc --filter pandoc-citeproc The `Text.Pandoc.Biblio` module has been removed. Henceforth, `Text.CSL.Pandoc` from pandoc-citations can be used by library users. The Markdown and LaTeX readers now longer format bibliographies and citations. That must be done using `processCites` or `processCites'` from Text.CSL.Pandoc. All bibliography-related fields have been removed from `ReaderOptions` and `WriterOptions`: `writerBiblioFiles`, `readerReferences`, `readerCitationStyle`. API change. --- README | 105 ++++----- data/default.csl | 458 ------------------------------------ pandoc.cabal | 7 +- pandoc.hs | 68 +----- src/Text/Pandoc/Biblio.hs | 216 ----------------- src/Text/Pandoc/Options.hs | 7 - src/Text/Pandoc/Readers/LaTeX.hs | 5 +- src/Text/Pandoc/Readers/Markdown.hs | 5 +- src/Text/Pandoc/Writers/LaTeX.hs | 8 +- tests/Tests/Old.hs | 14 -- 10 files changed, 50 insertions(+), 843 deletions(-) delete mode 100644 data/default.csl delete mode 100644 src/Text/Pandoc/Biblio.hs (limited to 'src/Text') diff --git a/README b/README index 7a2b01f49..f85e62e14 100644 --- a/README +++ b/README @@ -598,54 +598,6 @@ Options affecting specific writers Citation rendering ------------------ -`--bibliography=`*FILE* -: Specify bibliography database to be used in resolving - citations. The database type will be determined from the - extension of *FILE*, which may be `.mods` (MODS format), - `.bib` (BibLaTeX format, which will normally work for BibTeX - files as well), `.bibtex` (BibTeX format), - `.ris` (RIS format), `.enl` (EndNote format), - `.xml` (EndNote XML format), `.wos` (ISI format), - `.medline` (MEDLINE format), `.copac` (Copac format), - or `.json` (citeproc JSON). If you want to use multiple - bibliographies, just use this option repeatedly. - -`--csl=`*FILE* -: Specify [CSL] style to be used in formatting citations and - the bibliography. If *FILE* is not found, pandoc will look - for it in - - $HOME/.csl - - in unix, - - C:\Documents And Settings\USERNAME\Application Data\csl - - in Windows XP, and - - C:\Users\USERNAME\AppData\Roaming\csl - - in Windows 7. If the `--csl` option is not specified, pandoc - will use a default style: either `default.csl` in the - user data directory (see `--data-dir`), or, if that is - not present, the Chicago author-date style. - -`--citation-abbreviations=`*FILE* -: Specify a file containing abbreviations for journal titles and - other bibliographic fields (indicated by setting `form="short"` - in the CSL node for the field). The format is described at - . - Here is a short example: - - { "default": { - "container-title": { - "Lloyd's Law Reports": "Lloyd's Rep", - "Estates Gazette": "EG", - "Scots Law Times": "SLT" - } - } - } - `--natbib` : Use natbib for citations in LaTeX output. @@ -2378,9 +2330,14 @@ Citations **Extension: `citations`** -Pandoc can automatically generate citations and a bibliography in a number of -styles (using Andrea Rossato's `hs-citeproc`). In order to use this feature, -you will need a bibliographic database in one of the following formats: +Using an external filter, `pandoc-citeproc`, pandoc can automatically generate +citations and a bibliography in a number of styles. Basic usage is + + pandoc --filter pandoc-citeproc myinput.txt + +In order to use this feature, you will need to specify a bibliography file +using the `bibliography` metadata field in a YAML metadata section. +The bibliography may have any of these formats: Format File extension ------------ -------------- @@ -2398,18 +2355,40 @@ you will need a bibliographic database in one of the following formats: Note that `.bib` can generally be used with both BibTeX and BibLaTeX files, but you can use `.bibtex` to force BibTeX. -You will need to specify the bibliography file using the `--bibliography` -command-line option (which may be repeated if you have several -bibliographies). - -By default, pandoc will use a Chicago author-date format for citations -and references. To use another style, you will need to use the -`--csl` option to specify a [CSL] 1.0 style file. A primer on -creating and modifying CSL styles can be found at -. -A repository of CSL styles can be found at -. -See also for easy browsing. +Alternatively you can use a `references` field in the document's YAML +metadata. This should include an array of YAML-encoded references, +for example: + + --- + references: + - id: fenner2012a + title: One-click science marketing + author: + - family: Fenner + given: Martin + container-title: Nature Materials + volume: 11 + URL: 'http://dx.doi.org/10.1038/nmat3283' + DOI: 10.1038/nmat3283 + issue: 4 + publisher: Nature Publishing Group + page: 261-263 + type: article-journal + issued: + year: 2012 + month: 3 + ... + +(The program `mods2yaml`, which comes with `pandoc-citeproc`, can help produce +these from a MODS reference collection.) + +By default, `pandoc-citeproc` will use a Chicago author-date format for +citations and references. To use another style, you will need to specify +a [CSL] 1.0 style file in the `csl` metadata field. A primer on creating and +modifying CSL styles can be found at +. A repository of CSL styles +can be found at . See also + for easy browsing. Citations go inside square brackets and are separated by semicolons. Each citation must have a key, composed of '@' + the citation diff --git a/data/default.csl b/data/default.csl deleted file mode 100644 index 83a70d0b5..000000000 --- a/data/default.csl +++ /dev/null @@ -1,458 +0,0 @@ - - diff --git a/pandoc.cabal b/pandoc.cabal index ac28ad068..0ab990a17 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -99,8 +99,6 @@ Data-Files: data/slideous/slideous.js, -- data for dzslides writer data/dzslides/template.html, - -- data for citeproc - data/default.csl, -- sample lua custom writer data/sample.lua -- documentation @@ -250,7 +248,6 @@ Library xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, - citeproc-hs >= 0.3.7 && < 0.4, pandoc-types >= 1.12 && < 1.13, aeson >= 0.6 && < 0.7, tagsoup >= 0.12.5 && < 0.14, @@ -323,7 +320,6 @@ Library Text.Pandoc.UTF8, Text.Pandoc.Templates, Text.Pandoc.XML, - Text.Pandoc.Biblio, Text.Pandoc.SelfContained, Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Haddock.Lex, @@ -353,8 +349,7 @@ Executable pandoc extensible-exceptions >= 0.1 && < 0.2, highlighting-kate >= 0.5.5 && < 0.6, aeson >= 0.6 && < 0.7, - HTTP >= 4000.0.5 && < 4000.3, - citeproc-hs >= 0.3.7 && < 0.4 + HTTP >= 4000.0.5 && < 4000.3 Ghc-Options: -rtsopts -with-rtsopts=-K16m -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -auto-all -caf-all -rtsopts -with-rtsopts=-K16m if os(windows) diff --git a/pandoc.hs b/pandoc.hs index 8eed67544..6ad5694f1 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -35,7 +35,7 @@ import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, safeRead, headerShift, normalize, err, warn ) -import Text.Pandoc.XML ( toEntities, fromEntities ) +import Text.Pandoc.XML ( toEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) import Text.Pandoc.Process (pipeProcess) import Text.Highlighting.Kate ( languages, Style, tango, pygments, @@ -46,20 +46,18 @@ import System.FilePath import System.Console.GetOpt import Data.Char ( toLower ) import Data.List ( intercalate, isPrefixOf, sort ) -import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) +import System.Directory ( getAppUserDataDirectory, findExecutable ) import System.IO ( stdout, stderr ) import System.IO.Error ( isDoesNotExistError ) import qualified Control.Exception as E import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Text.CSL as CSL import Control.Monad (when, unless, liftM) import Data.Foldable (foldrM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString as BS -import Text.CSL.Reference (Reference(..)) import Data.Aeson (eitherDecode', encode) copyrightMessage :: String @@ -70,7 +68,7 @@ copyrightMessage = "\nCopyright (C) 2006-2013 John MacFarlane\n" ++ compileInfo :: String compileInfo = - "\nCompiled with citeproc-hs " ++ VERSION_citeproc_hs ++ ", texmath " ++ + "\nCompiled with texmath " ++ VERSION_texmath ++ ", highlighting-kate " ++ VERSION_highlighting_kate ++ ".\nSyntax highlighting is supported for the following languages:\n " ++ wrapWords 4 78 @@ -146,9 +144,6 @@ data Opt = Opt , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites - , optBibliography :: [String] - , optCslFile :: Maybe FilePath - , optAbbrevsFile :: Maybe FilePath , optListings :: Bool -- ^ Use listings package for code blocks , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf , optSlideLevel :: Maybe Int -- ^ Header level that creates slides @@ -203,9 +198,6 @@ defaultOpts = Opt , optIndentedCodeClasses = [] , optDataDir = Nothing , optCiteMethod = Citeproc - , optBibliography = [] - , optCslFile = Nothing - , optAbbrevsFile = Nothing , optListings = False , optLaTeXEngine = "pdflatex" , optSlideLevel = Nothing @@ -650,24 +642,6 @@ options = "PROGRAM") "" -- "Name of latex program to use in generating PDF" - , Option "" ["bibliography"] - (ReqArg - (\arg opt -> return opt { optBibliography = (optBibliography opt) ++ [arg] }) - "FILENAME") - "" - - , Option "" ["csl"] - (ReqArg - (\arg opt -> return opt { optCslFile = Just arg }) - "FILENAME") - "" - - , Option "" ["citation-abbreviations"] - (ReqArg - (\arg opt -> return opt { optAbbrevsFile = Just arg }) - "FILENAME") - "" - , Option "" ["natbib"] (NoArg (\opt -> return opt { optCiteMethod = Natbib })) @@ -904,9 +878,6 @@ main = do , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses , optDataDir = mbDataDir - , optBibliography = reffiles - , optCslFile = mbCsl - , optAbbrevsFile = cslabbrevs , optCiteMethod = citeMethod , optListings = listings , optLaTeXEngine = latexEngine @@ -1007,36 +978,6 @@ main = do $ lines dztempl return $ ("dzslides-core", dzcore) : variables' else return variables' - - -- unescape reference ids, which may contain XML entities, so - -- that we can do lookups with regular string equality - let unescapeRefId ref = ref{ refId = fromEntities (refId ref) } - - refs <- mapM (\f -> E.catch (CSL.readBiblioFile f) - (\e -> let _ = (e :: E.SomeException) - in err 23 $ "Error reading bibliography `" ++ f ++ - "'" ++ "\n" ++ show e)) - reffiles >>= - return . map unescapeRefId . concat - - mbsty <- if citeMethod == Citeproc && not (null refs) - then do - csl <- CSL.parseCSL =<< - case mbCsl of - Nothing -> readDataFileUTF8 datadir - "default.csl" - Just cslfile -> do - exists <- doesFileExist cslfile - if exists - then UTF8.readFile cslfile - else do - csldir <- getAppUserDataDirectory "csl" - readDataFileUTF8 (Just csldir) - (replaceExtension cslfile "csl") - abbrevs <- maybe (return []) CSL.readJsonAbbrevFile cslabbrevs - return $ Just csl { CSL.styleAbbrevs = abbrevs } - else return Nothing - let sourceURL = case sources of [] -> Nothing (x:_) -> case parseURI x of @@ -1054,8 +995,6 @@ main = do , readerColumns = columns , readerTabStop = tabStop , readerOldDashes = oldDashes - , readerReferences = refs - , readerCitationStyle = mbsty , readerIndentedCodeClasses = codeBlockClasses , readerApplyMacros = not laTeXOutput , readerDefaultImageExtension = defaultImageExtension @@ -1069,7 +1008,6 @@ main = do writerHTMLMathMethod = mathMethod, writerIncremental = incremental, writerCiteMethod = citeMethod, - writerBiblioFiles = reffiles, writerIgnoreNotes = False, writerNumberSections = numberSections, writerNumberOffset = numberFrom, diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs deleted file mode 100644 index 1c0975f11..000000000 --- a/src/Text/Pandoc/Biblio.hs +++ /dev/null @@ -1,216 +0,0 @@ -{-# LANGUAGE PatternGuards #-} -{- -Copyright (C) 2008 Andrea Rossato - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Biblio - Copyright : Copyright (C) 2008-2010 Andrea Rossato - License : GNU GPL, version 2 or above - - Maintainer : Andrea Rossato - Stability : alpha - Portability : portable --} - -module Text.Pandoc.Biblio ( processBiblio ) where - -import Data.List -import Data.Char ( isDigit, isPunctuation ) -import qualified Data.Map as M -import Text.CSL hiding ( Cite(..), Citation(..), endWithPunct ) -import qualified Text.CSL as CSL ( Cite(..) ) -import Text.Pandoc.Definition -import Text.Pandoc.Generic -import Text.Pandoc.Walk -import Text.Pandoc.Shared (stringify) -import Text.Parsec hiding (State) -import Control.Monad -import Control.Monad.State - --- | Process a 'Pandoc' document by adding citations formatted --- according to a CSL style, using 'citeproc' from citeproc-hs. -processBiblio :: Maybe Style -> [Reference] -> Pandoc -> Pandoc -processBiblio Nothing _ p = p -processBiblio _ [] p = p -processBiblio (Just style) r p = - let p' = evalState (bottomUpM setHash p) 1 - grps = query getCitation p' - result = citeproc procOpts style r (setNearNote style $ - map (map toCslCite) grps) - cits_map = M.fromList $ zip grps (citations result) - biblioList = map (renderPandoc' style) (bibliography result) - Pandoc m b = bottomUp mvPunct . deNote . topDown (processCite style cits_map) $ p' - (bs, lastb) = case reverse b of - x@(Header _ _ _) : xs -> (reverse xs, [x]) - _ -> (b, []) - in Pandoc m $ bs ++ [Div ("",["references"],[]) (lastb ++ biblioList)] - --- | Substitute 'Cite' elements with formatted citations. -processCite :: Style -> M.Map [Citation] [FormattedOutput] -> Inline -> Inline -processCite s cs (Cite t _) = - case M.lookup t cs of - Just (x:xs) - | isTextualCitation t && not (null xs) -> - let xs' = renderPandoc s xs - in if styleClass s == "note" - then Cite t (renderPandoc s [x] ++ [Note [Para xs']]) - else Cite t (renderPandoc s [x] ++ [Space | not (startWithPunct xs')] ++ xs') - | otherwise -> if styleClass s == "note" - then Cite t [Note [Para $ renderPandoc s (x:xs)]] - else Cite t (renderPandoc s (x:xs)) - _ -> Strong [Str "???"] -- TODO raise error instead? -processCite _ _ x = x - -isNote :: Inline -> Bool -isNote (Note _) = True -isNote (Cite _ [Note _]) = True -isNote _ = False - -mvPunct :: [Inline] -> [Inline] -mvPunct (Space : Space : xs) = Space : xs -mvPunct (Space : x : ys) | isNote x, startWithPunct ys = - Str (headInline ys) : x : tailFirstInlineStr ys -mvPunct (Space : x : ys) | isNote x = x : ys -mvPunct xs = xs - --- A replacement for citeproc-hs's endWithPunct, which wrongly treats --- a sentence ending in '.)' as not ending with punctuation, leading --- to an extra period. -endWithPunct :: [Inline] -> Bool -endWithPunct [] = True -endWithPunct xs@(_:_) = case reverse (stringify [last xs]) of - [] -> True - (')':c:_) | isEndPunct c -> True - (c:_) | isEndPunct c -> True - | otherwise -> False - where isEndPunct c = c `elem` ".,;:!?" - -deNote :: Pandoc -> Pandoc -deNote = topDown go - where go (Cite (c:cs) [Note xs]) = - Cite (c:cs) [Note $ bottomUp go' $ sanitize c xs] - go (Note xs) = Note $ bottomUp go' xs - go x = x - go' (Note [Para xs]:ys) = - if startWithPunct ys && endWithPunct xs - then initInline xs ++ ys - else xs ++ ys - go' xs = xs - sanitize :: Citation -> [Block] -> [Block] - sanitize Citation{citationPrefix = pref} [Para xs] = - case (null pref, endWithPunct xs) of - (True, False) -> [Para $ xs ++ [Str "."]] - (True, True) -> [Para xs] - (False, False) -> [Para $ toCapital $ xs ++ [Str "."]] - (False, True) -> [Para $ toCapital xs] - sanitize _ bs = bs - -isTextualCitation :: [Citation] -> Bool -isTextualCitation (c:_) = citationMode c == AuthorInText -isTextualCitation _ = False - --- | Retrieve all citations from a 'Pandoc' docuument. To be used with --- 'query'. -getCitation :: Inline -> [[Citation]] -getCitation i | Cite t _ <- i = [t] - | otherwise = [] - -setHash :: Citation -> State Int Citation -setHash c = do - ident <- get - put $ ident + 1 - return c{ citationHash = ident } - -toCslCite :: Citation -> CSL.Cite -toCslCite c - = let (l, s) = locatorWords $ citationSuffix c - (la,lo) = parseLocator l - s' = case (l,s) of - -- treat a bare locator as if it begins with space - -- so @item1 [blah] is like [@item1, blah] - ("",(x:_)) - | not (isPunct x) -> [Space] ++ s - _ -> s - isPunct (Str (x:_)) = isPunctuation x - isPunct _ = False - citMode = case citationMode c of - AuthorInText -> (True, False) - SuppressAuthor -> (False,True ) - NormalCitation -> (False,False) - in emptyCite { CSL.citeId = citationId c - , CSL.citePrefix = PandocText $ citationPrefix c - , CSL.citeSuffix = PandocText s' - , CSL.citeLabel = la - , CSL.citeLocator = lo - , CSL.citeNoteNumber = show $ citationNoteNum c - , CSL.authorInText = fst citMode - , CSL.suppressAuthor = snd citMode - , CSL.citeHash = citationHash c - } - -locatorWords :: [Inline] -> (String, [Inline]) -locatorWords inp = - case parse pLocatorWords "suffix" $ breakup inp of - Right r -> r - Left _ -> ("",inp) - where breakup [] = [] - breakup (Str x : xs) = map Str (splitup x) ++ breakup xs - breakup (x : xs) = x : breakup xs - splitup = groupBy (\x y -> x /= '\160' && y /= '\160') - -pLocatorWords :: Parsec [Inline] st (String, [Inline]) -pLocatorWords = do - l <- pLocator - s <- getInput -- rest is suffix - if length l > 0 && last l == ',' - then return (init l, Str "," : s) - else return (l, s) - -pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline -pMatch condition = try $ do - t <- anyToken - guard $ condition t - return t - -pSpace :: Parsec [Inline] st Inline -pSpace = pMatch (\t -> t == Space || t == Str "\160") - -pLocator :: Parsec [Inline] st String -pLocator = try $ do - optional $ pMatch (== Str ",") - optional pSpace - f <- (guardFollowingDigit >> return [Str "p"]) -- "page" the default - <|> many1 (notFollowedBy pSpace >> anyToken) - gs <- many1 pWordWithDigits - return $ stringify f ++ (' ' : unwords gs) - -guardFollowingDigit :: Parsec [Inline] st () -guardFollowingDigit = do - t <- lookAhead anyToken - case t of - Str (d:_) | isDigit d -> return () - _ -> mzero - -pWordWithDigits :: Parsec [Inline] st String -pWordWithDigits = try $ do - optional pSpace - r <- many1 (notFollowedBy pSpace >> anyToken) - let s = stringify r - guard $ any isDigit s - return s - diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index c7c37d6b8..48e418ab2 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -48,7 +48,6 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Default import Text.Pandoc.Highlighting (Style, pygments) -import qualified Text.CSL as CSL -- | Individually selectable syntax extensions. data Extension = @@ -205,8 +204,6 @@ data ReaderOptions = ReaderOptions{ , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior -- in parsing dashes; -- is em-dash; -- - before numerial is en-dash - , readerReferences :: [CSL.Reference] -- ^ Bibliographic references - , readerCitationStyle :: Maybe CSL.Style -- ^ Citation style , readerApplyMacros :: Bool -- ^ Apply macros to TeX math , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks @@ -223,8 +220,6 @@ instance Default ReaderOptions , readerColumns = 80 , readerTabStop = 4 , readerOldDashes = False - , readerReferences = [] - , readerCitationStyle = Nothing , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerDefaultImageExtension = "" @@ -289,7 +284,6 @@ data WriterOptions = WriterOptions , writerSourceURL :: Maybe String -- ^ Absolute URL + directory of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations , writerHtml5 :: Bool -- ^ Produce HTML5 , writerHtmlQTags :: Bool -- ^ Use @@ tags for quotes in HTML , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show @@ -332,7 +326,6 @@ instance Default WriterOptions where , writerSourceURL = Nothing , writerUserDataDir = Nothing , writerCiteMethod = Citeproc - , writerBiblioFiles = [] , writerHtml5 = False , writerHtmlQTags = False , writerBeamer = False diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ded57df5a..e558ed1b9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,7 +38,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Biblio (processBiblio) import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) @@ -67,9 +66,7 @@ parseLaTeX = do eof st <- getState let meta = stateMeta st - refs <- getOption readerReferences - mbsty <- getOption readerCitationStyle - let (Pandoc _ bs') = processBiblio mbsty refs $ doc bs + let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' type LP = Parser [Char] ParserState diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 05662d9b5..658335202 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -54,7 +54,6 @@ import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) -import Text.Pandoc.Biblio (processBiblio) import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad @@ -327,9 +326,7 @@ parseMarkdown = do st <- getState let meta = runF (stateMeta' st) st let Pandoc _ bs = B.doc $ runF blocks st - mbsty <- getOption readerCitationStyle - refs <- getOption readerReferences - return $ processBiblio mbsty refs $ Pandoc meta bs + return $ Pandoc meta bs addWarning :: Maybe SourcePos -> String -> MarkdownParser () addWarning mbpos msg = diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index ab579a326..6a781ddec 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -43,7 +43,6 @@ import Data.Char ( toLower, isPunctuation ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty -import System.FilePath (dropExtension) import Text.Pandoc.Slides import Text.Pandoc.Highlighting (highlight, styleToLaTeX, formatLaTeXInline, formatLaTeXBlock, @@ -120,7 +119,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader let main = render colwidth $ vsep body st <- get - let biblioFiles = intercalate "," $ map dropExtension $ writerBiblioFiles options let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if writerChapters options @@ -152,11 +150,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do $ writerHighlightStyle options ) else id) $ (case writerCiteMethod options of - Natbib -> defField "biblio-files" biblioFiles . - defField "biblio-title" biblioTitle . + Natbib -> defField "biblio-title" biblioTitle . defField "natbib" True - Biblatex -> defField "biblio-files" biblioFiles . - defField "biblio-title" biblioTitle . + Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ metadata diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index 8609781d0..5054559a1 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -63,7 +63,6 @@ tests = [ testGroup "markdown" "markdown-reader-more.txt" "markdown-reader-more.native" , lhsReaderTest "markdown+lhs" ] - , testGroup "citations" markdownCitationTests ] , testGroup "rst" [ testGroup "writer" (writerTests "rst" ++ lhsWriterTests "rst") @@ -190,19 +189,6 @@ fb2WriterTest title opts inputfile normfile = ignoreBinary = unlines . filter (not . startsWith " [String] -- ^ Options to pass to pandoc -- cgit v1.2.3 From 80148095781b44c7f5af132b48605adaa93a0558 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 Aug 2013 20:12:21 -0700 Subject: LaTeX reader: Allow accents with combining characters. accent now returns [Char], not Char. --- src/Text/Pandoc/Readers/LaTeX.hs | 370 +++++++++++++++++++-------------------- 1 file changed, 185 insertions(+), 185 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 5d73134cd..b9ca986fb 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -543,196 +543,196 @@ doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char ' lit :: String -> LP Inlines lit = pure . str -accent :: (Char -> Char) -> Inlines -> LP Inlines +accent :: (Char -> String) -> Inlines -> LP Inlines accent f ils = case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys) + (Str (x:xs) : ys) -> return $ fromList $ (Str (f x ++ xs) : ys) [] -> mzero _ -> return ils -grave :: Char -> Char -grave 'A' = 'À' -grave 'E' = 'È' -grave 'I' = 'Ì' -grave 'O' = 'Ò' -grave 'U' = 'Ù' -grave 'a' = 'à' -grave 'e' = 'è' -grave 'i' = 'ì' -grave 'o' = 'ò' -grave 'u' = 'ù' -grave c = c - -acute :: Char -> Char -acute 'A' = 'Á' -acute 'E' = 'É' -acute 'I' = 'Í' -acute 'O' = 'Ó' -acute 'U' = 'Ú' -acute 'Y' = 'Ý' -acute 'a' = 'á' -acute 'e' = 'é' -acute 'i' = 'í' -acute 'o' = 'ó' -acute 'u' = 'ú' -acute 'y' = 'ý' -acute 'C' = 'Ć' -acute 'c' = 'ć' -acute 'L' = 'Ĺ' -acute 'l' = 'ĺ' -acute 'N' = 'Ń' -acute 'n' = 'ń' -acute 'R' = 'Ŕ' -acute 'r' = 'ŕ' -acute 'S' = 'Ś' -acute 's' = 'ś' -acute 'Z' = 'Ź' -acute 'z' = 'ź' -acute c = c - -circ :: Char -> Char -circ 'A' = 'Â' -circ 'E' = 'Ê' -circ 'I' = 'Î' -circ 'O' = 'Ô' -circ 'U' = 'Û' -circ 'a' = 'â' -circ 'e' = 'ê' -circ 'i' = 'î' -circ 'o' = 'ô' -circ 'u' = 'û' -circ 'C' = 'Ĉ' -circ 'c' = 'ĉ' -circ 'G' = 'Ĝ' -circ 'g' = 'ĝ' -circ 'H' = 'Ĥ' -circ 'h' = 'ĥ' -circ 'J' = 'Ĵ' -circ 'j' = 'ĵ' -circ 'S' = 'Ŝ' -circ 's' = 'ŝ' -circ 'W' = 'Ŵ' -circ 'w' = 'ŵ' -circ 'Y' = 'Ŷ' -circ 'y' = 'ŷ' -circ c = c - -tilde :: Char -> Char -tilde 'A' = 'Ã' -tilde 'a' = 'ã' -tilde 'O' = 'Õ' -tilde 'o' = 'õ' -tilde 'I' = 'Ĩ' -tilde 'i' = 'ĩ' -tilde 'U' = 'Ũ' -tilde 'u' = 'ũ' -tilde 'N' = 'Ñ' -tilde 'n' = 'ñ' -tilde c = c - -umlaut :: Char -> Char -umlaut 'A' = 'Ä' -umlaut 'E' = 'Ë' -umlaut 'I' = 'Ï' -umlaut 'O' = 'Ö' -umlaut 'U' = 'Ü' -umlaut 'a' = 'ä' -umlaut 'e' = 'ë' -umlaut 'i' = 'ï' -umlaut 'o' = 'ö' -umlaut 'u' = 'ü' -umlaut c = c - -dot :: Char -> Char -dot 'C' = 'Ċ' -dot 'c' = 'ċ' -dot 'E' = 'Ė' -dot 'e' = 'ė' -dot 'G' = 'Ġ' -dot 'g' = 'ġ' -dot 'I' = 'İ' -dot 'Z' = 'Ż' -dot 'z' = 'ż' -dot c = c - -macron :: Char -> Char -macron 'A' = 'Ā' -macron 'E' = 'Ē' -macron 'I' = 'Ī' -macron 'O' = 'Ō' -macron 'U' = 'Ū' -macron 'a' = 'ā' -macron 'e' = 'ē' -macron 'i' = 'ī' -macron 'o' = 'ō' -macron 'u' = 'ū' -macron c = c - -cedilla :: Char -> Char -cedilla 'c' = 'ç' -cedilla 'C' = 'Ç' -cedilla 's' = 'ş' -cedilla 'S' = 'Ş' -cedilla 't' = 'ţ' -cedilla 'T' = 'Ţ' -cedilla 'e' = 'ȩ' -cedilla 'E' = 'Ȩ' -cedilla 'h' = 'ḩ' -cedilla 'H' = 'Ḩ' -cedilla 'o' = 'o̧' -cedilla 'O' = 'O̧' -cedilla c = c - -hacek :: Char -> Char -hacek 'A' = 'Ǎ' -hacek 'a' = 'ǎ' -hacek 'C' = 'Č' -hacek 'c' = 'č' -hacek 'D' = 'Ď' -hacek 'd' = 'ď' -hacek 'E' = 'Ě' -hacek 'e' = 'ě' -hacek 'G' = 'Ǧ' -hacek 'g' = 'ǧ' -hacek 'H' = 'Ȟ' -hacek 'h' = 'ȟ' -hacek 'I' = 'Ǐ' -hacek 'i' = 'ǐ' -hacek 'j' = 'ǰ' -hacek 'K' = 'Ǩ' -hacek 'k' = 'ǩ' -hacek 'L' = 'Ľ' -hacek 'l' = 'ľ' -hacek 'N' = 'Ň' -hacek 'n' = 'ň' -hacek 'O' = 'Ǒ' -hacek 'o' = 'ǒ' -hacek 'R' = 'Ř' -hacek 'r' = 'ř' -hacek 'S' = 'Š' -hacek 's' = 'š' -hacek 'T' = 'Ť' -hacek 't' = 'ť' -hacek 'U' = 'Ǔ' -hacek 'u' = 'ǔ' -hacek 'Z' = 'Ž' -hacek 'z' = 'ž' -hacek c = c - -breve :: Char -> Char -breve 'A' = 'Ă' -breve 'a' = 'ă' -breve 'E' = 'Ĕ' -breve 'e' = 'ĕ' -breve 'G' = 'Ğ' -breve 'g' = 'ğ' -breve 'I' = 'Ĭ' -breve 'i' = 'ĭ' -breve 'O' = 'Ŏ' -breve 'o' = 'ŏ' -breve 'U' = 'Ŭ' -breve 'u' = 'ŭ' -breve c = c +grave :: Char -> String +grave 'A' = "À" +grave 'E' = "È" +grave 'I' = "Ì" +grave 'O' = "Ò" +grave 'U' = "Ù" +grave 'a' = "à" +grave 'e' = "è" +grave 'i' = "ì" +grave 'o' = "ò" +grave 'u' = "ù" +grave c = [c] + +acute :: Char -> String +acute 'A' = "Á" +acute 'E' = "É" +acute 'I' = "Í" +acute 'O' = "Ó" +acute 'U' = "Ú" +acute 'Y' = "Ý" +acute 'a' = "á" +acute 'e' = "é" +acute 'i' = "í" +acute 'o' = "ó" +acute 'u' = "ú" +acute 'y' = "ý" +acute 'C' = "Ć" +acute 'c' = "ć" +acute 'L' = "Ĺ" +acute 'l' = "ĺ" +acute 'N' = "Ń" +acute 'n' = "ń" +acute 'R' = "Ŕ" +acute 'r' = "ŕ" +acute 'S' = "Ś" +acute 's' = "ś" +acute 'Z' = "Ź" +acute 'z' = "ź" +acute c = [c] + +circ :: Char -> String +circ 'A' = "Â" +circ 'E' = "Ê" +circ 'I' = "Î" +circ 'O' = "Ô" +circ 'U' = "Û" +circ 'a' = "â" +circ 'e' = "ê" +circ 'i' = "î" +circ 'o' = "ô" +circ 'u' = "û" +circ 'C' = "Ĉ" +circ 'c' = "ĉ" +circ 'G' = "Ĝ" +circ 'g' = "ĝ" +circ 'H' = "Ĥ" +circ 'h' = "ĥ" +circ 'J' = "Ĵ" +circ 'j' = "ĵ" +circ 'S' = "Ŝ" +circ 's' = "ŝ" +circ 'W' = "Ŵ" +circ 'w' = "ŵ" +circ 'Y' = "Ŷ" +circ 'y' = "ŷ" +circ c = [c] + +tilde :: Char -> String +tilde 'A' = "Ã" +tilde 'a' = "ã" +tilde 'O' = "Õ" +tilde 'o' = "õ" +tilde 'I' = "Ĩ" +tilde 'i' = "ĩ" +tilde 'U' = "Ũ" +tilde 'u' = "ũ" +tilde 'N' = "Ñ" +tilde 'n' = "ñ" +tilde c = [c] + +umlaut :: Char -> String +umlaut 'A' = "Ä" +umlaut 'E' = "Ë" +umlaut 'I' = "Ï" +umlaut 'O' = "Ö" +umlaut 'U' = "Ü" +umlaut 'a' = "ä" +umlaut 'e' = "ë" +umlaut 'i' = "ï" +umlaut 'o' = "ö" +umlaut 'u' = "ü" +umlaut c = [c] + +dot :: Char -> String +dot 'C' = "Ċ" +dot 'c' = "ċ" +dot 'E' = "Ė" +dot 'e' = "ė" +dot 'G' = "Ġ" +dot 'g' = "ġ" +dot 'I' = "İ" +dot 'Z' = "Ż" +dot 'z' = "ż" +dot c = [c] + +macron :: Char -> String +macron 'A' = "Ā" +macron 'E' = "Ē" +macron 'I' = "Ī" +macron 'O' = "Ō" +macron 'U' = "Ū" +macron 'a' = "ā" +macron 'e' = "ē" +macron 'i' = "ī" +macron 'o' = "ō" +macron 'u' = "ū" +macron c = [c] + +cedilla :: Char -> String +cedilla 'c' = "ç" +cedilla 'C' = "Ç" +cedilla 's' = "ş" +cedilla 'S' = "Ş" +cedilla 't' = "ţ" +cedilla 'T' = "Ţ" +cedilla 'e' = "ȩ" +cedilla 'E' = "Ȩ" +cedilla 'h' = "ḩ" +cedilla 'H' = "Ḩ" +cedilla 'o' = "o̧" +cedilla 'O' = "O̧" +cedilla c = [c] + +hacek :: Char -> String +hacek 'A' = "Ǎ" +hacek 'a' = "ǎ" +hacek 'C' = "Č" +hacek 'c' = "č" +hacek 'D' = "Ď" +hacek 'd' = "ď" +hacek 'E' = "Ě" +hacek 'e' = "ě" +hacek 'G' = "Ǧ" +hacek 'g' = "ǧ" +hacek 'H' = "Ȟ" +hacek 'h' = "ȟ" +hacek 'I' = "Ǐ" +hacek 'i' = "ǐ" +hacek 'j' = "ǰ" +hacek 'K' = "Ǩ" +hacek 'k' = "ǩ" +hacek 'L' = "Ľ" +hacek 'l' = "ľ" +hacek 'N' = "Ň" +hacek 'n' = "ň" +hacek 'O' = "Ǒ" +hacek 'o' = "ǒ" +hacek 'R' = "Ř" +hacek 'r' = "ř" +hacek 'S' = "Š" +hacek 's' = "š" +hacek 'T' = "Ť" +hacek 't' = "ť" +hacek 'U' = "Ǔ" +hacek 'u' = "ǔ" +hacek 'Z' = "Ž" +hacek 'z' = "ž" +hacek c = [c] + +breve :: Char -> String +breve 'A' = "Ă" +breve 'a' = "ă" +breve 'E' = "Ĕ" +breve 'e' = "ĕ" +breve 'G' = "Ğ" +breve 'g' = "ğ" +breve 'I' = "Ĭ" +breve 'i' = "ĭ" +breve 'O' = "Ŏ" +breve 'o' = "ŏ" +breve 'U' = "Ŭ" +breve 'u' = "ŭ" +breve c = [c] tok :: LP Inlines tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) -- cgit v1.2.3 From dd5cb82348dfb2b8febb01db8bdc98ddeac394dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 Aug 2013 08:43:51 -0700 Subject: Generalized type of stringify. --- src/Text/Pandoc/Shared.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index eef150351..9a9a092fc 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses #-} +{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, + FlexibleContexts #-} {- Copyright (C) 2006-2013 John MacFarlane @@ -383,10 +384,10 @@ consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 = consolidateInlines (x : xs) = x : consolidateInlines xs consolidateInlines [] = [] --- | Convert list of inlines to a string with formatting removed. +-- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). -stringify :: [Inline] -> String +stringify :: Walkable Inline a => a -> String stringify = query go . walk deNote where go :: Inline -> [Char] go Space = " " -- cgit v1.2.3 From 940515a00ba49b9feb3d736dc071059400f83015 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 Aug 2013 16:54:37 -0700 Subject: LaTeX reader: allow spaces in alignment spec in tables. E.g. `{ l r c }`. --- src/Text/Pandoc/Readers/LaTeX.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index b9ca986fb..e91ea1e82 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1116,12 +1116,13 @@ complexNatbibCitation mode = try $ do parseAligns :: LP [Alignment] parseAligns = try $ do char '{' - optional $ char '|' + let maybeBar = try $ spaces >> optional (char '|') + maybeBar let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' let rAlign = AlignRight <$ char 'r' let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign) - aligns' <- sepEndBy alignChar (optional $ char '|') + aligns' <- sepEndBy alignChar maybeBar spaces char '}' spaces -- cgit v1.2.3 From 6ed41fdfcc3b57e88cf98b875a75ab5e1629dca6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 08:54:10 -0700 Subject: Factored out registerHeader from markdown reader, added to Parsing. Text.Pandoc.Parsing now exports registerHeader, which can be used in other readers. --- src/Text/Pandoc/Parsing.hs | 32 ++++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/Markdown.hs | 30 ++---------------------------- 2 files changed, 34 insertions(+), 28 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index c16d5bb1d..701b2ef84 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -75,6 +75,7 @@ module Text.Pandoc.Parsing ( (>>~), SubstTable, Key (..), toKey, + registerHeader, smartPunctuation, withQuoteContext, singleQuoteStart, @@ -151,6 +152,7 @@ where import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Builder (Blocks, Inlines, rawBlock, HasMeta(..)) +import qualified Text.Pandoc.Builder as B import Text.Pandoc.XML (fromEntities) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Parsec @@ -162,11 +164,13 @@ import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) import Text.Pandoc.Compat.TagSoupEntity ( lookupEntity ) +import Text.Pandoc.Asciify (toAsciiChar) import Data.Default import qualified Data.Set as Set import Control.Monad.Reader import Control.Applicative ((*>), (<*), (<$), liftA2) import Data.Monoid +import Data.Maybe (catMaybes) type Parser t s = Parsec t s @@ -886,6 +890,34 @@ type KeyTable = M.Map Key Target type SubstTable = M.Map Key Inlines +-- | Add header to the list of headers in state, together +-- with its associated identifier. If the identifier is null +-- and the auto_identifers extension is set, generate a new +-- unique identifier, and update the list of identifiers +-- in state. +registerHeader :: Attr -> Inlines -> Parser s ParserState Attr +registerHeader (ident,classes,kvs) header' = do + ids <- stateIdentifiers `fmap` getState + exts <- getOption readerExtensions + let insert' = M.insertWith (\_new old -> old) + if null ident && Ext_auto_identifiers `Set.member` exts + then do + let id' = uniqueIdent (B.toList header') ids + let id'' = if Ext_ascii_identifiers `Set.member` exts + then catMaybes $ map toAsciiChar id' + else id' + updateState $ \st -> st{ + stateIdentifiers = if id' == id'' + then id' : ids + else id' : id'' : ids, + stateHeaders = insert' header' id' $ stateHeaders st } + return (id'',classes,kvs) + else do + unless (null ident) $ + updateState $ \st -> st{ + stateHeaders = insert' header' ident $ stateHeaders st } + return (ident,classes,kvs) + -- | Fail unless we're in "smart typography" mode. failUnlessSmart :: Parser [tok] ParserState () failUnlessSmart = getOption readerSmart >>= guard diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 658335202..267b30032 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -49,7 +49,6 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) -import Text.Pandoc.Asciify (toAsciiChar) import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, @@ -471,31 +470,6 @@ block = choice [ mempty <$ blanklines header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader "header" --- returns unique identifier -addToHeaderList :: Attr -> F Inlines -> MarkdownParser Attr -addToHeaderList (ident,classes,kvs) text = do - let header' = runF text defaultParserState - exts <- getOption readerExtensions - let insert' = M.insertWith (\_new old -> old) - if null ident && Ext_auto_identifiers `Set.member` exts - then do - ids <- stateIdentifiers `fmap` getState - let id' = uniqueIdent (B.toList header') ids - let id'' = if Ext_ascii_identifiers `Set.member` exts - then catMaybes $ map toAsciiChar id' - else id' - updateState $ \st -> st{ - stateIdentifiers = if id' == id'' - then id' : ids - else id' : id'' : ids, - stateHeaders = insert' header' id' $ stateHeaders st } - return (id'',classes,kvs) - else do - unless (null ident) $ - updateState $ \st -> st{ - stateHeaders = insert' header' ident $ stateHeaders st } - return (ident,classes,kvs) - atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length @@ -504,7 +478,7 @@ atxHeader = try $ do skipSpaces text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) attr <- atxClosing - attr' <- addToHeaderList attr text + attr' <- registerHeader attr (runF text defaultParserState) return $ B.headerWith attr' level <$> text atxClosing :: MarkdownParser Attr @@ -543,7 +517,7 @@ setextHeader = try $ do many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - attr' <- addToHeaderList attr text + attr' <- registerHeader attr (runF text defaultParserState) return $ B.headerWith attr' level <$> text -- -- cgit v1.2.3 From 9282f632786e85c7a31f974f20162214c5387c00 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 09:13:31 -0700 Subject: Use registerHeader in RST and LaTeX readers. This will give automatic unique identifiers, unless `-auto_identifiers` is specified. --- src/Text/Pandoc/Readers/LaTeX.hs | 6 ++-- src/Text/Pandoc/Readers/RST.hs | 6 ++-- tests/Tests/Readers/LaTeX.hs | 10 +++---- tests/latex-reader.native | 60 ++++++++++++++++++++-------------------- tests/lhs-test.html | 2 +- tests/lhs-test.html+lhs | 2 +- tests/lhs-test.latex | 2 +- tests/lhs-test.latex+lhs | 2 +- tests/lhs-test.native | 2 +- tests/rst-reader.native | 58 +++++++++++++++++++------------------- 10 files changed, 76 insertions(+), 74 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index e91ea1e82..ff5b73348 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -318,9 +318,9 @@ section (ident, classes, kvs) lvl = do let lvl' = if hasChapters then lvl + 1 else lvl skipopts contents <- grouped inline - lab <- option ident $ try $ spaces >> controlSeq "label" >> - spaces >> braced - return $ headerWith (lab, classes, kvs) lvl' contents + lab <- option ident $ try (spaces >> controlSeq "label" >> spaces >> braced) + attr' <- registerHeader (lab, classes, kvs) contents + return $ headerWith attr' lvl' contents inlineCommand :: LP Inlines inlineCommand = try $ do diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index df0a8294d..32893128a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -275,7 +275,8 @@ doubleHeader = try $ do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return $ B.header level txt + attr <- registerHeader nullAttr txt + return $ B.headerWith attr level txt -- a header with line on the bottom only singleHeader :: RSTParser Blocks @@ -295,7 +296,8 @@ singleHeader = try $ do Just ind -> (headerTable, ind + 1) Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) setState (state { stateHeaderTable = headerTable' }) - return $ B.header level txt + attr <- registerHeader nullAttr txt + return $ B.headerWith attr level txt -- -- hrule block diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index dff6e4537..c1efd1b68 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -28,17 +28,17 @@ tests = [ testGroup "basic" , testGroup "headers" [ "level 1" =: - "\\section{header}" =?> header 1 "header" + "\\section{header}" =?> headerWith ("header",[],[]) 1 "header" , "level 2" =: - "\\subsection{header}" =?> header 2 "header" + "\\subsection{header}" =?> headerWith ("header",[],[]) 2 "header" , "level 3" =: - "\\subsubsection{header}" =?> header 3 "header" + "\\subsubsection{header}" =?> headerWith ("header",[],[]) 3 "header" , "emph" =: "\\section{text \\emph{emph}}" =?> - header 1 ("text" <> space <> emph "emph") + headerWith ("text-emph",[],[]) 1 ("text" <> space <> emph "emph") , "link" =: "\\section{text \\href{/url}{link}}" =?> - header 1 ("text" <> space <> link "/url" "" "link") + headerWith ("text-link",[],[]) 1 ("text" <> space <> link "/url" "" "link") ] , testGroup "math" diff --git a/tests/latex-reader.native b/tests/latex-reader.native index 23e600000..15b667b2f 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -2,25 +2,25 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [RawBlock (Format "latex") "\\maketitle" ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Headers"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] -,Header 3 ("",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Emph [Str "emphasis"]] +,Header 1 ("headers",[],[]) [Str "Headers"] +,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embedded",Space,Str "link"] ("/url","")] +,Header 3 ("level-3-with-emphasis",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Emph [Str "emphasis"]] ,Para [Str "Level",Space,Str "4"] ,Para [Str "Level",Space,Str "5"] -,Header 1 ("",[],[]) [Str "Level",Space,Str "1"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Emph [Str "emphasis"]] -,Header 3 ("",[],[]) [Str "Level",Space,Str "3"] +,Header 1 ("level-1",[],[]) [Str "Level",Space,Str "1"] +,Header 2 ("level-2-with-emphasis",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Emph [Str "emphasis"]] +,Header 3 ("level-3",[],[]) [Str "Level",Space,Str "3"] ,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2"] +,Header 2 ("level-2",[],[]) [Str "Level",Space,Str "2"] ,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Paragraphs"] +,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "regular",Space,Str "paragraph."] ,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",Space,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."] ,Para [Str "Here\8217s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."] ,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break",LineBreak,Str "here."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Block",Space,Str "Quotes"] +,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"] ,Para [Str "E-mail",Space,Str "style:"] ,BlockQuote [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short."]] @@ -52,15 +52,15 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [Para [Str "Don\8217t",Space,Str "quote",Space,Str "me."]]] ,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Code",Space,Str "Blocks"] +,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"] ,Para [Str "Code:"] ,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\nthis code block is indented by one tab" ,Para [Str "And:"] ,CodeBlock ("",[],[]) " this code block is indented by two tabs\n\nThese should not be escaped: \\$ \\\\ \\> \\[ \\{" ,Para [Str "this",Space,Str "has",Space,Emph [Str "two",LineBreak,Str "lines"]] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Lists"] -,Header 2 ("",[],[]) [Str "Unordered"] +,Header 1 ("lists",[],[]) [Str "Lists"] +,Header 2 ("unordered",[],[]) [Str "Unordered"] ,Para [Str "Asterisks",Space,Str "tight:"] ,BulletList [[Para [Str "asterisk",Space,Str "1"]] @@ -91,7 +91,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [[Para [Str "Minus",Space,Str "1"]] ,[Para [Str "Minus",Space,Str "2"]] ,[Para [Str "Minus",Space,Str "3"]]] -,Header 2 ("",[],[]) [Str "Ordered"] +,Header 2 ("ordered",[],[]) [Str "Ordered"] ,Para [Str "Tight:"] ,OrderedList (1,Decimal,Period) [[Para [Str "First"]] @@ -118,7 +118,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Item",Space,Str "1.",Space,Str "graf",Space,Str "two.",Space,Str "The",Space,Str "quick",Space,Str "brown",Space,Str "fox",Space,Str "jumped",Space,Str "over",Space,Str "the",Space,Str "lazy",Space,Str "dog\8217s",Space,Str "back."]] ,[Para [Str "Item",Space,Str "2."]] ,[Para [Str "Item",Space,Str "3."]]] -,Header 2 ("",[],[]) [Str "Nested"] +,Header 2 ("nested",[],[]) [Str "Nested"] ,BulletList [[Para [Str "Tab"] ,BulletList @@ -143,14 +143,14 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,[Para [Str "Fie"]] ,[Para [Str "Foe"]]]] ,[Para [Str "Third"]]] -,Header 2 ("",[],[]) [Str "Tabs",Space,Str "and",Space,Str "spaces"] +,Header 2 ("tabs-and-spaces",[],[]) [Str "Tabs",Space,Str "and",Space,Str "spaces"] ,BulletList [[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]] ,[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"] ,BulletList [[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]] ,[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"]]]]] -,Header 2 ("",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"] +,Header 2 ("fancy-list-markers",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"] ,OrderedList (2,Decimal,TwoParens) [[Para [Str "begins",Space,Str "with",Space,Str "2"]] ,[Para [Str "and",Space,Str "now",Space,Str "3"] @@ -180,7 +180,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "M.A.",Space,Str "2007"] ,Para [Str "B.",Space,Str "Williams"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Definition",Space,Str "Lists"] +,Header 1 ("definition-lists",[],[]) [Str "Definition",Space,Str "Lists"] ,Para [Str "Tight",Space,Str "using",Space,Str "spaces:"] ,DefinitionList [([Str "apple"], @@ -215,7 +215,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,CodeBlock ("",[],[]) "{ orange code block }" ,BlockQuote [Para [Str "orange",Space,Str "block",Space,Str "quote"]]]])] -,Header 1 ("",[],[]) [Str "HTML",Space,Str "Blocks"] +,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"] ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] ,Para [Str "foo",Space,Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] ,Para [Str "foo",Space,Str "bar",Space,Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] @@ -234,7 +234,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,CodeBlock ("",[],[]) "
" ,Para [Str "Hr\8217s:"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] +,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] ,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."] ,Para [Str "An",Space,Emph [Link [Str "emphasized",Space,Str "link"] ("/url","")],Str "."] @@ -248,7 +248,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Subscripts:",Space,Str "H",Subscript [Str "2"],Str "O,",Space,Str "H",Subscript [Str "23"],Str "O,",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O."] ,Para [Str "These",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "superscripts",Space,Str "or",Space,Str "subscripts,",Space,Str "because",Space,Str "of",Space,Str "the",Space,Str "unescaped",Space,Str "spaces:",Space,Str "a^b",Space,Str "c^d,",Space,Str "a",Math InlineMath "\\sim",Str "b",Space,Str "c",Math InlineMath "\\sim",Str "d."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"] +,Header 1 ("smart-quotes-ellipses-dashes",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"] ,Para [Quoted DoubleQuote [Str "Hello,"],Space,Str "said",Space,Str "the",Space,Str "spider.",Space,Quoted DoubleQuote [Quoted SingleQuote [Str "Shelob"],Space,Str "is",Space,Str "my",Space,Str "name."]] ,Para [Quoted SingleQuote [Str "A"],Str ",",Space,Quoted SingleQuote [Str "B"],Str ",",Space,Str "and",Space,Quoted SingleQuote [Str "C"],Space,Str "are",Space,Str "letters."] ,Para [Quoted SingleQuote [Str "Oak,"],Space,Quoted SingleQuote [Str "elm,"],Space,Str "and",Space,Quoted SingleQuote [Str "beech"],Space,Str "are",Space,Str "names",Space,Str "of",Space,Str "trees.",Space,Str "So",Space,Str "is",Space,Quoted SingleQuote [Str "pine."]] @@ -258,7 +258,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Dashes",Space,Str "between",Space,Str "numbers:",Space,Str "5\8211\&7,",Space,Str "255\8211\&66,",Space,Str "1987\8211\&1999."] ,Para [Str "Ellipses\8230and\8230and\8230."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "LaTeX"] +,Header 1 ("latex",[],[]) [Str "LaTeX"] ,BulletList [[Para [Cite [Citation {citationId = "smith.1899", citationPrefix = [], citationSuffix = [Str "22-23"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [RawInline (Format "latex") "\\cite[22-23]{smith.1899}"]]] ,[Para [RawInline (Format "latex") "\\doublespacing"]] @@ -288,7 +288,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp [[[Plain [Str "Animal"]]] ,[[Plain [Str "Vegetable"]]]] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Special",Space,Str "Characters"] +,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] ,BulletList [[Para [Str "I",Space,Str "hat:",Space,Str "\206"]] @@ -318,8 +318,8 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Plus:",Space,Str "+"] ,Para [Str "Minus:",Space,Str "-"] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Links"] -,Header 2 ("",[],[]) [Str "Explicit"] +,Header 1 ("links",[],[]) [Str "Links"] +,Header 2 ("explicit",[],[]) [Str "Explicit"] ,Para [Str "Just",Space,Str "a",Space,Link [Str "URL"] ("/url/",""),Str "."] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/",""),Str "."] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/",""),Str "."] @@ -329,7 +329,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Link [Str "with_underscore"] ("/url/with_underscore","")] ,Para [Link [Str "Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")] ,Para [Link [Str "Empty"] ("",""),Str "."] -,Header 2 ("",[],[]) [Str "Reference"] +,Header 2 ("reference",[],[]) [Str "Reference"] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] @@ -342,12 +342,12 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,CodeBlock ("",[],[]) "[not]: /url" ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] ,Para [Str "Foo",Space,Link [Str "biz"] ("/url/",""),Str "."] -,Header 2 ("",[],[]) [Str "With",Space,Str "ampersands"] +,Header 2 ("with-ampersands",[],[]) [Str "With",Space,Str "ampersands"] ,Para [Str "Here\8217s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] ("http://example.com/?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] ("http://att.com/",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."] -,Header 2 ("",[],[]) [Str "Autolinks"] +,Header 2 ("autolinks",[],[]) [Str "Autolinks"] ,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")] ,BulletList [[Para [Str "In",Space,Str "a",Space,Str "list?"]] @@ -359,17 +359,17 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) ""] ,CodeBlock ("",[],[]) "or here: " ,HorizontalRule -,Header 1 ("",[],[]) [Str "Images"] +,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] ,Para [Image [Str "image"] ("lalune.jpg","")] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "image"] ("movie.jpg",""),Space,Str "icon."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Footnotes"] +,Header 1 ("footnotes",[],[]) [Str "Footnotes"] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Note [Para [Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",Space,Str "go",Space,Str "anywhere",Space,Str "after",Space,Str "the",Space,Str "footnote",Space,Str "reference.",Space,Str "It",Space,Str "need",Space,Str "not",Space,Str "be",Space,Str "placed",Space,Str "at",Space,Str "the",Space,Str "end",Space,Str "of",Space,Str "the",Space,Str "document."]],Space,Str "and",Space,Str "another.",Note [Para [Str "Here\8217s",Space,Str "the",Space,Str "long",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."],Para [Str "Subsequent",Space,Str "blocks",Space,Str "are",Space,Str "indented",Space,Str "to",Space,Str "show",Space,Str "that",Space,Str "they",Space,Str "belong",Space,Str "to",Space,Str "the",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "list",Space,Str "items)."],CodeBlock ("",[],[]) " { }",Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "indent",Space,Str "every",Space,Str "line,",Space,Str "but",Space,Str "you",Space,Str "can",Space,Str "also",Space,Str "be",Space,Str "lazy",Space,Str "and",Space,Str "just",Space,Str "indent",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "each",Space,Str "block."]],Space,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",Space,Str "contains",Space,Str "a",Space,Str "space.[^my",Space,Str "note]",Space,Str "Here",Space,Str "is",Space,Str "an",Space,Str "inline",Space,Str "note.",Note [Para [Str "This",Space,Str "is",Space,Emph [Str "easier"],Space,Str "to",Space,Str "type.",Space,Str "Inline",Space,Str "notes",Space,Str "may",Space,Str "contain",Space,Link [Str "links"] ("http://google.com",""),Space,Str "and",Space,Code ("",[],[]) "]",Space,Str "verbatim",Space,Str "characters,",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "[bracketed",Space,Str "text]."]]] ,BlockQuote [Para [Str "Notes",Space,Str "can",Space,Str "go",Space,Str "in",Space,Str "quotes.",Note [Para [Str "In",Space,Str "quote."]]]] ,OrderedList (1,Decimal,Period) [[Para [Str "And",Space,Str "in",Space,Str "list",Space,Str "items.",Note [Para [Str "In",Space,Str "list."]]]]] ,Para [Str "This",Space,Str "paragraph",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "part",Space,Str "of",Space,Str "the",Space,Str "note,",Space,Str "as",Space,Str "it",Space,Str "is",Space,Str "not",Space,Str "indented."] -,Header 1 ("",[],[]) [Str "Escaped",Space,Str "characters"] +,Header 1 ("escaped-characters",[],[]) [Str "Escaped",Space,Str "characters"] ,Para [Str "$",Space,Str "%",Space,Str "&",Space,Str "#",Space,Str "_",Space,Str "{",Space,Str "}"]] diff --git a/tests/lhs-test.html b/tests/lhs-test.html index 6fc51b1e9..bde505a1e 100644 --- a/tests/lhs-test.html +++ b/tests/lhs-test.html @@ -27,7 +27,7 @@ code > span.er { color: #ff0000; font-weight: bold; } -

lhs test

+

lhs test

unsplit is an arrow that takes a pair of values and combines them to return a single value:

unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d
 unsplit = arr . uncurry
diff --git a/tests/lhs-test.html+lhs b/tests/lhs-test.html+lhs
index bc0935bd1..fcdcad303 100644
--- a/tests/lhs-test.html+lhs
+++ b/tests/lhs-test.html+lhs
@@ -27,7 +27,7 @@ code > span.er { color: #ff0000; font-weight: bold; }
   
 
 
-

lhs test

+

lhs test

unsplit is an arrow that takes a pair of values and combines them to return a single value:

> unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d
 > unsplit = arr . uncurry
diff --git a/tests/lhs-test.latex b/tests/lhs-test.latex
index 0bfdec6a5..51c62f98a 100644
--- a/tests/lhs-test.latex
+++ b/tests/lhs-test.latex
@@ -68,7 +68,7 @@
 
 \begin{document}
 
-\section{lhs test}
+\section{lhs test}\label{lhs-test}
 
 \texttt{unsplit} is an arrow that takes a pair of values and combines them to
 return a single value:
diff --git a/tests/lhs-test.latex+lhs b/tests/lhs-test.latex+lhs
index ce91b37e1..606d49a12 100644
--- a/tests/lhs-test.latex+lhs
+++ b/tests/lhs-test.latex+lhs
@@ -49,7 +49,7 @@
 
 \begin{document}
 
-\section{lhs test}
+\section{lhs test}\label{lhs-test}
 
 \texttt{unsplit} is an arrow that takes a pair of values and combines them to
 return a single value:
diff --git a/tests/lhs-test.native b/tests/lhs-test.native
index 3a22d1f8a..63037d9e3 100644
--- a/tests/lhs-test.native
+++ b/tests/lhs-test.native
@@ -1,4 +1,4 @@
-[Header 1 ("",[],[]) [Str "lhs",Space,Str "test"]
+[Header 1 ("lhs-test",[],[]) [Str "lhs",Space,Str "test"]
 ,Para [Code ("",[],[]) "unsplit",Space,Str "is",Space,Str "an",Space,Str "arrow",Space,Str "that",Space,Str "takes",Space,Str "a",Space,Str "pair",Space,Str "of",Space,Str "values",Space,Str "and",Space,Str "combines",Space,Str "them",Space,Str "to",Space,Str "return",Space,Str "a",Space,Str "single",Space,Str "value:"]
 ,CodeBlock ("",["sourceCode","literate","haskell"],[]) "unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d\nunsplit = arr . uncurry\n          -- arr (\\op (x,y) -> x `op` y)"
 ,Para [Code ("",[],[]) "(***)",Space,Str "combines",Space,Str "two",Space,Str "arrows",Space,Str "into",Space,Str "a",Space,Str "new",Space,Str "arrow",Space,Str "by",Space,Str "running",Space,Str "the",Space,Str "two",Space,Str "arrows",Space,Str "on",Space,Str "a",Space,Str "pair",Space,Str "of",Space,Str "values",Space,Str "(one",Space,Str "arrow",Space,Str "on",Space,Str "the",Space,Str "first",Space,Str "item",Space,Str "of",Space,Str "the",Space,Str "pair",Space,Str "and",Space,Str "one",Space,Str "arrow",Space,Str "on",Space,Str "the",Space,Str "second",Space,Str "item",Space,Str "of",Space,Str "the",Space,Str "pair)."]
diff --git a/tests/rst-reader.native b/tests/rst-reader.native
index abceaaab7..497810f39 100644
--- a/tests/rst-reader.native
+++ b/tests/rst-reader.native
@@ -1,11 +1,11 @@
 Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
-[Header 1 ("",[],[]) [Str "Level",Space,Str "one",Space,Str "header"]
+[Header 1 ("level-one-header",[],[]) [Str "Level",Space,Str "one",Space,Str "header"]
 ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
-,Header 2 ("",[],[]) [Str "Level",Space,Str "two",Space,Str "header"]
-,Header 3 ("",[],[]) [Str "Level",Space,Str "three"]
-,Header 4 ("",[],[]) [Str "Level",Space,Str "four",Space,Str "with",Space,Emph [Str "emphasis"]]
-,Header 5 ("",[],[]) [Str "Level",Space,Str "five"]
-,Header 1 ("",[],[]) [Str "Paragraphs"]
+,Header 2 ("level-two-header",[],[]) [Str "Level",Space,Str "two",Space,Str "header"]
+,Header 3 ("level-three",[],[]) [Str "Level",Space,Str "three"]
+,Header 4 ("level-four-with-emphasis",[],[]) [Str "Level",Space,Str "four",Space,Str "with",Space,Emph [Str "emphasis"]]
+,Header 5 ("level-five",[],[]) [Str "Level",Space,Str "five"]
+,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"]
 ,Para [Str "Here\8217s",Space,Str "a",Space,Str "regular",Space,Str "paragraph."]
 ,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",Space,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."]
 ,Para [Str "Here\8217s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."]
@@ -13,7 +13,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
 ,HorizontalRule
 ,Para [Str "Another:"]
 ,HorizontalRule
-,Header 1 ("",[],[]) [Str "Block",Space,Str "Quotes"]
+,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"]
 ,Para [Str "Here\8217s",Space,Str "a",Space,Str "block",Space,Str "quote:"]
 ,BlockQuote
  [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short."]]
@@ -31,7 +31,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
   [Para [Str "nested"]
   ,BlockQuote
    [Para [Str "nested"]]]]
-,Header 1 ("",[],[]) [Str "Code",Space,Str "Blocks"]
+,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"]
 ,Para [Str "Code:"]
 ,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n    print \"working\";\n}"
 ,CodeBlock ("",[],[]) "this code block is indented by one tab"
@@ -39,8 +39,8 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
 ,CodeBlock ("",[],[]) "this block is indented by two tabs\n\nThese should not be escaped:  \\$ \\\\ \\> \\[ \\{"
 ,Para [Str "And:"]
 ,CodeBlock ("",["sourceCode","python"],[]) "def my_function(x):\n    return x + 1"
-,Header 1 ("",[],[]) [Str "Lists"]
-,Header 2 ("",[],[]) [Str "Unordered"]
+,Header 1 ("lists",[],[]) [Str "Lists"]
+,Header 2 ("unordered",[],[]) [Str "Unordered"]
 ,Para [Str "Asterisks",Space,Str "tight:"]
 ,BulletList
  [[Plain [Str "asterisk",Space,Str "1"]]
@@ -71,7 +71,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
  [[Plain [Str "Minus",Space,Str "1"]]
  ,[Plain [Str "Minus",Space,Str "2"]]
  ,[Plain [Str "Minus",Space,Str "3"]]]
-,Header 2 ("",[],[]) [Str "Ordered"]
+,Header 2 ("ordered",[],[]) [Str "Ordered"]
 ,Para [Str "Tight:"]
 ,OrderedList (1,Decimal,Period)
  [[Plain [Str "First"]]
@@ -115,7 +115,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
     ,[Plain [Str "Fie"]]
     ,[Plain [Str "Foe"]]]]]
  ,[Plain [Str "Third"]]]
-,Header 2 ("",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"]
+,Header 2 ("fancy-list-markers",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"]
 ,OrderedList (2,Decimal,TwoParens)
  [[Plain [Str "begins",Space,Str "with",Space,Str "2"]]
  ,[Para [Str "and",Space,Str "now",Space,Str "3"]
@@ -145,7 +145,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
 ,OrderedList (4,LowerAlpha,TwoParens)
  [[Plain [Str "item",Space,Str "1"]]
  ,[Plain [Str "item",Space,Str "2"]]]
-,Header 2 ("",[],[]) [Str "Definition"]
+,Header 2 ("definition",[],[]) [Str "Definition"]
 ,DefinitionList
  [([Str "term",Space,Str "1"],
    [[Para [Str "Definition",Space,Str "1."]]])
@@ -154,7 +154,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
     ,Para [Str "Definition",Space,Str "2,",Space,Str "paragraph",Space,Str "2."]]])
  ,([Str "term",Space,Str "with",Space,Emph [Str "emphasis"]],
    [[Para [Str "Definition",Space,Str "3."]]])]
-,Header 1 ("",[],[]) [Str "Field",Space,Str "Lists"]
+,Header 1 ("field-lists",[],[]) [Str "Field",Space,Str "Lists"]
 ,BlockQuote
  [DefinitionList
   [([Str "address"],
@@ -170,18 +170,18 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp
    [[Para [Emph [Str "Nowhere"],Str ",",Space,Str "MA,",Space,Str "USA"]]])
  ,([Str "phone"],
    [[Para [Str "123-4567"]]])]
-,Header 1 ("",[],[]) [Str "HTML",Space,Str "Blocks"]
+,Header 1 ("html-blocks",[],[]) [Str "HTML",Space,Str "Blocks"]
 ,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"]
 ,RawBlock (Format "html") "
foo
" ,Para [Str "Now,",Space,Str "nested:"] ,RawBlock (Format "html") "
\n
\n
\n foo\n
\n
\n
" -,Header 1 ("",[],[]) [Str "LaTeX",Space,Str "Block"] +,Header 1 ("latex-block",[],[]) [Str "LaTeX",Space,Str "Block"] ,RawBlock (Format "latex") "\\begin{tabular}{|l|l|}\\hline\nAnimal & Number \\\\ \\hline\nDog & 2 \\\\\nCat & 1 \\\\ \\hline\n\\end{tabular}" -,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] +,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ".",Space,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str "."] ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "",Str "."] ,Para [Str "This",Space,Str "is",Subscript [Str "subscripted"],Space,Str "and",Space,Str "this",Space,Str "is",Space,Superscript [Str "superscripted"],Str "."] -,Header 1 ("",[],[]) [Str "Special",Space,Str "Characters"] +,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] ,BulletList [[Plain [Str "I",Space,Str "hat:",Space,Str "\206"]] @@ -209,7 +209,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Bang:",Space,Str "!"] ,Para [Str "Plus:",Space,Str "+"] ,Para [Str "Minus:",Space,Str "-"] -,Header 1 ("",[],[]) [Str "Links"] +,Header 1 ("links",[],[]) [Str "Links"] ,Para [Str "Explicit:",Space,Str "a",Space,Link [Str "URL"] ("/url/",""),Str "."] ,Para [Str "Two",Space,Str "anonymous",Space,Str "links:",Space,Link [Str "the",Space,Str "first"] ("/url1/",""),Space,Str "and",Space,Link [Str "the",Space,Str "second"] ("/url2/","")] ,Para [Str "Reference",Space,Str "links:",Space,Link [Str "link1"] ("/url1/",""),Space,Str "and",Space,Link [Str "link2"] ("/url2/",""),Space,Str "and",Space,Link [Str "link1"] ("/url1/",""),Space,Str "again."] @@ -218,20 +218,20 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Autolinks:",Space,Link [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2",""),Space,Str "and",Space,Link [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net",""),Str "."] ,Para [Str "But",Space,Str "not",Space,Str "here:"] ,CodeBlock ("",[],[]) "http://example.com/" -,Header 1 ("",[],[]) [Str "Images"] +,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "From",Space,Quoted DoubleQuote [Str "Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune"],Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] ,Para [Image [Str "image"] ("lalune.jpg","")] ,Para [Image [Str "Voyage dans la Lune"] ("lalune.jpg","")] ,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",Space,Image [Str "movie"] ("movie.jpg",""),Space,Str "icon."] ,Para [Str "And",Space,Str "an",Space,Link [Image [Str "A movie"] ("movie.jpg","")] ("/url",""),Str "."] -,Header 1 ("",[],[]) [Str "Comments"] +,Header 1 ("comments",[],[]) [Str "Comments"] ,Para [Str "First",Space,Str "paragraph"] ,Para [Str "Another",Space,Str "paragraph"] ,Para [Str "A",Space,Str "third",Space,Str "paragraph"] -,Header 1 ("",[],[]) [Str "Line",Space,Str "blocks"] +,Header 1 ("line-blocks",[],[]) [Str "Line",Space,Str "blocks"] ,Para [Str "But",Space,Str "can",Space,Str "a",Space,Str "bee",Space,Str "be",Space,Str "said",Space,Str "to",Space,Str "be",LineBreak,Str "\160\160\160\160or",Space,Str "not",Space,Str "to",Space,Str "be",Space,Str "an",Space,Str "entire",Space,Str "bee,",LineBreak,Str "\160\160\160\160\160\160\160\160when",Space,Str "half",Space,Str "the",Space,Str "bee",Space,Str "is",Space,Str "not",Space,Str "a",Space,Str "bee,",LineBreak,Str "\160\160\160\160\160\160\160\160\160\160\160\160due",Space,Str "to",Space,Str "some",Space,Str "ancient",Space,Str "injury?"] ,Para [Str "Continuation",Space,Str "line",LineBreak,Str "\160\160and",Space,Str "another"] -,Header 1 ("",[],[]) [Str "Simple",Space,Str "Tables"] +,Header 1 ("simple-tables",[],[]) [Str "Simple",Space,Str "Tables"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [[Plain [Str "col",Space,Str "1"]] ,[Plain [Str "col",Space,Str "2"]] @@ -253,7 +253,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,[[Plain [Str "r2",Space,Str "d"]] ,[Plain [Str "e"]] ,[Plain [Str "f"]]]] -,Header 1 ("",[],[]) [Str "Grid",Space,Str "Tables"] +,Header 1 ("grid-tables",[],[]) [Str "Grid",Space,Str "Tables"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.2375,0.15,0.1625] [[Plain [Str "col",Space,Str "1"]] ,[Plain [Str "col",Space,Str "2"]] @@ -298,26 +298,26 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,[Plain [Str "b",Space,Str "2"]] ,[Plain [Str "b",Space,Str "2"]]]] ,[Plain [Str "c",Space,Str "c",Space,Str "2",Space,Str "c",Space,Str "2"]]]] -,Header 1 ("",[],[]) [Str "Footnotes"] +,Header 1 ("footnotes",[],[]) [Str "Footnotes"] ,Para [Note [Para [Str "Note",Space,Str "with",Space,Str "one",Space,Str "line."]]] ,Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line."]]] ,Para [Note [Para [Str "Note",Space,Str "with"],Para [Str "continuation",Space,Str "block."]]] ,Para [Note [Para [Str "Note",Space,Str "with",Space,Str "continuation",Space,Str "line"],Para [Str "and",Space,Str "a",Space,Str "second",Space,Str "para."]]] ,Para [Str "Not",Space,Str "in",Space,Str "note."] -,Header 1 ("",[],[]) [Str "Math"] +,Header 1 ("math",[],[]) [Str "Math"] ,Para [Str "Some",Space,Str "inline",Space,Str "math",Space,Math InlineMath "E=mc^2",Str ".",Space,Str "Now",Space,Str "some",Space,Str "display",Space,Str "math:"] ,Para [Math DisplayMath "E=mc^2"] ,Para [Math DisplayMath "E = mc^2"] ,Para [Math DisplayMath "E = mc^2",Math DisplayMath "\\alpha = \\beta"] ,Para [Math DisplayMath "E &= mc^2\\\\\nF &= \\pi E",Math DisplayMath "F &= \\gamma \\alpha^2"] ,Para [Str "All",Space,Str "done."] -,Header 1 ("",[],[]) [Str "Default-Role"] +,Header 1 ("default-role",[],[]) [Str "Default-Role"] ,Para [Str "Try",Space,Str "changing",Space,Str "the",Space,Str "default",Space,Str "role",Space,Str "to",Space,Str "a",Space,Str "few",Space,Str "different",Space,Str "things."] -,Header 2 ("",[],[]) [Str "Doesn\8217t",Space,Str "Break",Space,Str "Title",Space,Str "Parsing"] +,Header 2 ("doesnt-break-title-parsing",[],[]) [Str "Doesn\8217t",Space,Str "Break",Space,Str "Title",Space,Str "Parsing"] ,Para [Str "Inline",Space,Str "math:",Space,Math InlineMath "E=mc^2",Space,Str "or",Space,Math InlineMath "E=mc^2",Space,Str "or",Space,Math InlineMath "E=mc^2",Str ".",Space,Str "Other",Space,Str "roles:",Space,Superscript [Str "super"],Str ",",Space,Subscript [Str "sub"],Str "."] ,Para [Math DisplayMath "\\alpha = beta",Math DisplayMath "E = mc^2"] ,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."] ,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."] ,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."] -,Header 2 ("",[],[]) [Str "Literal",Space,Str "symbols"] +,Header 2 ("literal-symbols",[],[]) [Str "Literal",Space,Str "symbols"] ,Para [Str "2*2",Space,Str "=",Space,Str "4*1"]] -- cgit v1.2.3 From 90c49b0aaed34ef1efb8e342d80f93cb477512a7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 09:22:55 -0700 Subject: Use registerHeader in Textile reader. This produces automatic header identifiers, unless `auto_identifiers` extension is disabled. Closes #967. --- src/Text/Pandoc/Readers/Textile.hs | 4 ++- tests/textile-reader.native | 58 +++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 30 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 8ccd1e227..23e07f621 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -52,6 +52,7 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where import Text.Pandoc.Definition +import qualified Text.Pandoc.Builder as B import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -179,7 +180,8 @@ header = try $ do char '.' whitespace name <- normalizeSpaces <$> manyTill inline blockBreak - return $ Header level attr name + attr' <- registerHeader attr (B.fromList name) + return $ Header level attr' name -- | Blockquote of the form "bq. content" blockQuote :: Parser [Char] ParserState Block diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 7e709a505..ebfbc07fd 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,13 +1,13 @@ Pandoc (Meta {unMeta = fromList []}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "\8217",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule -,Header 1 ("",[],[]) [Str "Headers"] -,Header 2 ("",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] -,Header 3 ("",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Strong [Str "emphasis"]] -,Header 4 ("",[],[]) [Str "Level",Space,Str "4"] -,Header 5 ("",[],[]) [Str "Level",Space,Str "5"] -,Header 6 ("",[],[]) [Str "Level",Space,Str "6"] -,Header 1 ("",[],[]) [Str "Paragraphs"] +,Header 1 ("headers",[],[]) [Str "Headers"] +,Header 2 ("level-2-with-an-embeded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] +,Header 3 ("level-3-with-emphasis",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Strong [Str "emphasis"]] +,Header 4 ("level-4",[],[]) [Str "Level",Space,Str "4"] +,Header 5 ("level-5",[],[]) [Str "Level",Space,Str "5"] +,Header 6 ("level-6",[],[]) [Str "Level",Space,Str "6"] +,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"] ,Para [Str "Here",Str "\8217",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] ,Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile",Str ",",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str "wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break",Str "."] ,Para [Str "Here",Str "\8217",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."] @@ -16,23 +16,23 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "paragraph",Space,Str "break",Space,Str "between",Space,Str "here"] ,Para [Str "and",Space,Str "here",Str "."] ,Para [Str "pandoc",Space,Str "converts",Space,Str "textile",Str "."] -,Header 1 ("",[],[]) [Str "Block",Space,Str "Quotes"] +,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"] ,BlockQuote [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "famous",Space,Str "quote",Space,Str "from",Space,Str "somebody",Str ".",Space,Str "He",Space,Str "had",Space,Str "a",Space,Str "lot",Space,Str "of",Space,Str "things",Space,Str "to",LineBreak,Str "say",Str ",",Space,Str "so",Space,Str "the",Space,Str "text",Space,Str "is",Space,Str "really",Space,Str "really",Space,Str "long",Space,Str "and",Space,Str "spans",Space,Str "on",Space,Str "multiple",Space,Str "lines",Str "."]] ,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph",Str "."] -,Header 1 ("",[],[]) [Str "Code",Space,Str "Blocks"] +,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"] ,Para [Str "Code",Str ":"] ,CodeBlock ("",[],[]) " ---- (should be four hyphens)\n\n sub status {\n print \"working\";\n }\n\n this code block is indented by one tab" ,Para [Str "And",Str ":"] ,CodeBlock ("",[],[]) " this code block is indented by two tabs\n\n These should not be escaped: \\$ \\\\ \\> \\[ \\{" ,CodeBlock ("",[],[]) "Code block with .bc\n continued\n @",Str ",",Space,Code ("",[],[]) "@",Str "."] -,Header 1 ("",[],[]) [Str "Notextile"] +,Header 1 ("notextile",[],[]) [Str "Notextile"] ,Para [Str "A",Space,Str "block",Space,Str "of",Space,Str "text",Space,Str "can",Space,Str "be",Space,Str "protected",Space,Str "with",Space,Str "notextile",Space,Str ":"] ,Para [Str "\nNo *bold* and\n* no bullet\n"] ,Para [Str "and",Space,Str "inlines",Space,Str "can",Space,Str "be",Space,Str "protected",Space,Str "with",Space,Str "double *equals (=)* markup",Str "."] -,Header 1 ("",[],[]) [Str "Lists"] -,Header 2 ("",[],[]) [Str "Unordered"] +,Header 1 ("lists",[],[]) [Str "Lists"] +,Header 2 ("unordered",[],[]) [Str "Unordered"] ,Para [Str "Asterisks",Space,Str "tight",Str ":"] ,BulletList [[Plain [Str "asterisk",Space,Str "1"]] @@ -42,13 +42,13 @@ Pandoc (Meta {unMeta = fromList []}) ,BulletList [[Plain [Str "asterisk",Space,Str "1",LineBreak,Str "newline"]] ,[Plain [Str "asterisk",Space,Str "2"]]] -,Header 2 ("",[],[]) [Str "Ordered"] +,Header 2 ("ordered",[],[]) [Str "Ordered"] ,Para [Str "Tight",Str ":"] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "First"]] ,[Plain [Str "Second"]] ,[Plain [Str "Third"]]] -,Header 2 ("",[],[]) [Str "Nested"] +,Header 2 ("nested",[],[]) [Str "Nested"] ,BulletList [[Plain [Str "ui",Space,Str "1"] ,BulletList @@ -63,7 +63,7 @@ Pandoc (Meta {unMeta = fromList []}) ,BulletList [[Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "1"]] ,[Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "2"]]]]]]] -,Header 2 ("",[],[]) [Str "Definition",Space,Str "List"] +,Header 2 ("definition-list",[],[]) [Str "Definition",Space,Str "List"] ,DefinitionList [([Str "coffee"], [[Plain [Str "Hot",Space,Str "and",Space,Str "black"]]]) @@ -74,23 +74,23 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Cold",Space,Str "drink",Space,Str "that",Space,Str "goes",Space,Str "great",Space,Str "with",Space,Str "cookies",Str "."]]]) ,([Str "beer"], [[Plain [Str "fresh",Space,Str "and",Space,Str "bitter"]]])] -,Header 1 ("",[],[]) [Str "Inline",Space,Str "Markup"] +,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "Hyphenated-words-are-ok",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str "strange_underscore_notation",Str ".",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."] ,Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]] ,Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts",Str ":",Space,Subscript [Str "here"],Space,Str "H",Subscript [Str "2"],Str "O",Str ",",Space,Str "H",Subscript [Str "23"],Str "O",Str ",",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O",Str "."] ,Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "\8212",Space,Str "automatic",Space,Str "dashes",Str "."] ,Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str "\8230",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] ,Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Quoted DoubleQuote [Str "I",Str "\8217",Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you"],Space,Str "for",Space,Str "example",Str "."] -,Header 1 ("",[],[]) [Str "Links"] -,Header 2 ("",[],[]) [Str "Explicit"] +,Header 1 ("links",[],[]) [Str "Links"] +,Header 2 ("explicit",[],[]) [Str "Explicit"] ,Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")] ,Para [Link [Str "Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")] ,Para [Str "Automatic",Space,Str "linking",Space,Str "to",Space,Link [Str "http://www.example.com"] ("http://www.example.com",""),Str "."] ,Para [Link [Str "Example"] ("http://www.example.com/",""),Str ":",Space,Str "Example",Space,Str "of",Space,Str "a",Space,Str "link",Space,Str "followed",Space,Str "by",Space,Str "a",Space,Str "colon",Str "."] ,Para [Str "A",Space,Str "link",Link [Str "with",Space,Str "brackets"] ("http://www.example.com",""),Str "and",Space,Str "no",Space,Str "spaces",Str "."] -,Header 1 ("",[],[]) [Str "Tables"] +,Header 1 ("tables",[],[]) [Str "Tables"] ,Para [Str "Textile",Space,Str "allows",Space,Str "tables",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "headers",Space,Str ":"] -,Header 2 ("",[],[]) [Str "Without",Space,Str "headers"] +,Header 2 ("without-headers",[],[]) [Str "Without",Space,Str "headers"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] [[[Plain [Str "name"]] @@ -106,7 +106,7 @@ Pandoc (Meta {unMeta = fromList []}) ,[Plain [Str "45"]] ,[Plain [Str "f"]]]] ,Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Str "\8230"] -,Header 2 ("",[],[]) [Str "With",Space,Str "headers"] +,Header 2 ("with-headers",[],[]) [Str "With",Space,Str "headers"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [[Plain [Str "name"]] ,[Plain [Str "age"]] @@ -120,9 +120,9 @@ Pandoc (Meta {unMeta = fromList []}) ,[[Plain [Str "bella"]] ,[Plain [Str "45"]] ,[Plain [Str "f"]]]] -,Header 1 ("",[],[]) [Str "Images"] +,Header 1 ("images",[],[]) [Str "Images"] ,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax",Str ",",Space,Str "like",Space,LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] -,Header 1 ("",[],[]) [Str "Attributes"] +,Header 1 ("attributes",[],[]) [Str "Attributes"] ,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers",Str "."] ,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str " all kind"] ,Para [Str "and",Space,Str "paragraph",Space,Str "attributes",Str ",",Space,Str "and",Space,Str "table",Space,Str "attributes",Str "."] @@ -134,9 +134,9 @@ Pandoc (Meta {unMeta = fromList []}) ,[[Plain [Str "joan"]] ,[Plain [Str "24"]] ,[Plain [Str "f"]]]] -,Header 1 ("",[],[]) [Str "Entities"] +,Header 1 ("entities",[],[]) [Str "Entities"] ,Para [Str "*",LineBreak,Str "&"] -,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] +,Header 1 ("raw-html",[],[]) [Str "Raw",Space,Str "HTML"] ,Para [Str "However",Str ",",Space,RawInline (Format "html") "",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline (Format "html") "",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] ,RawBlock (Format "html") "
" ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] @@ -150,18 +150,18 @@ Pandoc (Meta {unMeta = fromList []}) [[Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "\8217",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"]] ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") ""]]] ,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] -,Header 1 ("",[],[]) [Str "Raw",Space,Str "LaTeX"] +,Header 1 ("raw-latex",[],[]) [Str "Raw",Space,Str "LaTeX"] ,Para [Str "This",Space,Str "Textile",Space,Str "reader",Space,Str "also",Space,Str "accepts",Space,Str "raw",Space,Str "LaTeX",Space,Str "for",Space,Str "blocks",Space,Str ":"] ,RawBlock (Format "latex") "\\begin{itemize}\n \\item one\n \\item two\n\\end{itemize}" ,Para [Str "and",Space,Str "for",Space,RawInline (Format "latex") "\\emph{inlines}",Str "."] -,Header 1 ("",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] +,Header 1 ("acronyms-and-marks",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"] ,Para [Str "PBS (Public Broadcasting System)"] ,Para [Str "Hi",Str "\8482"] ,Para [Str "Hi",Space,Str "\8482"] ,Para [Str "\174",Space,Str "Hi",Str "\174"] ,Para [Str "Hi",Str "\169",Str "2008",Space,Str "\169",Space,Str "2008"] -,Header 1 ("",[],[]) [Str "Footnotes"] +,Header 1 ("footnotes",[],[]) [Str "Footnotes"] ,Para [Str "A",Space,Str "note",Str ".",Note [Para [Str "The",Space,Str "note",LineBreak,Str "is",Space,Str "here",Str "!"]],Space,Str "Another",Space,Str "note",Note [Para [Str "Other",Space,Str "note",Str "."]],Str "."] -,Header 1 ("",[],[]) [Str "Comment",Space,Str "blocks"] +,Header 1 ("comment-blocks",[],[]) [Str "Comment",Space,Str "blocks"] ,Null ,Para [Str "not",Space,Str "a",Space,Str "comment",Str "."]] -- cgit v1.2.3 From 8b0052ba5b0578814a5aca14a0e02874a10cf947 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 15:05:51 -0700 Subject: Mathjax in HTML slide shows: include explicit "Typeset" instruction. This seems to be needed for some formats (e.g. slideous) and won't hurt in others. Closes #966. --- src/Text/Pandoc/Writers/HTML.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 25079574e..63b466af3 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -143,7 +143,8 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax url -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ mempty + $ preEscapedString + "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" JsMath (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" -- cgit v1.2.3 From 9b0b9b6e03c05ca81ff3cf52787a30ea00cb3a76 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Sep 2013 15:18:56 -0700 Subject: Markdown reader: Don't autolink a bare URI that is followed by ``. Closes #937. --- src/Text/Pandoc/Readers/Markdown.hs | 1 + tests/Tests/Readers/Markdown.hs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 267b30032..9b98cbc3e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1662,6 +1662,7 @@ bareURL :: MarkdownParser (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris (orig, src) <- uri <|> emailAddress + notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") return $ return $ B.link src "" (B.str orig) autoLink :: MarkdownParser (F Inlines) diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index 8a9ed9667..ccca147ab 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -24,7 +24,7 @@ infix 4 =: testBareLink :: (String, Inlines) -> Test testBareLink (inp, ils) = test (readMarkdown def{ readerExtensions = - Set.fromList [Ext_autolink_bare_uris] }) + Set.fromList [Ext_autolink_bare_uris, Ext_raw_html] }) inp (inp, doc $ para ils) autolink :: String -> Inlines @@ -34,6 +34,9 @@ bareLinkTests :: [(String, Inlines)] bareLinkTests = [ ("http://google.com is a search engine.", autolink "http://google.com" <> " is a search engine.") + , ("http://foo.bar.baz", + rawInline "html" "" <> + "http://foo.bar.baz" <> rawInline "html" "") , ("Try this query: http://google.com?search=fish&time=hour.", "Try this query: " <> autolink "http://google.com?search=fish&time=hour" <> ".") , ("HTTPS://GOOGLE.COM,", -- cgit v1.2.3 From 728e47ae15252619444a9ee91f2ceeecd4f3cf98 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Sep 2013 15:40:08 -0700 Subject: MediaWiki reader: Allow Image: for images. Closes #971. --- src/Text/Pandoc/Readers/MediaWiki.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 8f1ff2776..2b938cd82 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -523,7 +523,7 @@ endline = () <$ try (newline <* image :: MWParser Inlines image = try $ do sym "[[" - sym "File:" + sym "File:" <|> sym "Image:" fname <- many1 (noneOf "|]") _ <- many (try $ char '|' *> imageOption) caption <- (B.str fname <$ sym "]]") -- cgit v1.2.3 From 8d43e08ce7be8673cc399b948d29386f525e9e1f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Sep 2013 22:26:18 -0700 Subject: Markdown writer: Fixed bugs in YAML header output. --- src/Text/Pandoc/Writers/Markdown.hs | 6 +++--- tests/writer.markdown | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d617954dd..23e730bf0 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace ) +import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State @@ -143,7 +143,7 @@ jsonToYaml (Object hashmap) = | otherwise -> (k' <> ":") $$ x (k', Object _, x) -> (k' <> ":") $$ nest 2 x (_, String "", _) -> empty - (k', _, x) -> k' <> ":" <> space <> x) + (k', _, x) -> k' <> ":" <> space <> hang 2 "" x) $ sortBy (comparing fst) $ H.toList hashmap jsonToYaml (Array vec) = vcat $ map (\v -> hang 2 "- " (jsonToYaml v)) $ V.toList vec @@ -151,7 +151,7 @@ jsonToYaml (String "") = empty jsonToYaml (String s) = case T.unpack s of x | '\n' `elem` x -> hang 2 ("|" <> cr) $ text x - | not (any (`elem` x) "\"'#:[]{}?-") -> text x + | not (any isPunctuation x) -> text x | otherwise -> text $ "'" ++ substitute "'" "''" x ++ "'" jsonToYaml (Bool b) = text $ show b jsonToYaml (Number n) = text $ show n diff --git a/tests/writer.markdown b/tests/writer.markdown index 7d67e4e87..9cf153637 100644 --- a/tests/writer.markdown +++ b/tests/writer.markdown @@ -2,7 +2,7 @@ author: - John MacFarlane - Anonymous -date: July 17, 2006 +date: 'July 17, 2006' title: Pandoc Test Suite ... -- cgit v1.2.3 From 5afd373ae45f525ff1eff5e54c1850fe2c614b4b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 7 Sep 2013 09:36:37 -0700 Subject: Added `lists_without_preceding_blankline` extension. * Added `Ext_lists_without_preceding_blankline` to `Extension` in `Options`. Added this option to `githubMarkdownExtensions`. * Made markdown reader sensitive to this. * Closes #972. --- README | 4 ++++ src/Text/Pandoc/Options.hs | 2 ++ src/Text/Pandoc/Readers/Markdown.hs | 1 + 3 files changed, 7 insertions(+) (limited to 'src/Text') diff --git a/README b/README index 0a04fb6ab..7d926216b 100644 --- a/README +++ b/README @@ -2454,6 +2454,10 @@ in pandoc, but may be enabled by adding `+EXTENSION` to the format name, where `EXTENSION` is the name of the extension. Thus, for example, `markdown+hard_line_breaks` is markdown with hard line breaks. +**Extension: `lists_without_preceding_blankline`**\ +Allow a list to occur right after a paragraph, with no intervening +blank space. + **Extension: `hard_line_breaks`**\ Causes all newlines within a paragraph to be interpreted as hard line breaks instead of spaces. diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 48e418ab2..5f65abdde 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -80,6 +80,7 @@ data Extension = | Ext_link_attributes -- ^ MMD style reference link attributes | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank | Ext_startnum -- ^ Make start number of ordered list significant | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php | Ext_example_lists -- ^ Markdown-style numbered examples @@ -169,6 +170,7 @@ githubMarkdownExtensions = Set.fromList , Ext_intraword_underscores , Ext_strikeout , Ext_hard_line_breaks + , Ext_lists_without_preceding_blankline ] multimarkdownExtensions :: Set Extension diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 9b98cbc3e..2ca0d312a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1559,6 +1559,7 @@ endline :: MarkdownParser (F Inlines) endline = try $ do newline notFollowedBy blankline + guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: -- cgit v1.2.3 From 56f56e5e1594ef5d18326d1eb6de3176db307c6a Mon Sep 17 00:00:00 2001 From: Merijn Verstraaten Date: Sat, 7 Sep 2013 18:58:16 +0100 Subject: Added support for LaTeX style literate Haskell code blocks in rST. --- src/Text/Pandoc/Readers/RST.hs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 32893128a..c12a1493a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -347,14 +347,25 @@ lhsCodeBlock = try $ do getPosition >>= guard . (==1) . sourceColumn guardEnabled Ext_literate_haskell optional codeBlockStart - lns <- many1 birdTrackLine - -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns - else lns + lns <- latexCodeBlock <|> birdCodeBlock blanklines return $ B.codeBlockWith ("", ["sourceCode", "literate", "haskell"], []) - $ intercalate "\n" lns' + $ intercalate "\n" lns + +latexCodeBlock :: Parser [Char] st [[Char]] +latexCodeBlock = try $ do + try (latexBlockLine "\\begin{code}") + many1Till anyLine (try $ latexBlockLine "\\end{code}") + where + latexBlockLine s = skipMany spaceChar >> string s >> blankline + +birdCodeBlock :: Parser [Char] st [[Char]] +birdCodeBlock = filterSpace <$> many1 birdTrackLine + where filterSpace lns = + -- if (as is normal) there is always a space after >, drop it + if all (\ln -> null ln || take 1 ln == " ") lns + then map (drop 1) lns + else lns birdTrackLine :: Parser [Char] st [Char] birdTrackLine = char '>' >> anyLine -- cgit v1.2.3 From 2c13b6f6dc4f55b76861991dea318e3566cec9a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 7 Sep 2013 22:43:56 -0700 Subject: MedaWiki reader: Implement some mathjax extensions. * `:` for display math * `\(..\)` for inline math * `\[..\]` for display math We omit the `$` forms as the heuristics are harder. --- src/Text/Pandoc/Readers/MediaWiki.hs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 2b938cd82..0432915bc 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -91,7 +91,7 @@ nested p = do return res specialChars :: [Char] -specialChars = "'[]<=&*{}|\"" +specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" @@ -380,8 +380,9 @@ defListItem = try $ do terms <- mconcat . intersperse B.linebreak <$> many defListTerm -- we allow dd with no dt, or dt with no dd defs <- if B.isNull terms - then many1 $ listItem ':' - else many $ listItem ':' + then notFollowedBy (try $ string ":") *> + many1 (listItem ':') + else many (listItem ':') return (terms, defs) defListTerm :: MWParser Inlines @@ -462,6 +463,7 @@ inline = whitespace <|> image <|> internalLink <|> externalLink + <|> math <|> inlineTag <|> B.singleton <$> charRef <|> inlineHtml @@ -472,6 +474,16 @@ inline = whitespace str :: MWParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +math :: MWParser Inlines +math = (B.displayMath <$> try (char ':' >> charsInTags "math")) + <|> (B.math <$> charsInTags "math") + <|> (B.displayMath <$> try (dmStart *> manyTill anyChar dmEnd)) + <|> (B.math <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) + where dmStart = string "\\[" + dmEnd = try (string "\\]") + mStart = string "\\(" + mEnd = try (string "\\)") + variable :: MWParser String variable = try $ do string "{{{" @@ -495,7 +507,6 @@ inlineTag = do TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub" TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup" - TagOpen "math" _ -> B.math <$> charsInTags "math" TagOpen "code" _ -> B.code <$> charsInTags "code" TagOpen "tt" _ -> B.code <$> charsInTags "tt" TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" -- cgit v1.2.3 From cf2506acdc721ec27ed310cd7bdad8affb28d1e5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 11:43:46 -0700 Subject: Markdown: Allow backtick code blocks not to be preceded by blank line. Closes #975. --- src/Text/Pandoc/Readers/Markdown.hs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2ca0d312a..4a7789e17 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -872,6 +872,7 @@ para = try $ do newline (blanklines >> return mempty) <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote) + <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead codeBlockFenced) <|> (guardDisabled Ext_blank_before_header >> lookAhead header) return $ do result' <- result @@ -1562,6 +1563,8 @@ endline = try $ do guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header + guardEnabled Ext_backtick_code_blocks >> + notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) -- parse potential list-starts differently if in a list: st <- getState when (stateParserContext st == ListItemState) $ do -- cgit v1.2.3 From 777226296b04fa37094ecb07eb33f8d3e05af036 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 11:49:13 -0700 Subject: markdown+list_without_preceding_blankline:+Interpret text before list as paragraph. --- src/Text/Pandoc/Readers/Markdown.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4a7789e17..122db17de 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -871,9 +871,11 @@ para = try $ do $ try $ do newline (blanklines >> return mempty) - <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote) - <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead codeBlockFenced) - <|> (guardDisabled Ext_blank_before_header >> lookAhead header) + <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote) + <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) + <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) + <|> (guardEnabled Ext_lists_without_preceding_blankline >> + () <$ lookAhead listStart) return $ do result' <- result case B.toList result' of -- cgit v1.2.3 From c78557f3ca333d9ae925fdcb8a7c03199f5e47fd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 12:04:47 -0700 Subject: Templates: more consistent behavior of `$for$`. When `foo` is not a list, `$for(foo)$...$endfor$` should behave like $if(foo)$...$endif$. So if `foo` resolves to "", no output should be produced. See pandoc-templates#39. --- src/Text/Pandoc/Templates.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 22a44e735..7f744c7e1 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -212,7 +212,7 @@ iter var' template sep = Template $ \val -> unTemplate Just (Array vec) -> mconcat $ intersperse sep $ map (setVar template var') $ toList vec - Just x -> setVar template var' x + Just x -> cond var' (setVar template var' x) mempty Nothing -> mempty) val setVar :: Template -> Variable -> Value -> Template -- cgit v1.2.3 From 81e2df32c92ee95771f2613b9ad30aeaf11423e5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 8 Sep 2013 15:47:50 -0700 Subject: Made . . . for pause work in all slide show formats except slideous. --- README | 5 ++--- changelog | 5 +---- src/Text/Pandoc/Writers/HTML.hs | 22 +++++++++++++++------- 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'src/Text') diff --git a/README b/README index 7d926216b..56ad50b3c 100644 --- a/README +++ b/README @@ -2680,9 +2680,8 @@ a single document. Inserting pauses ---------------- -In reveal.js and beamer slide shows, you can add "pauses" within -a slide by including a paragraph containing three dots, separated -by spaces: +You can add "pauses" within a slide by including a paragraph containing +three dots, separated by spaces: # Slide with a pause diff --git a/changelog b/changelog index b7091460f..b3e9d8b0b 100644 --- a/changelog +++ b/changelog @@ -413,7 +413,7 @@ as markdown citations, it is redundant to have a bibliography, since one will be generated automatically.) - * Added syntax for "pauses" in beamer or reaveljs slide shows. + * Added syntax for "pauses" in slide shows: This gives @@ -421,9 +421,6 @@ a pause. - [note - no longer seems to work in recente revealjs - perhaps - this should be reverted] - * Use new flexible metadata type. + Depend on `pandoc-types` 1.12. This changes the type of diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 63b466af3..78a3edce8 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -268,11 +268,24 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen else blockToHtml opts (Header level' (id',classes,keyvals) title') let isSec (Sec _ _ _ _ _) = True isSec (Blk _) = False + let isPause (Blk x) = x == Para [Str ".",Space,Str ".",Space,Str "."] + isPause _ = False + let fragmentClass = case writerSlideVariant opts of + RevealJsSlides -> "fragment" + _ -> "incremental" + let inDiv xs = Blk (RawBlock (Format "html") ("
")) : + (xs ++ [Blk (RawBlock (Format "html") "
")]) innerContents <- mapM (elementToHtml slideLevel opts) $ if titleSlide -- title slides have no content of their own then filter isSec elements - else elements + else if slide + then case splitBy isPause elements of + [] -> [] + [x] -> x + xs -> concatMap inDiv xs + else elements let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && @@ -401,10 +414,6 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat [nl opts, img, capt, nl opts] --- . . . indicates a pause in a slideshow -blockToHtml opts (Para [Str ".",Space,Str ".",Space,Str "."]) - | writerSlideVariant opts == RevealJsSlides = - blockToHtml opts (RawBlock "html" "
") blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents @@ -580,8 +589,7 @@ toListItem opts item = nl opts >> H.li item blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = - mapM (blockToHtml opts) lst >>= - return . mconcat . intersperse (nl opts) + fmap (mconcat . intersperse (nl opts)) $ mapM (blockToHtml opts) lst -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -- cgit v1.2.3 From a9f3abc653bc7c0cb320056e31bb569652e03321 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 9 Sep 2013 11:19:37 -0700 Subject: Markdown: don't parse citation right after alphanumeric. An `@` after an alphanumeric is probably an email address. --- src/Text/Pandoc/Readers/Markdown.hs | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 122db17de..9f2bc4447 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1823,6 +1823,11 @@ normalCite = try $ do citeKey :: MarkdownParser (Bool, String) citeKey = try $ do + -- make sure we're not right after an alphanumeric, + -- since foo@bar.baz is probably an email address + lastStrPos <- stateLastStrPos <$> getState + pos <- getPosition + guard $ lastStrPos /= Just pos suppress_author <- option False (char '-' >> return True) char '@' first <- letter -- cgit v1.2.3 From 71841de0f3d02fc2c88d61ab5d29b7022090f5f1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 11 Sep 2013 09:31:41 -0700 Subject: Mediawiki: Parse an image + caption in a para by itself as a figure. --- src/Text/Pandoc/Readers/MediaWiki.hs | 4 ++-- tests/mediawiki-reader.native | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 0432915bc..8b436c89f 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing hiding ( nested ) import Text.Pandoc.Walk ( walk ) -import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead ) +import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify ) import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad @@ -539,7 +539,7 @@ image = try $ do _ <- many (try $ char '|' *> imageOption) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.image fname "image" caption + return $ B.image fname ("fig:" ++ stringify caption) caption imageOption :: MWParser String imageOption = diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index a424be0ae..5ddbd309f 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -84,10 +84,10 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Link [Str "#My",Space,Str "anchor"] ("#My_anchor","wikilink")] ,Para [Link [Str "and",Space,Str "text"] ("Page#with_anchor","wikilink")] ,Header 2 ("",[],[]) [Str "images"] -,Para [Image [Str "caption"] ("example.jpg","image")] -,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("http://google.com","")] ("example.jpg","image")] -,Para [Image [Str "caption"] ("example.jpg","image")] -,Para [Image [Str "example.jpg"] ("example.jpg","image")] +,Para [Image [Str "caption"] ("example.jpg","fig:caption")] +,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("http://google.com","")] ("example.jpg","fig:the caption with external link")] +,Para [Image [Str "caption"] ("example.jpg","fig:caption")] +,Para [Image [Str "example.jpg"] ("example.jpg","fig:example.jpg")] ,Header 2 ("",[],[]) [Str "lists"] ,BulletList [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] -- cgit v1.2.3 From ca6842349e23b3f60cb2665d1c20de9951bea268 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 12 Sep 2013 09:24:25 -0700 Subject: HTML writer: Ensure proper escaping in header metadata. --- changelog | 1 + src/Text/Pandoc/Writers/HTML.hs | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/changelog b/changelog index f2742664c..a892ab0ec 100644 --- a/changelog +++ b/changelog @@ -405,6 +405,7 @@ + Fixed `--no-highlight` (Alexander Kondratskiy). + Don't convert to lowercase in email obfuscation (#839). + + Ensure proper escaping in `` and `<meta>` fields. * AsciiDoc writer: diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 78a3edce8..902c8bc53 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Slides import Text.Pandoc.Highlighting ( highlight, styleToCss, formatHtmlInline, formatHtmlBlock ) -import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.XML (fromEntities, escapeStringForXML) import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -115,8 +115,9 @@ pandocToHtml opts (Pandoc meta blocks) = do (fmap renderHtml . blockListToHtml opts) (fmap renderHtml . inlineListToHtml opts) meta - let authsMeta = map stringify $ docAuthors meta - let dateMeta = stringify $ docDate meta + let stringifyHTML = escapeStringForXML . stringify + let authsMeta = map stringifyHTML $ docAuthors meta + let dateMeta = stringifyHTML $ docDate meta let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts let sects = hierarchicalize $ if writerSlideVariant opts == NoSlides @@ -168,7 +169,7 @@ pandocToHtml opts (Pandoc meta blocks) = do maybe id (defField "toc" . renderHtml) toc $ defField "author-meta" authsMeta $ maybe id (defField "date-meta") (normalizeDate dateMeta) $ - defField "pagetitle" (stringify $ docTitle meta) $ + defField "pagetitle" (stringifyHTML $ docTitle meta) $ defField "idprefix" (writerIdentifierPrefix opts) $ -- these should maybe be set in pandoc.hs defField "slidy-url" -- cgit v1.2.3 From 37471041788f079632ec369a970a184864799c3d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 12 Sep 2013 11:23:34 -0700 Subject: Markdown writer: Print references if output is 'plain'. --- src/Text/Pandoc/Writers/Markdown.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 23e730bf0..a36bb8e14 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -189,7 +189,8 @@ pandocToMarkdown opts (Pandoc meta blocks) = do -- Strip off final 'references' header if markdown citations enabled let blocks' = case reverse blocks of (Div (_,["references"],_) _):xs - | isEnabled Ext_citations opts -> reverse xs + | not isPlain && isEnabled Ext_citations opts + -> reverse xs _ -> blocks body <- blockListToMarkdown opts blocks' st <- get -- cgit v1.2.3 From 21f1bcb2805aad0c8c3a201cf59ce068bab6ec51 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 14 Sep 2013 22:27:25 -0700 Subject: Markdown reader: unresolved citations fall back to original text. Not ???. Reason: Less surprising, especially for people using @ as in twitter. --- changelog | 2 +- src/Text/Pandoc/Readers/Markdown.hs | 25 ++++++++++++++----------- tests/markdown-citations.native | 28 ++++++++++++++-------------- 3 files changed, 29 insertions(+), 26 deletions(-) (limited to 'src/Text') diff --git a/changelog b/changelog index 5943fd33c..05bca58b9 100644 --- a/changelog +++ b/changelog @@ -97,7 +97,7 @@ may still be specified on the command line as before. * A `Cite` element is now created in parsing markdown whether or not - there is a matching reference. By default citations will print as `???`. + there is a matching reference. * The `pandoc-citeproc` script will put the bibliography at the end of the document, as before. However, it will be put inside a `Div` diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 9f2bc4447..5456f25b5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1774,12 +1774,11 @@ rawHtmlInline = do cite :: MarkdownParser (F Inlines) cite = do guardEnabled Ext_citations - citations <- textualCite <|> (fmap (flip B.cite unknownC) <$> normalCite) + citations <- textualCite + <|> do (cs, raw) <- withRaw normalCite + return $ (flip B.cite (B.text raw)) <$> cs return citations -unknownC :: Inlines -unknownC = B.str "???" - textualCite :: MarkdownParser (F Inlines) textualCite = try $ do (_, key) <- citeKey @@ -1790,14 +1789,18 @@ textualCite = try $ do , citationNoteNum = 0 , citationHash = 0 } - mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite + mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite case mbrest of - Just rest -> return $ (flip B.cite unknownC . (first:)) <$> rest - Nothing -> (fmap (flip B.cite unknownC) <$> bareloc first) <|> - return (do st <- askF - return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] unknownC) + Just (rest, raw) -> + return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) + <$> rest + Nothing -> + (do (cs, raw) <- withRaw $ bareloc first + return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw)) <$> cs) + <|> return (do st <- askF + return $ case M.lookup key (stateExamples st) of + Just n -> B.str (show n) + _ -> B.cite [first] $ B.str $ '@':key) bareloc :: Citation -> MarkdownParser (F [Citation]) bareloc c = try $ do diff --git a/tests/markdown-citations.native b/tests/markdown-citations.native index 1cd4bd035..d9738fb4f 100644 --- a/tests/markdown-citations.native +++ b/tests/markdown-citations.native @@ -1,17 +1,17 @@ [Header 1 ("pandoc-with-citeproc-hs",[],[]) [Str "Pandoc",Space,Str "with",Space,Str "citeproc-hs"] ,BulletList - [[Para [Cite [Citation {citationId = "nonexistent", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"]]] - ,[Para [Cite [Citation {citationId = "nonexistent", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"]]] - ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] - ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "30"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] - ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "30,",Space,Str "with",Space,Str "suffix"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] - ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0},Citation {citationId = "item2", citationPrefix = [], citationSuffix = [Space,Str "p.",Space,Str "30"], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [Str "see",Space,Str "also"], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "says",Space,Str "blah."]] - ,[Para [Str "In",Space,Str "a",Space,Str "note.",Note [Para [Cite [Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "12"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "???"],Space,Str "and",Space,Str "a",Space,Str "citation",Space,Str "without",Space,Str "locators",Space,Cite [Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]]] - ,[Para [Str "A",Space,Str "citation",Space,Str "group",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "chap.",Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [Str "also"], citationSuffix = [Space,Str "p.",Space,Str "34-35"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]] - ,[Para [Str "Another",Space,Str "one",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "p.",Space,Str "34-35"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]] - ,[Para [Str "And",Space,Str "another",Space,Str "one",Space,Str "in",Space,Str "a",Space,Str "note.",Note [Para [Str "Some",Space,Str "citations",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "chap.",Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "item2", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]]] - ,[Para [Str "Citation",Space,Str "with",Space,Str "a",Space,Str "suffix",Space,Str "and",Space,Str "locator",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "pp.",Space,Str "33,",Space,Str "35-37,",Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]] - ,[Para [Str "Citation",Space,Str "with",Space,Str "suffix",Space,Str "only",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]] - ,[Para [Str "Now",Space,Str "some",Space,Str "modifiers.",Note [Para [Str "Like",Space,Str "a",Space,Str "citation",Space,Str "without",Space,Str "author:",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str ",",Space,Str "and",Space,Str "now",Space,Str "Doe",Space,Str "with",Space,Str "a",Space,Str "locator",Space,Cite [Citation {citationId = "item2", citationPrefix = [], citationSuffix = [Space,Str "p.",Space,Str "44"], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]]] - ,[Para [Str "With",Space,Str "some",Space,Str "markup",Space,Cite [Citation {citationId = "item1", citationPrefix = [Emph [Str "see"]], citationSuffix = [Space,Str "p.",Space,Strong [Str "32"]], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "???"],Str "."]]] + [[Para [Cite [Citation {citationId = "nonexistent", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@nonexistent]"]]] + ,[Para [Cite [Citation {citationId = "nonexistent", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@nonexistent"]]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@item1"],Space,Str "says",Space,Str "blah."]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "30"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@item1",Space,Str "[p.",Space,Str "30]"],Space,Str "says",Space,Str "blah."]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "30,",Space,Str "with",Space,Str "suffix"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@item1",Space,Str "[p.",Space,Str "30,",Space,Str "with",Space,Str "suffix]"],Space,Str "says",Space,Str "blah."]] + ,[Para [Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0},Citation {citationId = "item2", citationPrefix = [], citationSuffix = [Space,Str "p.",Space,Str "30"], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [Str "see",Space,Str "also"], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "@item1",Space,Str "[-@item2",Space,Str "p.",Space,Str "30;",Space,Str "see",Space,Str "also",Space,Str "@\1087\1091\1085\1082\1090\&3]"],Space,Str "says",Space,Str "blah."]] + ,[Para [Str "In",Space,Str "a",Space,Str "note.",Note [Para [Cite [Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [Str "p.",Space,Str "12"], citationMode = AuthorInText, citationNoteNum = 0, citationHash = 0}] [Str "@\1087\1091\1085\1082\1090\&3",Space,Str "[p.",Space,Str "12]"],Space,Str "and",Space,Str "a",Space,Str "citation",Space,Str "without",Space,Str "locators",Space,Cite [Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@\1087\1091\1085\1082\1090\&3]"],Str "."]]]] + ,[Para [Str "A",Space,Str "citation",Space,Str "group",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "chap.",Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [Str "also"], citationSuffix = [Space,Str "p.",Space,Str "34-35"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[see",Space,Str "@item1",Space,Str "chap.",Space,Str "3;",Space,Str "also",Space,Str "@\1087\1091\1085\1082\1090\&3",Space,Str "p.",Space,Str "34-35]"],Str "."]] + ,[Para [Str "Another",Space,Str "one",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "p.",Space,Str "34-35"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[see",Space,Str "@item1",Space,Str "p.",Space,Str "34-35]"],Str "."]] + ,[Para [Str "And",Space,Str "another",Space,Str "one",Space,Str "in",Space,Str "a",Space,Str "note.",Note [Para [Str "Some",Space,Str "citations",Space,Cite [Citation {citationId = "item1", citationPrefix = [Str "see"], citationSuffix = [Space,Str "chap.",Space,Str "3"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "\1087\1091\1085\1082\1090\&3", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0},Citation {citationId = "item2", citationPrefix = [], citationSuffix = [], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[see",Space,Str "@item1",Space,Str "chap.",Space,Str "3;",Space,Str "@\1087\1091\1085\1082\1090\&3;",Space,Str "@item2]"],Str "."]]]] + ,[Para [Str "Citation",Space,Str "with",Space,Str "a",Space,Str "suffix",Space,Str "and",Space,Str "locator",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "pp.",Space,Str "33,",Space,Str "35-37,",Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@item1",Space,Str "pp.",Space,Str "33,",Space,Str "35-37,",Space,Str "and",Space,Str "nowhere",Space,Str "else]"],Str "."]] + ,[Para [Str "Citation",Space,Str "with",Space,Str "suffix",Space,Str "only",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [Space,Str "and",Space,Str "nowhere",Space,Str "else"], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[@item1",Space,Str "and",Space,Str "nowhere",Space,Str "else]"],Str "."]] + ,[Para [Str "Now",Space,Str "some",Space,Str "modifiers.",Note [Para [Str "Like",Space,Str "a",Space,Str "citation",Space,Str "without",Space,Str "author:",Space,Cite [Citation {citationId = "item1", citationPrefix = [], citationSuffix = [], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "[-@item1]"],Str ",",Space,Str "and",Space,Str "now",Space,Str "Doe",Space,Str "with",Space,Str "a",Space,Str "locator",Space,Cite [Citation {citationId = "item2", citationPrefix = [], citationSuffix = [Space,Str "p.",Space,Str "44"], citationMode = SuppressAuthor, citationNoteNum = 0, citationHash = 0}] [Str "[-@item2",Space,Str "p.",Space,Str "44]"],Str "."]]]] + ,[Para [Str "With",Space,Str "some",Space,Str "markup",Space,Cite [Citation {citationId = "item1", citationPrefix = [Emph [Str "see"]], citationSuffix = [Space,Str "p.",Space,Strong [Str "32"]], citationMode = NormalCitation, citationNoteNum = 0, citationHash = 0}] [Str "[*see*",Space,Str "@item1",Space,Str "p.",Space,Str "**32**]"],Str "."]]] ,Header 1 ("references",[],[]) [Str "References"]] -- cgit v1.2.3 From 464b174d0f17c5a9f38d724523dcaf4e8a6b07fe Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Wed, 18 Sep 2013 09:13:37 -0700 Subject: Fixed reference slides. The Div container around references messed up the procedure for carving a document into slides. So we now remove the surrounding Div in prepSlides. --- src/Text/Pandoc/Slides.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 2bbdb120f..db4aa2509 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -46,13 +46,18 @@ getSlideLevel = go 6 -- | Prepare a block list to be passed to hierarchicalize. prepSlides :: Int -> [Block] -> [Block] -prepSlides slideLevel = ensureStartWithH . splitHrule +prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader where splitHrule (HorizontalRule : Header n attr xs : ys) | n == slideLevel = Header slideLevel attr xs : splitHrule ys splitHrule (HorizontalRule : xs) = Header slideLevel nullAttr [Str "\0"] : splitHrule xs splitHrule (x : xs) = x : splitHrule xs splitHrule [] = [] + extractRefsHeader bs = + case reverse bs of + (Div (_,["references"],_) (Header n attrs xs : ys) : zs) + -> reverse zs ++ (Header n attrs xs : ys) + _ -> bs ensureStartWithH bs@(Header n _ _:_) | n <= slideLevel = bs ensureStartWithH bs = Header slideLevel nullAttr [Str "\0"] : bs -- cgit v1.2.3 From d27e5a6ff002d575004bdb7abaebdc9c50e02b50 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 19 Sep 2013 09:48:02 -0700 Subject: DOCX writer: Add missing settings.xml to the zip container. Closes #990. --- src/Text/Pandoc/Writers/Docx.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index c8673ae48..1214e7f8b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -257,6 +257,7 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive "docProps/app.xml" themeEntry <- entryFromArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive "word/fontTable.xml" + settingsEntry <- entryFromArchive "word/settings.xml" webSettingsEntry <- entryFromArchive "word/webSettings.xml" -- Create archive @@ -264,7 +265,8 @@ writeDocx opts doc@(Pandoc meta _) = do contentTypesEntry : relsEntry : contentEntry : relEntry : footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : - fontTableEntry : webSettingsEntry : imageEntries + fontTableEntry : settingsEntry : webSettingsEntry : + imageEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From e135955b1e37b4bee72ffc6d7f4dc60e99dcecae Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 19 Sep 2013 10:08:49 -0700 Subject: LaTeX writer: Don't print biblio if --natbib or --biblatex option used. --- src/Text/Pandoc/Writers/LaTeX.hs | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 37ca60ce3..8b05cfb43 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -82,10 +82,17 @@ writeLaTeX options document = pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc meta blocks) = do + -- Strip off final 'references' header if --natbib or --biblatex + let method = writerCiteMethod options + let blocks' = if method == Biblatex || method == Natbib + then case reverse blocks of + (Div (_,["references"],_) _):xs -> reverse xs + _ -> blocks + else blocks -- see if there are internal links let isInternalLink (Link _ ('#':xs,_)) = [xs] isInternalLink _ = [] - modify $ \s -> s{ stInternalLinks = query isInternalLink blocks } + modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass let bookClasses = ["memoir","book","report","scrreprt","scrbook"] @@ -107,15 +114,15 @@ pandocToLaTeX options (Pandoc meta blocks) = do (fmap (render colwidth) . blockListToLaTeX) (fmap (render colwidth) . inlineListToLaTeX) meta - let (blocks', lastHeader) = if writerCiteMethod options == Citeproc then - (blocks, []) - else case last blocks of - Header 1 _ il -> (init blocks, il) - _ -> (blocks, []) - blocks'' <- if writerBeamer options - then toSlides blocks' - else return blocks' - body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'' + let (blocks'', lastHeader) = if writerCiteMethod options == Citeproc then + (blocks', []) + else case last blocks' of + Header 1 _ il -> (init blocks', il) + _ -> (blocks', []) + blocks''' <- if writerBeamer options + then toSlides blocks'' + else return blocks'' + body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''' (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader let main = render colwidth $ vsep body st <- get -- cgit v1.2.3 From 255037a0912c5cc819985f4224057659f7af50fa Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 19 Sep 2013 10:09:32 -0700 Subject: Markdown reader: small code improvement. --- src/Text/Pandoc/Writers/Markdown.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index a36bb8e14..69ca05216 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -187,11 +187,11 @@ pandocToMarkdown opts (Pandoc meta blocks) = do then tableOfContents opts headerBlocks else empty -- Strip off final 'references' header if markdown citations enabled - let blocks' = case reverse blocks of - (Div (_,["references"],_) _):xs - | not isPlain && isEnabled Ext_citations opts - -> reverse xs - _ -> blocks + let blocks' = if not isPlain && isEnabled Ext_citations opts + then case reverse blocks of + (Div (_,["references"],_) _):xs -> reverse xs + _ -> blocks + else blocks body <- blockListToMarkdown opts blocks' st <- get notes' <- notesToMarkdown opts (reverse $ stNotes st) -- cgit v1.2.3 From e149d4e138fdf42df07ff8400a4748b6f7bde150 Mon Sep 17 00:00:00 2001 From: Václav Zeman <vhaisman@gmail.com> Date: Wed, 25 Sep 2013 01:18:39 +0200 Subject: src/Text/Pandoc/Writers/OpenDocument.hs: Fix formatting of strikeout code. --- src/Text/Pandoc/Writers/OpenDocument.hs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 3ec5c2073..0f9044601 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -192,8 +192,15 @@ writeOpenDocument opts (Pandoc meta blocks) = listStyles = map listStyle (stListStyles s) automaticStyles = inTagsIndented "office:automatic-styles" $ vcat $ reverse $ styles ++ listStyles + fontFaceDecls = inTagsIndented "office:font-face-decls" $ vcat $ + [selfClosingTag "style:font-face" [ + ("style:name", "Courier New") + , ("style:font-family-generic", "modern") + , ("style:font-pitch", "fixed") + , ("svg:font-family", "'Courier New'")]] context = defField "body" body $ defField "automatic-styles" (render' automaticStyles) + $ defField "font-face-decls" (render' fontFaceDecls) $ metadata in if writerStandalone opts then renderTemplate' (writerTemplate opts) context @@ -373,18 +380,18 @@ inlineToOpenDocument o ils | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l - | Code _ s <- ils = preformatted s + | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) | Cite _ l <- ils = inlinesToOpenDocument o l | RawInline f s <- ils = if f == "opendocument" || f == "html" - then preformatted s + then withTextStyle Pre $ inTextStyle $ preformatted s else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l | Image _ (s,t) <- ils = return $ mkImg s t | Note l <- ils = mkNote l | otherwise = return empty where - preformatted = return . inSpanTags "Teletype" . handleSpaces . escapeStringForXML + preformatted s = handleSpaces $ escapeStringForXML s mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") , ("xlink:href" , s ) , ("office:name", t ) @@ -524,7 +531,8 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq,Ord ) +data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC | Pre + deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] textStyleAttr s @@ -538,5 +546,8 @@ textStyleAttr s | Sub <- s = [("style:text-position" ,"sub 58%" )] | Sup <- s = [("style:text-position" ,"super 58%" )] | SmallC <- s = [("fo:font-variant" ,"small-caps")] + | Pre <- s = [("style:font-name" ,"Courier New") + ,("style:font-name-asian" ,"Courier New") + ,("style:font-name-complex" ,"Courier New")] | otherwise = [] -- cgit v1.2.3 From d76a6e23720f4acb292d3384ee020dfb072a120c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 24 Sep 2013 18:41:19 -0700 Subject: OpenDocument writer: don't use font-face-decls variable. --- data/templates | 2 +- src/Text/Pandoc/Writers/OpenDocument.hs | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'src/Text') diff --git a/data/templates b/data/templates index 1ccb16bb3..0bb5f9ba2 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 1ccb16bb33e8022c9511284e6718386efa3a0bbf +Subproject commit 0bb5f9ba204ea242e361c264f019490ead1cf313 diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 0f9044601..206be7133 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -192,15 +192,8 @@ writeOpenDocument opts (Pandoc meta blocks) = listStyles = map listStyle (stListStyles s) automaticStyles = inTagsIndented "office:automatic-styles" $ vcat $ reverse $ styles ++ listStyles - fontFaceDecls = inTagsIndented "office:font-face-decls" $ vcat $ - [selfClosingTag "style:font-face" [ - ("style:name", "Courier New") - , ("style:font-family-generic", "modern") - , ("style:font-pitch", "fixed") - , ("svg:font-family", "'Courier New'")]] context = defField "body" body $ defField "automatic-styles" (render' automaticStyles) - $ defField "font-face-decls" (render' fontFaceDecls) $ metadata in if writerStandalone opts then renderTemplate' (writerTemplate opts) context -- cgit v1.2.3 From 9e7072cf1bf9d2e786a9e49ae144b0e625f66c87 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 28 Sep 2013 11:53:19 -0700 Subject: LaTeX reader: Parse {groups} as Span. This is needed for accurate conversion of bibtex titles, since we need to know what was protected from titlecase conversions. --- src/Text/Pandoc/Readers/LaTeX.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ff5b73348..cf5119345 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -176,7 +176,7 @@ inline = (mempty <$ comment) <|> (space <$ sp) <|> inlineText <|> inlineCommand - <|> grouped inline + <|> inlineGroup <|> (char '-' *> option (str "-") ((char '-') *> option (str "–") (str "—" <$ char '-'))) <|> double_quote @@ -199,6 +199,15 @@ inline = (mempty <$ comment) inlines :: LP Inlines inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) +inlineGroup :: LP Inlines +inlineGroup = do + ils <- grouped inline + if isNull ils + then return mempty + else return $ spanWith nullAttr ils + -- we need the span so we can detitlecase bibtex entries; + -- we need to know when something is {C}apitalized + block :: LP Blocks block = (mempty <$ comment) <|> (mempty <$ ((spaceChar <|> newline) *> spaces)) -- cgit v1.2.3 From dbd4aee7305ed82c9daf33a59fd0c29d3e3461d6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 6 Oct 2013 17:21:33 -0700 Subject: Removed code that forces MathJax to typeset. Closes #1012. Reopens #966. A better solution for #966 will just affect slideous, not the other slide writers. --- src/Text/Pandoc/Writers/HTML.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 902c8bc53..f6775b13a 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -144,8 +144,7 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax url -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ preEscapedString - "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" + $ mempty JsMath (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" -- cgit v1.2.3 From 25e43d1c8944f793b7c22fad207a94a11d93365d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 11 Oct 2013 10:43:07 -0700 Subject: LaTeX reader: Fixed character escaping in \url{}. Previously `\~` wasn't handled properly, among others. --- src/Text/Pandoc/Readers/LaTeX.hs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index cf5119345..d22430eb9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -527,9 +527,7 @@ inNote ils = unescapeURL :: String -> String unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable '%' = True - isEscapable '#' = True - isEscapable _ = False + where isEscapable c = c `elem` "#$%&~_^\\{}" unescapeURL (x:xs) = x:unescapeURL xs unescapeURL [] = "" -- cgit v1.2.3 From de10b1653e0624d91bc9b0b96d2f84c4673c6d98 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 11 Oct 2013 22:01:58 -0700 Subject: RST writer: Skip spaces after display math. Otherwise we get indentation problems, and part of the next paragraph may be rendered as part of the math. --- src/Text/Pandoc/Writers/RST.hs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 70c6b4421..dd2c3186c 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -296,8 +296,14 @@ blockListToRST blocks = mapM blockToRST blocks >>= return . vcat -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc -inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat - where insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed +inlineListToRST lst = + mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat + where -- remove spaces after displaymath, as they screw up indentation: + removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = + Math DisplayMath x : dropWhile (==Space) zs + removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs + removeSpaceAfterDisplayMath [] = [] + insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) | isComplex y && surroundComplex x z = x : y : RawInline "rst" "\\ " : insertBS (z:zs) -- cgit v1.2.3 From 1a55c8f5de357f36885e0ac5c50cdc8b6cafc211 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 11 Oct 2013 22:43:47 -0700 Subject: LaTeX reader: Ensure that preamble doesn't contribute to text of doc. --- src/Text/Pandoc/Readers/LaTeX.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index d22430eb9..762150dba 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1037,14 +1037,14 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}" - preambleBlock = (mempty <$ comment) - <|> (mempty <$ sp) - <|> (mempty <$ blanklines) - <|> (mempty <$ macro) - <|> blockCommand - <|> (mempty <$ anyControlSeq) - <|> (mempty <$ braced) - <|> (mempty <$ anyChar) + preambleBlock = (void comment) + <|> (void sp) + <|> (void blanklines) + <|> (void macro) + <|> (void blockCommand) + <|> (void anyControlSeq) + <|> (void braced) + <|> (void anyChar) ------- -- cgit v1.2.3 From 2ae7f5e2a0a741fa4822448ad378280f77ab0dd5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 13 Oct 2013 11:31:33 -0700 Subject: HTML writer: Insert command to typeset mathjax for slideous output. Closes #966. --- src/Text/Pandoc/Writers/HTML.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index f6775b13a..cee07cff5 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -144,7 +144,11 @@ pandocToHtml opts (Pandoc meta blocks) = do MathJax url -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" - $ mempty + $ case writerSlideVariant opts of + SlideousSlides -> + preEscapedString + "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);" + _ -> mempty JsMath (Just url) -> H.script ! A.src (toValue url) ! A.type_ "text/javascript" -- cgit v1.2.3 From 0df7cce37da162c656aa88ecb67788109749668c Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 13 Oct 2013 15:36:19 -0700 Subject: Treat div with class "notes" as speaker notes in slide formats. Currently beamer goes to `\note{}`, revealjs to `<aside class="notes">`, and the notes are simply suppressed in other formats. Closes #925. --- README | 18 ++++++++++++++++++ src/Text/Pandoc/Writers/HTML.hs | 11 +++++++++-- src/Text/Pandoc/Writers/LaTeX.hs | 7 ++++++- 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/README b/README index ba1f93661..133c99f64 100644 --- a/README +++ b/README @@ -2731,6 +2731,24 @@ bibliographies: # References {.allowframebreaks} +Speaker notes +------------- + +reveal.js has good support for speaker notes. You can add notes to your +markdown document thus: + + <div class="notes"> + This is my note. + + - It can contain markdown + - like this list + + </div> + +To show the notes window, press `s` while viewing the presentation. +Notes are not yet supported for other slide formats, but the notes +will not appear on the slides themselves. + Literate Haskell support ======================== diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index cee07cff5..22f5b8074 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -421,9 +421,16 @@ blockToHtml opts (Para [Image txt (s,'f':'i':'g':':':tit)]) = do blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents -blockToHtml opts (Div attr bs) = do +blockToHtml opts (Div attr@(_,classes,_) bs) = do contents <- blockListToHtml opts bs - return $ addAttrs opts attr $ H.div $ nl opts >> contents >> nl opts + let contents' = nl opts >> contents >> nl opts + return $ + if "notes" `elem` classes + then case writerSlideVariant opts of + RevealJsSlides -> addAttrs opts attr $ H5.aside $ contents' + NoSlides -> addAttrs opts attr $ H.div $ contents' + _ -> mempty + else addAttrs opts attr $ H.div $ contents' blockToHtml _ (RawBlock f str) | f == Format "html" = return $ preEscapedString str | otherwise = return mempty diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 8b05cfb43..d31e33a3a 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -285,7 +285,12 @@ isLineBreakOrSpace _ = False blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty -blockToLaTeX (Div _ bs) = blockListToLaTeX bs +blockToLaTeX (Div (_,classes,_) bs) = do + beamer <- writerBeamer `fmap` gets stOptions + contents <- blockListToLaTeX bs + if beamer && "notes" `elem` classes -- speaker notes + then return $ "\\note" <> braces contents + else return contents blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure -- cgit v1.2.3 From 386e933432f5badf3149da1e6edaeb2e693bbb9b Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Wed, 16 Oct 2013 09:48:11 -0700 Subject: Use isURI instead of isAbsoluteURI. It allows fragments identifiers. --- src/Text/Pandoc/SelfContained.hs | 6 +++--- src/Text/Pandoc/Shared.hs | 4 ++-- src/Text/Pandoc/Writers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Writers/Markdown.hs | 4 ++-- src/Text/Pandoc/Writers/RST.hs | 4 ++-- src/Text/Pandoc/Writers/RTF.hs | 4 ++-- src/Text/Pandoc/Writers/Texinfo.hs | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 0547bc065..6112e764f 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -32,7 +32,7 @@ the HTML using data URIs. -} module Text.Pandoc.SelfContained ( makeSelfContained ) where import Text.HTML.TagSoup -import Network.URI (isAbsoluteURI, escapeURIString) +import Network.URI (isURI, escapeURIString) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import Data.ByteString (ByteString) @@ -86,7 +86,7 @@ cssURLs userdata d orig = "\"" -> B.takeWhile (/='"') $ B.drop 1 u "'" -> B.takeWhile (/='\'') $ B.drop 1 u _ -> u - let url' = if isAbsoluteURI url + let url' = if isURI url then url else d </> url (raw, mime) <- getRaw userdata "" url' @@ -97,7 +97,7 @@ cssURLs userdata d orig = getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) getItem userdata f = - if isAbsoluteURI f + if isURI f then openURL f >>= either handleErr return else do -- strip off trailing query or fragment part, if relative URL. diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 9a9a092fc..d6ccdae66 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -91,7 +91,7 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M -import Network.URI ( escapeURIString, isAbsoluteURI, unEscapeString ) +import Network.URI ( escapeURIString, isURI, unEscapeString ) import System.Directory import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension ) @@ -624,7 +624,7 @@ readDataFileUTF8 userDir fname = fetchItem :: Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem sourceURL s - | isAbsoluteURI s = openURL s + | isURI s = openURL s | otherwise = case sourceURL of Just u -> openURL (u ++ "/" ++ s) Nothing -> E.try readLocalFile diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d31e33a3a..dbfa57137 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -36,7 +36,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import Data.List ( (\\), isSuffixOf, isInfixOf, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation ) @@ -671,7 +671,7 @@ inlineToLaTeX (Link txt (src, _)) = contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } - let source' = if isAbsoluteURI source + let source' = if isURI source then source else unEscapeString source source'' <- stringToLaTeX URLString source' diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 69ca05216..33cb110b5 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -47,7 +47,7 @@ import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) import Text.Pandoc.Readers.TeXMath (readTeXMath) import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) -import Network.URI (isAbsoluteURI) +import Network.URI (isURI) import Data.Default import Data.Yaml (Value(Object,String,Array,Bool,Number)) import qualified Data.HashMap.Strict as H @@ -753,7 +753,7 @@ inlineToMarkdown opts (Link txt (src, tit)) = do then empty else text $ " \"" ++ tit ++ "\"" let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - let useAuto = isAbsoluteURI src && + let useAuto = isURI src && case txt of [Str s] | escapeURI s == srcSuffix -> True _ -> False diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index dd2c3186c..1a62f7250 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -38,7 +38,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Builder (deleteMeta) import Data.List ( isPrefixOf, intersperse, transpose ) -import Network.URI (isAbsoluteURI) +import Network.URI (isURI) import Text.Pandoc.Pretty import Control.Monad.State import Control.Applicative ( (<$>) ) @@ -393,7 +393,7 @@ inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space -- autolink inlineToRST (Link [Str str] (src, _)) - | isAbsoluteURI src && + | isURI src && if "mailto:" `isPrefixOf` src then src == escapeURI ("mailto:" ++ str) else src == escapeURI str = do diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 0e8ce2ece..cb5fb3232 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -40,7 +40,7 @@ import Data.Char ( ord, chr, isDigit, toLower ) import System.FilePath ( takeExtension ) import qualified Data.ByteString as B import Text.Printf ( printf ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import qualified Control.Exception as E -- | Convert Image inlines into a raw RTF embedded image, read from a file. @@ -48,7 +48,7 @@ import qualified Control.Exception as E rtfEmbedImage :: Inline -> IO Inline rtfEmbedImage x@(Image _ (src,_)) = do let ext = map toLower (takeExtension src) - if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src) + if ext `elem` [".jpg",".jpeg",".png"] && not (isURI src) then do let src' = unEscapeString src imgdata <- E.catch (B.readFile src') diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index b1fd3d6af..d62e50880 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -40,7 +40,7 @@ import Data.Ord ( comparing ) import Data.Char ( chr, ord ) import Control.Monad.State import Text.Pandoc.Pretty -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) import System.FilePath data WriterState = @@ -448,7 +448,7 @@ inlineToTexinfo (Image alternate (source, _)) = do where ext = drop 1 $ takeExtension source' base = dropExtension source' - source' = if isAbsoluteURI source + source' = if isURI source then source else unEscapeString source -- cgit v1.2.3 From 6e1c24da8e9f4715e03fd1ff9ee64528936d8bf3 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 17 Oct 2013 13:23:38 -0700 Subject: LaTeX writer: Add link anchors for code blocks with identifiers. Closes #1025. --- src/Text/Pandoc/Writers/LaTeX.hs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index dbfa57137..8f52b11ca 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -329,17 +329,23 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock | otherwise -> rawCodeBlock - where lhsCodeBlock = do + where ref = text identifier + linkAnchor = if null identifier + then empty + else "\\hyperdef{}" <> braces ref <> + braces ("\\label" <> braces ref) + lhsCodeBlock = do modify $ \s -> s{ stLHS = True } - return $ flush ("\\begin{code}" $$ text str $$ "\\end{code}") $$ cr + return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ + "\\end{code}") $$ cr rawCodeBlock = do st <- get env <- if stInNote st then modify (\s -> s{ stVerbInNote = True }) >> return "Verbatim" else return "verbatim" - return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$ - text ("\\end{" ++ env ++ "}")) <> cr + return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ + text str $$ text ("\\end{" ++ env ++ "}")) <> cr listingsCodeBlock = do st <- get let params = if writerListings (stOptions st) @@ -367,7 +373,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of Nothing -> rawCodeBlock Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ text h) + return (flush $ linkAnchor $$ text h) blockToLaTeX (RawBlock f x) | f == Format "latex" || f == Format "tex" = return $ text x -- cgit v1.2.3 From 80c1967e751c9b58f7c1f28211d47fbd57316d32 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 17 Oct 2013 13:36:43 -0700 Subject: PDF: Minor code cleanup. --- src/Text/Pandoc/PDF.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index a445e2991..e8683b98f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -107,7 +107,7 @@ tex2pdf' tmpDir program source = do then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source - let msg = "Error producing PDF from TeX source." + let msg = "Error producing PDF from TeX source.\n" case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -116,7 +116,7 @@ tex2pdf' tmpDir program source = do x | "! Package inputenc Error" `BC.isPrefixOf` x -> "\nTry running pandoc with --latex-engine=xelatex." _ -> "" - return $ Left $ msg <> "\n" <> extractMsg log' <> extramsg + return $ Left $ msg <> logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left msg (ExitSuccess, Just pdf) -> return $ Right pdf -- cgit v1.2.3 From 1f29f4678e0a1b1272704e5ddafa0bee45e5e426 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 17 Oct 2013 22:06:39 -0700 Subject: LaTeX writer: Specially escape non-ascii characters in labels. Otherwise we can get compile errors and other bugs when compiled with pdflatex. Closes #1007. Thanks to begemotv2718 for the fix. --- src/Text/Pandoc/Writers/LaTeX.hs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 8f52b11ca..20a6ac9a9 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -39,7 +39,7 @@ import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) import Data.List ( (\\), isSuffixOf, isInfixOf, isPrefixOf, intercalate, intersperse ) -import Data.Char ( toLower, isPunctuation ) +import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -222,6 +222,13 @@ stringToLaTeX ctx (x:xs) = do '\x2013' | ligatures -> "--" ++ rest _ -> x : rest +toLabel :: String -> String +toLabel [] = "" +toLabel (x:xs) + | (isLetter x || isDigit x) && isAscii x = x:toLabel xs + | elem x "-+=:;." = x:toLabel xs + | otherwise = "ux" ++ printf "%x" (ord x) ++ toLabel xs + -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc inCmd cmd contents = char '\\' <> text cmd <> braces contents @@ -329,7 +336,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock | otherwise -> rawCodeBlock - where ref = text identifier + where ref = text $ toLabel identifier linkAnchor = if null identifier then empty else "\\hyperdef{}" <> braces ref <> @@ -361,7 +368,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do (key,attr) <- keyvalAttr ] ++ (if identifier == "" then [] - else [ "label=" ++ identifier ]) + else [ "label=" ++ toLabel identifier ]) else [] printParams @@ -537,13 +544,13 @@ sectionHeader unnumbered ref level lst = do let refLabel x = (if ref `elem` internalLinks then text "\\hyperdef" <> braces empty - <> braces (text ref) + <> braces (text $ toLabel ref) <> braces x else x) let headerWith x y r = refLabel $ text x <> y <> if null r then empty - else text "\\label" <> braces (text r) + else text "\\label" <> braces (text $ toLabel r) let sectionType = case level' of 0 | writerBeamer opts -> "part" | otherwise -> "chapter" @@ -664,7 +671,8 @@ inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt ident' <- stringToLaTeX URLString ident - return $ text "\\hyperref" <> brackets (text ident') <> braces contents + return $ text "\\hyperref" <> brackets (text $ toLabel ident') <> + braces contents inlineToLaTeX (Link txt (src, _)) = case txt of [Str x] | x == src -> -- autolink -- cgit v1.2.3 From e5feed00f7edb0d79f717ec9815c57df696a992e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 18 Oct 2013 17:50:43 -0700 Subject: MediaWiki reader: Trim contents of `<math>` tags. Otherwise we get problems when converting to markdown. Closes #1027. --- src/Text/Pandoc/Readers/MediaWiki.hs | 10 +++++----- tests/mediawiki-reader.native | 1 + tests/mediawiki-reader.wiki | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 8b436c89f..136701bd0 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) import Text.Pandoc.Parsing hiding ( nested ) import Text.Pandoc.Walk ( walk ) -import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify ) +import Text.Pandoc.Shared ( stripTrailingNewlines, safeRead, stringify, trim ) import Data.Monoid (mconcat, mempty) import Control.Applicative ((<$>), (<*), (*>), (<$)) import Control.Monad @@ -475,10 +475,10 @@ str :: MWParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) math :: MWParser Inlines -math = (B.displayMath <$> try (char ':' >> charsInTags "math")) - <|> (B.math <$> charsInTags "math") - <|> (B.displayMath <$> try (dmStart *> manyTill anyChar dmEnd)) - <|> (B.math <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) +math = (B.displayMath . trim <$> try (char ':' >> charsInTags "math")) + <|> (B.math . trim <$> charsInTags "math") + <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) + <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) where dmStart = string "\\[" dmEnd = try (string "\\]") mStart = string "\\(" diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 5ddbd309f..0ab51a3aa 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -163,6 +163,7 @@ Pandoc (Meta {unMeta = fromList []}) ,[Plain [Str "The",Space,Str "Hague"]]] ,Header 2 ("",[],[]) [Str "math"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."] +,Para [Str "With",Space,Str "spaces:",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."] ,Header 2 ("",[],[]) [Str "preformatted",Space,Str "blocks"] ,Para [Code ("",[],[]) "Start\160each\160line\160with\160a\160space.",LineBreak,Code ("",[],[]) "Text\160is\160",Strong [Code ("",[],[]) "preformatted"],Code ("",[],[]) "\160and",LineBreak,Emph [Code ("",[],[]) "markups"],Code ("",[],[]) "\160",Strong [Emph [Code ("",[],[]) "can"]],Code ("",[],[]) "\160be\160done."] ,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"] diff --git a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki index 1e885daf0..26f4ef164 100644 --- a/tests/mediawiki-reader.wiki +++ b/tests/mediawiki-reader.wiki @@ -248,6 +248,8 @@ ends the list. Here is some <math>x=\frac{y^\pi}{z}</math>. +With spaces: <math> x=\frac{y^\pi}{z} </math>. + == preformatted blocks == Start each line with a space. -- cgit v1.2.3 From 1ce875a010e1471e06338a20c82781036d476776 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 20 Oct 2013 09:56:50 -0700 Subject: Fixed '. . .' (pause) on HTML slide formats. Closes #1029. The old version caused a pause to be inserted before the first material on a slide. This has been fixed. --- src/Text/Pandoc/Writers/HTML.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 22f5b8074..8a71c3a2e 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -286,9 +286,8 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen then filter isSec elements else if slide then case splitBy isPause elements of - [] -> [] - [x] -> x - xs -> concatMap inDiv xs + [] -> [] + (x:xs) -> x ++ concatMap inDiv xs else elements let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] let classes' = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ -- cgit v1.2.3 From 75ea0c4d0dd9133db8754ff640b9d3b316170e64 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Mon, 21 Oct 2013 09:33:10 -0700 Subject: LaTeX reader: Improved citation parsing. This fixes a run-time error that occured with `\citet{}` (empty list of keys). It also ensures that empty keys don't get produced. --- src/Text/Pandoc/Readers/LaTeX.hs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 762150dba..681dcb077 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -43,7 +43,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad import Text.Pandoc.Builder -import Data.Char (isLetter) +import Data.Char (isLetter, isAlphaNum) import Control.Applicative import Data.Monoid import Data.Maybe (fromMaybe) @@ -1065,6 +1065,7 @@ simpleCiteArgs = try $ do first <- optionMaybe $ toList <$> opt second <- optionMaybe $ toList <$> opt char '{' + optional sp keys <- manyTill citationLabel (char '}') let (pre, suf) = case (first , second ) of (Just s , Nothing) -> (mempty, s ) @@ -1080,18 +1081,24 @@ simpleCiteArgs = try $ do return $ addPrefix pre $ addSuffix suf $ map conv keys citationLabel :: LP String -citationLabel = trim <$> - (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp) +citationLabel = optional sp *> + (many1 (satisfy isBibtexKeyChar) + <* optional sp + <* optional (char ',') + <* optional sp) + where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*" cites :: CitationMode -> Bool -> LP [Citation] cites mode multi = try $ do cits <- if multi then many1 simpleCiteArgs else count 1 simpleCiteArgs - let (c:cs) = concat cits + let cs = concat cits return $ case mode of - AuthorInText -> c {citationMode = mode} : cs - _ -> map (\a -> a {citationMode = mode}) (c:cs) + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs citation :: String -> CitationMode -> Bool -> LP Inlines citation name mode multi = do -- cgit v1.2.3 From 0b16b08543e87503968649e63154a629005a406f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Oct 2013 12:31:07 -0700 Subject: Templates: Changed how array variables are resolved. Previously if `foo` is an array (which might be because multiple values were set on the command line), `$foo$` would resolve to the concatenation of the elements of foo. This is rarely useful behavior. It has been changed so that the first value is rendered. Of course, you can still iterate over the values using `$for(foo)$`. This has the result that you can override earlier settings using -V by putting new values later on the command line. That's useful for many purposes. --- src/Text/Pandoc/Templates.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 7f744c7e1..ad8838f72 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -117,6 +117,7 @@ import Text.Blaze (preEscapedText, Html) #endif import Data.ByteString.Lazy (ByteString, fromChunks) import Text.Pandoc.Shared (readDataFileUTF8) +import Data.Vector ((!?)) -- | Get default template for the specified writer. getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first @@ -185,7 +186,7 @@ var = Template . resolveVar resolveVar :: Variable -> Value -> Text resolveVar var' val = case multiLookup var' val of - Just (Array vec) -> mconcat $ map (resolveVar []) $ toList vec + Just (Array vec) -> maybe mempty (resolveVar []) $ vec !? 0 Just (String t) -> T.stripEnd t Just (Number n) -> T.pack $ show n Just (Bool True) -> "true" -- cgit v1.2.3 From e63aafd62004e3424da46de75c78ba4dc7562af4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Oct 2013 17:33:42 -0700 Subject: Fix definition lists with internal links in terms (closes #1032). This fix puts braces around a term that contains an internal link, to avoid problems with square brackets. --- src/Text/Pandoc/Writers/LaTeX.hs | 9 ++++++++- tests/Tests/Writers/LaTeX.hs | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 20a6ac9a9..72b0bde6d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -513,8 +513,15 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToLaTeX (term, defs) = do term' <- inlineListToLaTeX term + -- put braces around term if it contains an internal link, + -- since otherwise we get bad bracket interactions: \item[\hyperref[..] + let isInternalLink (Link _ ('#':_,_)) = True + isInternalLink _ = False + let term'' = if any isInternalLink term + then braces term' + else term' def' <- liftM vsep $ mapM blockListToLaTeX defs - return $ "\\item" <> brackets term' $$ def' + return $ "\\item" <> brackets term'' $$ def' -- | Craft the section header, inserting the secton reference, if supplied. sectionHeader :: Bool -- True for unnumbered diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index 5f702a85d..8a9519e2e 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -39,6 +39,11 @@ tests = [ testGroup "code blocks" , test latexListing "no identifier" $ codeBlock "hi" =?> ("\\begin{lstlisting}\nhi\n\\end{lstlisting}" :: String) ] + , testGroup "definition lists" + [ "with internal link" =: definitionList [(link "#go" "" (str "testing"), + [plain (text "hi there")])] =?> + "\\begin{description}\n\\itemsep1pt\\parskip0pt\\parsep0pt\n\\item[{\\hyperref[go]{testing}}]\nhi there\n\\end{description}" + ] , testGroup "math" [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> "$\\sigma|_{\\{x\\}}$" -- cgit v1.2.3 From 416dad86dca4087f4eab21f0ac6296178a1479cf Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 22 Oct 2013 12:34:18 -0700 Subject: DocBook reader: Handle numerical attributes starting with decimal. Also use safeRead instead of read. --- src/Text/Pandoc/Readers/DocBook.hs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 6a799e270..fc29988d5 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,5 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Data.Char (toUpper, isDigit) +import Data.Char (toUpper) +import Text.Pandoc.Shared (safeRead) import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder @@ -682,10 +683,9 @@ parseBlock (Elem e) = "lowerroman" -> LowerRoman "upperroman" -> UpperRoman _ -> Decimal - let start = case attrValue "override" <$> - filterElement (named "listitem") e of - Just x@(_:_) | all isDigit x -> read x - _ -> 1 + let start = maybe 1 id $ + (attrValue "override" <$> filterElement (named "listitem") e) + >>= safeRead orderedListWith (start,listStyle,DefaultDelim) <$> listitems "variablelist" -> definitionList <$> deflistitems @@ -801,7 +801,8 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of - Just w -> read $ filter (\x -> + Just w -> maybe 0 id + $ safeRead $ '0': filter (\x -> (x >= '0' && x <= '9') || x == '.') w Nothing -> 0 :: Double -- cgit v1.2.3 From ac7714ca398eaf25dd512754c1cb9aee297ff3d1 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 26 Oct 2013 18:22:59 -0700 Subject: Text.Pandoc.Writer.Shared: fixed bug in tagWithAttrs. A space was omitted before key-value attributes, leading to invalid HTML. --- src/Text/Pandoc/Writers/Shared.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 89923822c..9cb08803c 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -134,7 +134,6 @@ tagWithAttrs tag (ident,classes,kvs) = hsep ,if null classes then empty else "class=" <> doubleQuotes (text (unwords classes)) - ] - <> hsep (map (\(k,v) -> text k <> "=" <> + ,hsep (map (\(k,v) -> text k <> "=" <> doubleQuotes (text (escapeStringForXML v))) kvs) - <> ">" + ] <> ">" -- cgit v1.2.3 From a6aaff102ef75bf02c9a3256a2a750abae78d0b6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 29 Oct 2013 10:40:40 -0700 Subject: Slides: Preserve `<div class="references">` in references slide. --- src/Text/Pandoc/Slides.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index db4aa2509..50c46d17f 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -55,8 +55,8 @@ prepSlides slideLevel = ensureStartWithH . splitHrule . extractRefsHeader splitHrule [] = [] extractRefsHeader bs = case reverse bs of - (Div (_,["references"],_) (Header n attrs xs : ys) : zs) - -> reverse zs ++ (Header n attrs xs : ys) + (Div ("",["references"],[]) (Header n attrs xs : ys) : zs) + -> reverse zs ++ (Header n attrs xs : [Div ("",["references"],[]) ys]) _ -> bs ensureStartWithH bs@(Header n _ _:_) | n <= slideLevel = bs -- cgit v1.2.3 From ab0ffe6549261313410207a0b5beba9284135962 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 29 Oct 2013 10:57:48 -0700 Subject: Markdown reader: Yaml block must start immediately after `---`. If there's a blank line after `---`, we interpreted it as a horizontal rule. --- src/Text/Pandoc/Readers/Markdown.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5456f25b5..8f804f863 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -227,6 +227,7 @@ yamlMetaBlock = try $ do pos <- getPosition string "---" blankline + notFollowedBy blankline -- if --- is followed by a blank it's an HRULE rawYamlLines <- manyTill anyLine stopLine -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) -- cgit v1.2.3 From 0d95c15e8316eb28128bdd4c9c2f98e29f13f564 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 1 Nov 2013 14:27:22 -0700 Subject: TexMath: Export readTeXMath', which attends to display/inline. Deprecate readTeXMath, and use readTeXMath' in all the writers. Require texmath >= 0.6.5. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/TeXMath.hs | 24 +++++++++++++++++++----- src/Text/Pandoc/Writers/Docbook.hs | 4 ++-- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/HTML.hs | 4 ++-- src/Text/Pandoc/Writers/Man.hs | 5 +++-- src/Text/Pandoc/Writers/Markdown.hs | 6 +++--- src/Text/Pandoc/Writers/OpenDocument.hs | 2 +- src/Text/Pandoc/Writers/RTF.hs | 2 +- tests/writer.docbook | 2 +- tests/writer.html | 2 +- tests/writer.man | 2 +- tests/writer.opendocument | 2 +- tests/writer.rtf | 2 +- 14 files changed, 38 insertions(+), 23 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 6f51bc110..7e7081900 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -215,7 +215,7 @@ Library old-locale >= 1 && < 1.1, time >= 1.2 && < 1.5, HTTP >= 4000.0.5 && < 4000.3, - texmath >= 0.6.4 && < 0.7, + texmath >= 0.6.5 && < 0.7, xml >= 1.3.12 && < 1.4, random >= 1 && < 1.1, extensible-exceptions >= 0.1 && < 0.2, diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 1f7088f72..6bd617f7e 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -27,16 +27,30 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of TeX math to a list of 'Pandoc' inline elements. -} -module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where +module Text.Pandoc.Readers.TeXMath ( readTeXMath, readTeXMath' ) where import Text.Pandoc.Definition import Text.TeXMath -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. --- Defaults to raw formula between @$@ characters if entire formula +-- Defaults to raw formula between @$@ or @$$@ characters if entire formula -- can't be converted. +readTeXMath' :: MathType + -> String -- ^ String to parse (assumes @'\n'@ line endings) + -> [Inline] +readTeXMath' mt inp = case texMathToPandoc dt inp of + Left _ -> [Str (delim ++ inp ++ delim)] + Right res -> res + where (dt, delim) = case mt of + DisplayMath -> (DisplayBlock, "$$") + InlineMath -> (DisplayInline, "$") + +{-# DEPRECATED readTeXMath "Use readTeXMath' from Text.Pandoc.JSON instead" #-} +-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. +-- Defaults to raw formula between @$@ characters if entire formula +-- can't be converted. (This is provided for backwards compatibility; +-- it is better to use @readTeXMath'@, which properly distinguishes +-- between display and inline math.) readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings) -> [Inline] -readTeXMath inp = case texMathToPandoc DisplayInline inp of - Left _ -> [Str ("$" ++ inp ++ "$")] - Right res -> res +readTeXMath = readTeXMath' InlineMath diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 7c03c07dc..dad83d7bb 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -281,8 +281,8 @@ inlineToDocbook opts (Math t str) $ fixNS $ removeAttr r Left _ -> inlinesToDocbook opts - $ readTeXMath str - | otherwise = inlinesToDocbook opts $ readTeXMath str + $ readTeXMath' t str + | otherwise = inlinesToDocbook opts $ readTeXMath' t str where (dt, tagtype) = case t of InlineMath -> (DisplayInline,"inlineequation") DisplayMath -> (DisplayBlock,"informalequation") diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1214e7f8b..0fdea0a7a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -669,7 +669,7 @@ inlineToOpenXML opts (Math mathType str) = do else DisplayInline case texMathToOMML displayType str of Right r -> return [r] - Left _ -> inlinesToOpenXML opts (readTeXMath str) + Left _ -> inlinesToOpenXML opts (readTeXMath' mathType str) inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML opts (Code attrs str) = withTextProp (rStyle "VerbatimChar") diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 8a71c3a2e..c1cca291b 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -685,14 +685,14 @@ inlineToHtml opts inline = Right r -> return $ preEscapedString $ ppcElement conf r Left _ -> inlineListToHtml opts - (readTeXMath str) >>= return . + (readTeXMath' t str) >>= return . (H.span ! A.class_ "math") MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do - x <- inlineListToHtml opts (readTeXMath str) + x <- inlineListToHtml opts (readTeXMath' t str) let m = H.span ! A.class_ "math" $ x let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 642a002d6..b31cc2b70 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -330,9 +330,10 @@ inlineToMan opts (Cite _ lst) = inlineToMan _ (Code _ str) = return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str -inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str +inlineToMan opts (Math InlineMath str) = + inlineListToMan opts $ readTeXMath' InlineMath str inlineToMan opts (Math DisplayMath str) = do - contents <- inlineListToMan opts $ readTeXMath str + contents <- inlineListToMan opts $ readTeXMath' DisplayMath str return $ cr <> text ".RS" $$ contents $$ text ".RE" inlineToMan _ (RawInline f str) | f == Format "man" = return $ text str diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 33cb110b5..56be709d8 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Pretty import Control.Monad.State import qualified Data.Set as Set import Text.Pandoc.Writers.HTML (writeHtmlString) -import Text.Pandoc.Readers.TeXMath (readTeXMath) +import Text.Pandoc.Readers.TeXMath (readTeXMath') import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) import Network.URI (isURI) import Data.Default @@ -697,7 +697,7 @@ inlineToMarkdown opts (Math InlineMath str) return $ "\\(" <> text str <> "\\)" | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\(" <> text str <> "\\\\)" - | otherwise = inlineListToMarkdown opts $ readTeXMath str + | otherwise = inlineListToMarkdown opts $ readTeXMath' InlineMath str inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_dollars opts = return $ "$$" <> text str <> "$$" @@ -706,7 +706,7 @@ inlineToMarkdown opts (Math DisplayMath str) | isEnabled Ext_tex_math_double_backslash opts = return $ "\\\\[" <> text str <> "\\\\]" | otherwise = (\x -> cr <> x <> cr) `fmap` - inlineListToMarkdown opts (readTeXMath str) + inlineListToMarkdown opts (readTeXMath' DisplayMath str) inlineToMarkdown opts (RawInline f str) | f == "html" || f == "markdown" || (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 206be7133..b38d250aa 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -374,7 +374,7 @@ inlineToOpenDocument o ils | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s - | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) + | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s) | Cite _ l <- ils = inlinesToOpenDocument o l | RawInline f s <- ils = if f == "opendocument" || f == "html" then withTextStyle Pre $ inTextStyle $ preformatted s diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index cb5fb3232..fb935fa6a 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -324,7 +324,7 @@ inlineToRTF (Quoted DoubleQuote lst) = "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str +inlineToRTF (Math t str) = inlineListToRTF $ readTeXMath' t str inlineToRTF (Cite _ lst) = inlineListToRTF lst inlineToRTF (RawInline f str) | f == Format "rtf" = str diff --git a/tests/writer.docbook b/tests/writer.docbook index e427d8ffc..1d4da4842 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -1084,7 +1084,7 @@ These should not be escaped: \$ \\ \> \[ \{ <listitem> <para> Here’s some display math: - $\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$ + $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$ </para> </listitem> <listitem> diff --git a/tests/writer.html b/tests/writer.html index e8e619f44..e0d1a3b25 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -439,7 +439,7 @@ Blah <li><span class="math"><em>α</em> ∧ <em>ω</em></span></li> <li><span class="math">223</span></li> <li><span class="math"><em>p</em></span>-Tree</li> -<li>Here’s some display math: <br /><span class="math">$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$</span><br /></li> +<li>Here’s some display math: <br /><span class="math">$$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$</span><br /></li> <li>Here’s one that has a line break in it: <span class="math"><em>α</em> + <em>ω</em> × <em>x</em><sup>2</sup></span>.</li> </ul> <p>These shouldn’t be math:</p> diff --git a/tests/writer.man b/tests/writer.man index 54baaf791..aab588f9c 100644 --- a/tests/writer.man +++ b/tests/writer.man @@ -572,7 +572,7 @@ Ellipses\&...and\&...and\&.... .IP \[bu] 2 Here's some display math: .RS -$\\frac{d}{dx}f(x)=\\lim_{h\\to 0}\\frac{f(x+h)\-f(x)}{h}$ +$$\\frac{d}{dx}f(x)=\\lim_{h\\to 0}\\frac{f(x+h)\-f(x)}{h}$$ .RE .IP \[bu] 2 Here's one that has a line break in it: diff --git a/tests/writer.opendocument b/tests/writer.opendocument index d5eec1b60..b3888e34d 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -1418,7 +1418,7 @@ five.</text:p> </text:list-item> <text:list-item> <text:p text:style-name="P51">Here’s some display math: - $\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$</text:p> + $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$</text:p> </text:list-item> <text:list-item> <text:p text:style-name="P51">Here’s one that has a line break in it: diff --git a/tests/writer.rtf b/tests/writer.rtf index 42c13d8c7..954d95cc4 100644 --- a/tests/writer.rtf +++ b/tests/writer.rtf @@ -269,7 +269,7 @@ quoted link {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab {\i \u945?}\u8197?\u8743?\u8197?{\i \u969?}\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab 223\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab {\i p}-Tree\par} -{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab Here\u8217's some display math: $\\frac\{d\}\{dx\}f(x)=\\lim_\{h\\to 0\}\\frac\{f(x+h)-f(x)\}\{h\}$\par} +{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab Here\u8217's some display math: $$\\frac\{d\}\{dx\}f(x)=\\lim_\{h\\to 0\}\\frac\{f(x+h)-f(x)\}\{h\}$$\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab Here\u8217's one that has a line break in it: {\i \u945?}\u8197?+\u8197?{\i \u969?}\u8197?\u215?\u8197?{\i x}{\super 2}.\sa180\par} {\pard \ql \f0 \sa180 \li0 \fi0 These shouldn\u8217't be math:\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab To get the famous equation, write {\f1 $e = mc^2$}.\par} -- cgit v1.2.3 From 732f6abe15b75724c2eb7a8bf0763f054b2dc500 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 3 Nov 2013 11:17:39 -0800 Subject: HTML reader: Use pandoc Div and Span for raw "<div>", "<span>". Only if --parse-raw. --- src/Text/Pandoc/Readers/HTML.hs | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 7ca554fa3..80279bf61 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -92,6 +92,7 @@ block = choice , pHead , pBody , pPlain + , pDiv , pRawHtmlBlock ] @@ -177,6 +178,13 @@ pRawTag = do then return [] else return $ renderTags' [tag] +pDiv :: TagParser [Block] +pDiv = try $ do + getOption readerParseRaw >>= guard + TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) + contents <- pInTags "div" block + return [Div (mkAttr attr) contents] + pRawHtmlBlock :: TagParser [Block] pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag @@ -295,11 +303,7 @@ pCodeBlock = try $ do let result = case reverse result' of '\n':_ -> init result' _ -> result' - let attribsId = fromMaybe "" $ lookup "id" attr - let attribsClasses = words $ fromMaybe "" $ lookup "class" attr - let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - let attribs = (attribsId, attribsClasses, attribsKV) - return [CodeBlock attribs result] + return [CodeBlock (mkAttr attr) result] inline :: TagParser [Inline] inline = choice @@ -314,6 +318,7 @@ inline = choice , pLink , pImage , pCode + , pSpan , pRawHtmlInline ] @@ -397,11 +402,14 @@ pCode :: TagParser [Inline] pCode = try $ do (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) result <- manyTill pAnyTag (pCloses open) - let ident = fromMaybe "" $ lookup "id" attr - let classes = words $ fromMaybe [] $ lookup "class" attr - let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr - return [Code (ident,classes,rest) - $ intercalate " " $ lines $ innerText result] + return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result] + +pSpan :: TagParser [Inline] +pSpan = try $ do + getOption readerParseRaw >>= guard + TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) + contents <- pInTags "span" inline + return [Span (mkAttr attr) contents] pRawHtmlInline :: TagParser [Inline] pRawHtmlInline = do @@ -648,3 +656,10 @@ htmlTag f = try $ do _ -> do rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") + +mkAttr :: [(String, String)] -> Attr +mkAttr attr = (attribsId, attribsClasses, attribsKV) + where attribsId = fromMaybe "" $ lookup "id" attr + attribsClasses = words $ fromMaybe "" $ lookup "class" attr + attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + -- cgit v1.2.3 From 4301fa4a2794cc354da0bc15e7a4ebb214bb9966 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 3 Nov 2013 21:16:47 -0800 Subject: Markdown reader: Correctly handle empty bullet list items. For example: - one - - two This should NOT be parsed as a setext header followed by a list. --- src/Text/Pandoc/Readers/Markdown.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8f804f863..ea49d8c1d 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -444,6 +444,9 @@ block = choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock , guardEnabled Ext_latex_macros *> (macro >>= return . return) + -- note: bulletList needs to be before header because of + -- the possibility of empty list items: - + , bulletList , header , lhsCodeBlock , rawTeXBlock @@ -454,7 +457,6 @@ block = choice [ mempty <$ blanklines , codeBlockIndented , blockQuote , hrule - , bulletList , orderedList , definitionList , noteBlock @@ -699,7 +701,7 @@ bulletListStart = try $ do skipNonindentSpaces notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker - spaceChar + spaceChar <|> lookAhead newline skipSpaces anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim) @@ -727,7 +729,6 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) listLine :: MarkdownParser String listLine = try $ do - notFollowedBy blankline notFollowedBy' (do indentSpaces many (spaceChar) listStart) @@ -740,7 +741,7 @@ rawListItem :: MarkdownParser a rawListItem start = try $ do start first <- listLine - rest <- many (notFollowedBy listStart >> listLine) + rest <- many (notFollowedBy listStart >> notFollowedBy blankline >> listLine) blanks <- many blankline return $ unlines (first:rest) ++ blanks -- cgit v1.2.3 From 6b24b1afca306c9149c285b60ed36f04fae2ece2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Wed, 6 Nov 2013 09:25:50 -0800 Subject: Don't print `<span>` tags in 'plain' output. --- src/Text/Pandoc/Writers/Markdown.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 56be709d8..eefcd547a 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -642,8 +642,11 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do + st <- get contents <- inlineListToMarkdown opts ils - return $ tagWithAttrs "span" attrs <> contents <> text "</span>" + return $ if stPlain st + then contents + else tagWithAttrs "span" attrs <> contents <> text "</span>" inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" -- cgit v1.2.3 From 5b99112f229db1fcbe82943678f9f55c1ead8f11 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Wed, 6 Nov 2013 19:18:24 -0800 Subject: Docx writer: Fix URL for core-properties in `_rels/.rels`. Partially addresses #1046. --- src/Text/Pandoc/Writers/Docx.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 0fdea0a7a..f1276c831 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -247,7 +247,7 @@ writeDocx opts doc@(Pandoc meta _) = do ,("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties") ,("Target","docProps/app.xml")] , [("Id","rId3") - ,("Type","http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties") + ,("Type","http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties") ,("Target","docProps/core.xml")] ] let relsEntry = toEntry relsPath epochtime $ renderXml rels -- cgit v1.2.3 From 2efd0951d3d560a159f92df4730e2cee26978698 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 7 Nov 2013 08:46:52 -0800 Subject: Docx writer: fixed core metadata. - Don't create empty date nodes if no date given. - Don't create multiple dc:creator nodes; instead separate by semicolons. Closes #1046. --- src/Text/Pandoc/Writers/Docx.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index f1276c831..5d1647844 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -231,10 +231,11 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ mknode "dc:title" [] (stringify $ docTitle meta) - : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] - (maybe "" id $ normalizeDate $ stringify $ docDate meta) - : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here - : map (mknode "dc:creator" [] . stringify) (docAuthors meta) + : mknode "dc:creator" [] (intercalate "; " (map stringify $ docAuthors meta)) + : maybe [] + (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] $ x + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] $ x + ]) (normalizeDate $ stringify $ docDate meta) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps let relsPath = "_rels/.rels" -- cgit v1.2.3 From 01fed75b8f755bf4de3d4ec2b720e4dd9e66951a Mon Sep 17 00:00:00 2001 From: MinRK <benjaminrk@gmail.com> Date: Thu, 7 Nov 2013 22:25:44 -0800 Subject: recognize svg tag in HTML Reader avoids adding lots of `<p>` tags in embedded SVG content, for instance in markdown to HTML. --- src/Text/Pandoc/Readers/HTML.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 80279bf61..d691c9878 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -557,7 +557,7 @@ blockHtmlTags = ["address", "article", "aside", "blockquote", "body", "button", "noframes", "noscript", "object", "ol", "output", "p", "pre", "progress", "section", "table", "tbody", "textarea", "thead", "tfoot", "ul", "dd", "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script", "style", "video"] + "th", "thead", "tr", "script", "style", "svg", "video"] -- We want to allow raw docbook in markdown documents, so we -- include docbook block tags here too. -- cgit v1.2.3 From b4441c940dc76d1a64636ef88287b38306ebccb4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 12 Nov 2013 18:48:06 -0800 Subject: HTML/EPUB footnotes: Put `<sup>` tag inside `<a>` tags. This allows better control of formatting, since the `<a>` tags have a distinguishing class. Closes #1049. --- src/Text/Pandoc/Writers/HTML.hs | 8 ++++---- tests/writer.html | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c1cca291b..424843539 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -757,11 +757,11 @@ inlineToHtml opts inline = writerIdentifierPrefix opts ++ "fn" ++ ref) ! A.class_ "footnoteRef" ! prefixedId opts ("fnref" ++ ref) + $ H.sup $ toHtml ref - let link' = case writerEpubVersion opts of - Just EPUB3 -> link ! customAttribute "epub:type" "noteref" - _ -> link - return $ H.sup $ link' + return $ case writerEpubVersion opts of + Just EPUB3 -> link ! customAttribute "epub:type" "noteref" + _ -> link (Cite cits il)-> do contents <- inlineListToHtml opts il let citationIds = unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents diff --git a/tests/writer.html b/tests/writer.html index e0d1a3b25..d00b8ca66 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -544,12 +544,12 @@ document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'">'+e+'<\/'+'a'+'>'); <p>Here is a movie <img src="movie.jpg" alt="movie" /> icon.</p> <hr /> <h1 id="footnotes">Footnotes</h1> -<p>Here is a footnote reference,<sup><a href="#fn1" class="footnoteRef" id="fnref1">1</a></sup> and another.<sup><a href="#fn2" class="footnoteRef" id="fnref2">2</a></sup> This should <em>not</em> be a footnote reference, because it contains a space.[^my note] Here is an inline note.<sup><a href="#fn3" class="footnoteRef" id="fnref3">3</a></sup></p> +<p>Here is a footnote reference,<a href="#fn1" class="footnoteRef" id="fnref1"><sup>1</sup></a> and another.<a href="#fn2" class="footnoteRef" id="fnref2"><sup>2</sup></a> This should <em>not</em> be a footnote reference, because it contains a space.[^my note] Here is an inline note.<a href="#fn3" class="footnoteRef" id="fnref3"><sup>3</sup></a></p> <blockquote> -<p>Notes can go in quotes.<sup><a href="#fn4" class="footnoteRef" id="fnref4">4</a></sup></p> +<p>Notes can go in quotes.<a href="#fn4" class="footnoteRef" id="fnref4"><sup>4</sup></a></p> </blockquote> <ol style="list-style-type: decimal"> -<li>And in list items.<sup><a href="#fn5" class="footnoteRef" id="fnref5">5</a></sup></li> +<li>And in list items.<a href="#fn5" class="footnoteRef" id="fnref5"><sup>5</sup></a></li> </ol> <p>This paragraph should not be part of the note, as it is not indented.</p> <div class="footnotes"> -- cgit v1.2.3 From 892ba2dd163cbba598cbd86814a594008aae9f27 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 16 Nov 2013 14:09:09 -0800 Subject: LaTeX writer: Properly escape pdftitle, pdfauthor. Closes #1059. --- src/Text/Pandoc/Writers/LaTeX.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 72b0bde6d..7ff64cf74 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -126,14 +126,16 @@ pandocToLaTeX options (Pandoc meta blocks) = do (biblioTitle :: String) <- liftM (render colwidth) $ inlineListToLaTeX lastHeader let main = render colwidth $ vsep body st <- get + titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta + authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if writerChapters options then 1 else 0)) $ defField "body" main $ - defField "title-meta" (stringify $ docTitle meta) $ - defField "author-meta" (intercalate "; " $ map stringify $ docAuthors meta) $ + defField "title-meta" titleMeta $ + defField "author-meta" (intercalate "; " authorsMeta) $ defField "documentclass" (if writerBeamer options then ("beamer" :: String) else if writerChapters options -- cgit v1.2.3 From 9b0378b939f46183fc152b1a49a69f3007de295a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 16 Nov 2013 22:49:15 -0800 Subject: OpenDocument: Skip raw HTML. Previously it was erroneously included as verbatim text. Closes #1035. --- src/Text/Pandoc/Writers/OpenDocument.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index b38d250aa..565f5f869 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -376,8 +376,8 @@ inlineToOpenDocument o ils | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s) | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline f s <- ils = if f == "opendocument" || f == "html" - then withTextStyle Pre $ inTextStyle $ preformatted s + | RawInline f s <- ils = if f == "opendocument" + then return $ preformatted s else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l | Image _ (s,t) <- ils = return $ mkImg s t -- cgit v1.2.3 From d5660275a38a58334372326a79a9ce0153fede43 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 17 Nov 2013 08:45:21 -0800 Subject: Parsing: Generalized type of registerHeader, using new typeclasses. New type classes HasReadeOptions, HasIdentifierList, HasHeaderMap. These allow certain common functions to be reused even in parsers that use custom state (instead of ParserState), such as the MediaWiki reader. Minor API bump. --- src/Text/Pandoc/Parsing.hs | 54 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 701b2ef84..9687d7712 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances, + FlexibleInstances#-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -65,6 +66,9 @@ module Text.Pandoc.Parsing ( (>>~), guardEnabled, guardDisabled, ParserState (..), + HasReaderOptions (..), + HasHeaderMap (..), + HasIdentifierList (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -826,6 +830,34 @@ instance HasMeta ParserState where deleteMeta field st = st{ stateMeta = deleteMeta field $ stateMeta st } +class Monad m => HasReaderOptions m where + askReaderOption :: (ReaderOptions -> b) -> m b + +class Monad m => HasHeaderMap m where + getHeaderMap :: m (M.Map Inlines String) + putHeaderMap :: M.Map Inlines String -> m () + modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> m () + -- default + modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f + +class Monad m => HasIdentifierList m where + getIdentifierList :: m [String] + putIdentifierList :: [String] -> m () + modifyIdentifierList :: ([String] -> [String]) -> m () + -- default + modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f + +instance HasReaderOptions (Parser s ParserState) where + askReaderOption = getOption + +instance HasHeaderMap (Parser s ParserState) where + getHeaderMap = fmap stateHeaders getState + putHeaderMap hm = updateState $ \st -> st{ stateHeaders = hm } + +instance HasIdentifierList (Parser s ParserState) where + getIdentifierList = fmap stateIdentifiers getState + putIdentifierList l = updateState $ \st -> st{ stateIdentifiers = l } + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -895,10 +927,11 @@ type SubstTable = M.Map Key Inlines -- and the auto_identifers extension is set, generate a new -- unique identifier, and update the list of identifiers -- in state. -registerHeader :: Attr -> Inlines -> Parser s ParserState Attr +registerHeader :: (HasReaderOptions m, HasHeaderMap m, HasIdentifierList m) + => Attr -> Inlines -> m Attr registerHeader (ident,classes,kvs) header' = do - ids <- stateIdentifiers `fmap` getState - exts <- getOption readerExtensions + ids <- getIdentifierList + exts <- askReaderOption readerExtensions let insert' = M.insertWith (\_new old -> old) if null ident && Ext_auto_identifiers `Set.member` exts then do @@ -906,16 +939,13 @@ registerHeader (ident,classes,kvs) header' = do let id'' = if Ext_ascii_identifiers `Set.member` exts then catMaybes $ map toAsciiChar id' else id' - updateState $ \st -> st{ - stateIdentifiers = if id' == id'' - then id' : ids - else id' : id'' : ids, - stateHeaders = insert' header' id' $ stateHeaders st } + putIdentifierList $ if id' == id'' + then id' : ids + else id' : id'' : ids + modifyHeaderMap $ insert' header' id' return (id'',classes,kvs) else do - unless (null ident) $ - updateState $ \st -> st{ - stateHeaders = insert' header' ident $ stateHeaders st } + unless (null ident) $ modifyHeaderMap $ insert' header' ident return (ident,classes,kvs) -- | Fail unless we're in "smart typography" mode. -- cgit v1.2.3 From 0fd2176e29bd1118d314c6179455fb78bed35aea Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 17 Nov 2013 08:47:14 -0800 Subject: MediaWiki reader: Add automatic header identifiers. --- src/Text/Pandoc/Readers/MediaWiki.hs | 22 ++++++++++++-- tests/mediawiki-reader.native | 58 ++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 31 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 136701bd0..1c074e3de 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} +-- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> @@ -51,6 +52,7 @@ import Data.List (intersperse, intercalate, isPrefixOf ) import Text.HTML.TagSoup import Data.Sequence (viewl, ViewL(..), (<|)) import qualified Data.Foldable as F +import qualified Data.Map as M import Data.Char (isDigit, isSpace) -- | Read mediawiki from an input string and return a Pandoc document. @@ -62,6 +64,8 @@ readMediaWiki opts s = , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 , mwCategoryLinks = [] + , mwHeaderMap = M.empty + , mwIdentifierList = [] } "source" (s ++ "\n") of Left err' -> error $ "\nError:\n" ++ show err' @@ -71,10 +75,23 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] + , mwHeaderMap :: M.Map Inlines String + , mwIdentifierList :: [String] } type MWParser = Parser [Char] MWState +instance HasReaderOptions MWParser where + askReaderOption f = (f . mwOptions) `fmap` getState + +instance HasHeaderMap MWParser where + getHeaderMap = fmap mwHeaderMap getState + putHeaderMap hm = updateState $ \st -> st{ mwHeaderMap = hm } + +instance HasIdentifierList MWParser where + getIdentifierList = fmap mwIdentifierList getState + putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l } + -- -- auxiliary functions -- @@ -351,7 +368,8 @@ header = try $ do let lev = length eqs guard $ lev <= 6 contents <- trimInlines . mconcat <$> manyTill inline (count lev $ char '=') - return $ B.header lev contents + attr <- registerHeader nullAttr contents + return $ B.headerWith attr lev contents bulletList :: MWParser Blocks bulletList = B.bulletList <$> diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 0ab51a3aa..238413445 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -1,39 +1,39 @@ Pandoc (Meta {unMeta = fromList []}) -[Header 1 ("",[],[]) [Str "header"] -,Header 2 ("",[],[]) [Str "header",Space,Str "level",Space,Str "two"] -,Header 3 ("",[],[]) [Str "header",Space,Str "level",Space,Str "3"] -,Header 4 ("",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"] -,Header 5 ("",[],[]) [Str "header",Space,Str "level",Space,Str "5"] -,Header 6 ("",[],[]) [Str "header",Space,Str "level",Space,Str "6"] +[Header 1 ("header",[],[]) [Str "header"] +,Header 2 ("header-level-two",[],[]) [Str "header",Space,Str "level",Space,Str "two"] +,Header 3 ("header-level-3",[],[]) [Str "header",Space,Str "level",Space,Str "3"] +,Header 4 ("header-level-four",[],[]) [Str "header",Space,Emph [Str "level"],Space,Str "four"] +,Header 5 ("header-level-5",[],[]) [Str "header",Space,Str "level",Space,Str "5"] +,Header 6 ("header-level-6",[],[]) [Str "header",Space,Str "level",Space,Str "6"] ,Para [Str "=======",Space,Str "not",Space,Str "a",Space,Str "header",Space,Str "========"] ,Para [Code ("",[],[]) "==\160not\160a\160header\160=="] -,Header 2 ("",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"] +,Header 2 ("emph-and-strong",[],[]) [Str "emph",Space,Str "and",Space,Str "strong"] ,Para [Emph [Str "emph"],Space,Strong [Str "strong"]] ,Para [Strong [Emph [Str "strong",Space,Str "and",Space,Str "emph"]]] ,Para [Strong [Emph [Str "emph",Space,Str "inside"],Space,Str "strong"]] ,Para [Strong [Str "strong",Space,Str "with",Space,Emph [Str "emph"]]] ,Para [Emph [Strong [Str "strong",Space,Str "inside"],Space,Str "emph"]] -,Header 2 ("",[],[]) [Str "horizontal",Space,Str "rule"] +,Header 2 ("horizontal-rule",[],[]) [Str "horizontal",Space,Str "rule"] ,Para [Str "top"] ,HorizontalRule ,Para [Str "bottom"] ,HorizontalRule -,Header 2 ("",[],[]) [Str "nowiki"] +,Header 2 ("nowiki",[],[]) [Str "nowiki"] ,Para [Str "''not",Space,Str "emph''"] -,Header 2 ("",[],[]) [Str "strikeout"] +,Header 2 ("strikeout",[],[]) [Str "strikeout"] ,Para [Strikeout [Str "This",Space,Str "is",Space,Emph [Str "struck",Space,Str "out"]]] -,Header 2 ("",[],[]) [Str "entities"] +,Header 2 ("entities",[],[]) [Str "entities"] ,Para [Str "hi",Space,Str "&",Space,Str "low"] ,Para [Str "hi",Space,Str "&",Space,Str "low"] ,Para [Str "G\246del"] ,Para [Str "\777\2730"] -,Header 2 ("",[],[]) [Str "comments"] +,Header 2 ("comments",[],[]) [Str "comments"] ,Para [Str "inline",Space,Str "comment"] ,Para [Str "between",Space,Str "blocks"] -,Header 2 ("",[],[]) [Str "linebreaks"] +,Header 2 ("linebreaks",[],[]) [Str "linebreaks"] ,Para [Str "hi",LineBreak,Str "there"] ,Para [Str "hi",LineBreak,Str "there"] -,Header 2 ("",[],[]) [Str ":",Space,Str "indents"] +,Header 2 ("indents",[],[]) [Str ":",Space,Str "indents"] ,Para [Str "hi"] ,DefinitionList [([], @@ -46,36 +46,36 @@ Pandoc (Meta {unMeta = fromList []}) [([], [[Plain [Str "there"]]])]]])] ,Para [Str "bud"] -,Header 2 ("",[],[]) [Str "p",Space,Str "tags"] +,Header 2 ("p-tags",[],[]) [Str "p",Space,Str "tags"] ,Para [Str "hi",Space,Str "there"] ,Para [Str "bud"] ,Para [Str "another"] -,Header 2 ("",[],[]) [Str "raw",Space,Str "html"] +,Header 2 ("raw-html",[],[]) [Str "raw",Space,Str "html"] ,Para [Str "hi",Space,RawInline (Format "html") "<span style=\"color:red\">",Emph [Str "there"],RawInline (Format "html") "</span>",Str "."] ,Para [RawInline (Format "html") "<ins>",Str "inserted",RawInline (Format "html") "</ins>"] ,RawBlock (Format "html") "<div class=\"special\">" ,Para [Str "hi",Space,Emph [Str "there"]] ,RawBlock (Format "html") "</div>" -,Header 2 ("",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"] +,Header 2 ("sup-sub-del",[],[]) [Str "sup,",Space,Str "sub,",Space,Str "del"] ,Para [Str "H",Subscript [Str "2"],Str "O",Space,Str "base",Superscript [Emph [Str "exponent"]],Space,Strikeout [Str "hello"]] -,Header 2 ("",[],[]) [Str "inline",Space,Str "code"] +,Header 2 ("inline-code",[],[]) [Str "inline",Space,Str "code"] ,Para [Code ("",[],[]) "*\8594*",Space,Code ("",[],[]) "typed",Space,Code ("",["haskell"],[]) ">>="] -,Header 2 ("",[],[]) [Str "code",Space,Str "blocks"] +,Header 2 ("code-blocks",[],[]) [Str "code",Space,Str "blocks"] ,CodeBlock ("",[],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']" ,CodeBlock ("",["haskell"],[]) "case xs of\n (_:_) -> reverse xs\n [] -> ['*']" ,CodeBlock ("",["ruby","numberLines"],[("startFrom","100")]) "widgets.each do |w|\n print w.price\nend" -,Header 2 ("",[],[]) [Str "block",Space,Str "quotes"] +,Header 2 ("block-quotes",[],[]) [Str "block",Space,Str "quotes"] ,Para [Str "Regular",Space,Str "paragraph"] ,BlockQuote [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."] ,Para [Str "With",Space,Str "two",Space,Str "paragraphs."]] ,Para [Str "Nother",Space,Str "paragraph."] -,Header 2 ("",[],[]) [Str "external",Space,Str "links"] +,Header 2 ("external-links",[],[]) [Str "external",Space,Str "links"] ,Para [Link [Emph [Str "Google"],Space,Str "search",Space,Str "engine"] ("http://google.com","")] ,Para [Link [Str "http://johnmacfarlane.net/pandoc/"] ("http://johnmacfarlane.net/pandoc/","")] ,Para [Link [Str "1"] ("http://google.com",""),Space,Link [Str "2"] ("http://yahoo.com","")] ,Para [Link [Str "email",Space,Str "me"] ("mailto:info@example.org","")] -,Header 2 ("",[],[]) [Str "internal",Space,Str "links"] +,Header 2 ("internal-links",[],[]) [Str "internal",Space,Str "links"] ,Para [Link [Str "Help"] ("Help","wikilink")] ,Para [Link [Str "the",Space,Str "help",Space,Str "page"] ("Help","wikilink")] ,Para [Link [Str "Helpers"] ("Help","wikilink")] @@ -83,12 +83,12 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Link [Str "Contents"] ("Help:Contents","wikilink")] ,Para [Link [Str "#My",Space,Str "anchor"] ("#My_anchor","wikilink")] ,Para [Link [Str "and",Space,Str "text"] ("Page#with_anchor","wikilink")] -,Header 2 ("",[],[]) [Str "images"] +,Header 2 ("images",[],[]) [Str "images"] ,Para [Image [Str "caption"] ("example.jpg","fig:caption")] ,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("http://google.com","")] ("example.jpg","fig:the caption with external link")] ,Para [Image [Str "caption"] ("example.jpg","fig:caption")] ,Para [Image [Str "example.jpg"] ("example.jpg","fig:example.jpg")] -,Header 2 ("",[],[]) [Str "lists"] +,Header 2 ("lists",[],[]) [Str "lists"] ,BulletList [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] ,[Plain [Str "with",Space,Str "an",Space,Str "asterisk",Space,Str "(*)."] @@ -161,10 +161,10 @@ Pandoc (Meta {unMeta = fromList []}) [[Plain [Str "Amsterdam"]] ,[Plain [Str "Rotterdam"]] ,[Plain [Str "The",Space,Str "Hague"]]] -,Header 2 ("",[],[]) [Str "math"] +,Header 2 ("math",[],[]) [Str "math"] ,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."] ,Para [Str "With",Space,Str "spaces:",Space,Math InlineMath "x=\\frac{y^\\pi}{z}",Str "."] -,Header 2 ("",[],[]) [Str "preformatted",Space,Str "blocks"] +,Header 2 ("preformatted-blocks",[],[]) [Str "preformatted",Space,Str "blocks"] ,Para [Code ("",[],[]) "Start\160each\160line\160with\160a\160space.",LineBreak,Code ("",[],[]) "Text\160is\160",Strong [Code ("",[],[]) "preformatted"],Code ("",[],[]) "\160and",LineBreak,Emph [Code ("",[],[]) "markups"],Code ("",[],[]) "\160",Strong [Emph [Code ("",[],[]) "can"]],Code ("",[],[]) "\160be\160done."] ,Para [Code ("",[],[]) "\160hell\160\160\160\160\160\160yeah"] ,Para [Code ("",[],[]) "Start\160with\160a\160space\160in\160the\160first\160column,",LineBreak,Code ("",[],[]) "(before\160the\160<nowiki>).",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "Then\160your\160block\160format\160will\160be",LineBreak,Code ("",[],[]) "\160\160\160\160maintained.",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "This\160is\160good\160for\160copying\160in\160code\160blocks:",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "def\160function():",LineBreak,Code ("",[],[]) "\160\160\160\160\"\"\"documentation\160string\"\"\"",LineBreak,Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\160\160\160\160if\160True:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160True",LineBreak,Code ("",[],[]) "\160\160\160\160else:",LineBreak,Code ("",[],[]) "\160\160\160\160\160\160\160\160print\160False"] @@ -174,12 +174,12 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Don't",Space,Str "need"] ,Para [Code ("",[],[]) "a\160blank\160line"] ,Para [Str "around",Space,Str "a",Space,Str "preformatted",Space,Str "block."] -,Header 2 ("",[],[]) [Str "templates"] +,Header 2 ("templates",[],[]) [Str "templates"] ,RawBlock (Format "mediawiki") "{{Welcome}}" ,RawBlock (Format "mediawiki") "{{Foo:Bar}}" ,RawBlock (Format "mediawiki") "{{Thankyou|all your effort|Me}}" ,Para [Str "Written",Space,RawInline (Format "mediawiki") "{{{date}}}",Space,Str "by",Space,RawInline (Format "mediawiki") "{{{name}}}",Str "."] -,Header 2 ("",[],[]) [Str "tables"] +,Header 2 ("tables",[],[]) [Str "tables"] ,Table [] [AlignDefault,AlignDefault] [0.0,0.0] [[] ,[]] @@ -245,6 +245,6 @@ Pandoc (Meta {unMeta = fromList []}) [[]] [[[Para [Str "Orange"]]]] ,Para [Str "Paragraph",Space,Str "after",Space,Str "the",Space,Str "table."] -,Header 2 ("",[],[]) [Str "notes"] +,Header 2 ("notes",[],[]) [Str "notes"] ,Para [Str "My",Space,Str "note!",Note [Plain [Str "This."]]] ,Para [Str "URL",Space,Str "note.",Note [Plain [Link [Str "http://docs.python.org/library/functions.html#range"] ("http://docs.python.org/library/functions.html#range","")]]]] -- cgit v1.2.3 From e690c87dc4c3ff79689d9a887a9b5e3bffbb5d37 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 17 Nov 2013 09:07:25 -0800 Subject: LaTeX reader: Support `\textnormal` as span with class "nodecor". This is needed for pandoc-citeproc. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 681dcb077..689b12c8e 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -373,6 +373,7 @@ inlineCommands = M.fromList $ , ("backslash", lit "\\") , ("slash", lit "/") , ("textbf", strong <$> tok) + , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) , ("ldots", lit "…") , ("dots", lit "…") , ("mdots", lit "…") -- cgit v1.2.3 From d07dc971da0123149e751dff987cf9df1a55b75d Mon Sep 17 00:00:00 2001 From: "Shaun Attfield (shaun@victor)" <heurist+git@gmail.com> Date: Mon, 18 Nov 2013 11:53:13 +0200 Subject: Epub Writer: Add cover reference to guide element Fixes an issue with calibre http://calibre-ebook.com/ putting the cover at the end of the book if the spine has linear="no". Apparently this is best practice for other converters as well. http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.6 --- src/Text/Pandoc/Writers/EPUB.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ac0e7610c..ca69a0fd4 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -260,8 +260,10 @@ writeEPUB opts doc@(Pandoc meta _) = do else "no")] $ ()) : map chapterRefNode chapterEntries) , unode "guide" $ - unode "reference" ! - [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () + [ unode "reference" ! + [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () ] + ++ [ unode "reference" ! + [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () ] ] let contentsEntry = mkEntry "content.opf" contentsData -- cgit v1.2.3 From a3eba6ee848497f98f72aef5dad112c49bdd4fec Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 18 Nov 2013 20:28:27 -0800 Subject: LaTeX reader: Parse contents of curly quotes or matched `"` as quotes. --- src/Text/Pandoc/Readers/LaTeX.hs | 24 +++++++++++++++++------- tests/latex-reader.native | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 689b12c8e..75e29ebb9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -163,13 +163,23 @@ mathChars = concat <$> <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar) ) +quoted' :: (Inlines -> Inlines) -> LP String -> LP () -> LP Inlines +quoted' f starter ender = do + startchs <- starter + try ((f . mconcat) <$> manyTill inline ender) <|> lit startchs + double_quote :: LP Inlines -double_quote = (doubleQuoted . mconcat) <$> - (try $ string "``" *> manyTill inline (try $ string "''")) +double_quote = + ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") + <|> quoted' doubleQuoted (string "“") (void $ char '”') + <|> quoted' doubleQuoted (string "\"") (void $ char '"') + ) single_quote :: LP Inlines -single_quote = (singleQuoted . mconcat) <$> - (try $ char '`' *> manyTill inline (try $ char '\'' >> notFollowedBy letter)) +single_quote = + ( quoted' singleQuoted (string "`") (try $ char '\'' >> notFollowedBy letter) + <|> quoted' singleQuoted (string "‘") (try $ char '’' >> notFollowedBy letter) + ) inline :: LP Inlines inline = (mempty <$ comment) @@ -181,10 +191,10 @@ inline = (mempty <$ comment) ((char '-') *> option (str "–") (str "—" <$ char '-'))) <|> double_quote <|> single_quote - <|> (str "“" <$ try (string "``")) -- nb. {``} won't be caught by double_quote <|> (str "”" <$ try (string "''")) - <|> (str "‘" <$ char '`') -- nb. {`} won't be caught by single_quote + <|> (str "”" <$ char '”') <|> (str "’" <$ char '\'') + <|> (str "’" <$ char '’') <|> (str "\160" <$ char '~') <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") <|> (mathInline $ char '$' *> mathChars <* char '$') @@ -755,7 +765,7 @@ inlineText :: LP Inlines inlineText = str <$> many1 inlineChar inlineChar :: LP Char -inlineChar = noneOf "\\$%^_&~#{}^'`-[] \t\n" +inlineChar = noneOf "\\$%^_&~#{}^'`\"‘’“”-[] \t\n" environment :: LP Blocks environment = do diff --git a/tests/latex-reader.native b/tests/latex-reader.native index 15b667b2f..fcc3153cf 100644 --- a/tests/latex-reader.native +++ b/tests/latex-reader.native @@ -302,7 +302,7 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "4",Space,Str "<",Space,Str "5."] ,Para [Str "6",Space,Str ">",Space,Str "5."] ,Para [Str "Backslash:",Space,Str "\\"] -,Para [Str "Backtick:",Space,Str "\8216"] +,Para [Str "Backtick:",Space,Str "`"] ,Para [Str "Asterisk:",Space,Str "*"] ,Para [Str "Underscore:",Space,Str "_"] ,Para [Str "Left",Space,Str "brace:",Space,Str "{"] -- cgit v1.2.3 From 5b27480e54f4ecfc145bde133f1f51865d9b12a0 Mon Sep 17 00:00:00 2001 From: "Shaun Attfield (shaun@victor)" <heurist+git@gmail.com> Date: Tue, 19 Nov 2013 08:20:27 +0200 Subject: Epub Writer: Add cover reference to guide element (v2) Avoiding an unnecessary list concatenation. Fixes an issue with calibre http://calibre-ebook.com/ putting the cover at the end of the book if the spine has linear="no". Apparently this is best practice for other converters as well. http://www.idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.6 --- src/Text/Pandoc/Writers/EPUB.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ca69a0fd4..be8de7073 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -260,10 +260,11 @@ writeEPUB opts doc@(Pandoc meta _) = do else "no")] $ ()) : map chapterRefNode chapterEntries) , unode "guide" $ - [ unode "reference" ! - [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () ] - ++ [ unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () ] + [ unode "reference" ! + [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () + , unode "reference" ! + [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () + ] ] let contentsEntry = mkEntry "content.opf" contentsData -- cgit v1.2.3 From c226a57eaa7f1e9d4782c7ea3b56e72251a6487a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 19 Nov 2013 12:01:52 -0800 Subject: Docbook writer: Hierarchicalize block content in metadata. Previously headers just disappeared from block-level metadata when it was used in templates. Now we apply the 'hierarchicalize' transformation. Note that a block headed by a level-2 header will turn into a `<sect1>` element. --- src/Text/Pandoc/Writers/Docbook.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index dad83d7bb..02d875be3 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -85,8 +85,9 @@ writeDocbook opts (Pandoc meta blocks) = auths' = map (authorToDocbook opts) $ docAuthors meta meta' = B.setMeta "author" auths' meta Just metadata = metaToJSON opts - (Just . render colwidth . blocksToDocbook opts) - (Just . render colwidth . inlinesToDocbook opts) + (Just . render colwidth . (vcat . + (map (elementToDocbook opts' startLvl)) . hierarchicalize)) + (Just . render colwidth . inlinesToDocbook opts') meta' main = render' $ vcat (map (elementToDocbook opts' startLvl) elements) context = defField "body" main -- cgit v1.2.3 From e290d91c93b265ce7763797a5c995d29fb683c67 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 19 Nov 2013 13:09:34 -0800 Subject: MIME: In looking up extensions, drop the encoding info. E.g. for 'image/jpg;base64' we should lookup 'image/jpg'. --- src/Text/Pandoc/MIME.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index d9cb94a33..1f5f6f862 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -40,7 +40,8 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes where mimeTypes = M.fromList mimeTypesList extensionFromMimeType :: String -> Maybe String -extensionFromMimeType mimetype = M.lookup mimetype reverseMimeTypes +extensionFromMimeType mimetype = M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes + -- note: we just look up the basic mime type, dropping the content-encoding etc. where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList mimeTypesList :: [(String, String)] -- cgit v1.2.3 From 83b9a66bf468c0f32bd96be92bb571bd83b73903 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 19 Nov 2013 13:15:24 -0800 Subject: Shared: Fixed bug in openURL with data: URIs. Previously the base-64 encoded bytestring was returned. We now decode it so it's a proper image! This should fix parsing of data: URLs. --- src/Text/Pandoc/Shared.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d6ccdae66..8dcd88148 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -108,6 +108,7 @@ import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid +import Data.ByteString.Base64 (decodeLenient) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -641,7 +642,7 @@ openURL u | "data:" `isPrefixOf` u = let mime = takeWhile (/=',') $ drop 5 u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u - in return $ Right (contents, Just mime) + in return $ Right (decodeLenient contents, Just mime) #ifdef HTTP_CONDUIT | otherwise = E.try $ do req <- parseUrl u -- cgit v1.2.3 From 3d453f096cfa12e231c5a4d4c8e468378e20e5e8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 19 Nov 2013 13:16:31 -0800 Subject: Docx writer: Use mime type info returned by fetchItem. --- src/Text/Pandoc/Writers/Docx.hs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 5d1647844..b9c198a78 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -55,8 +55,8 @@ import Data.Unique (hashUnique, newUnique) import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E -import System.FilePath (takeExtension) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) +import Control.Applicative ((<|>)) data WriterState = WriterState{ stTextProperties :: [Element] @@ -737,7 +737,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." -- emit alt text inlinesToOpenXML opts alt - Right (img, _) -> do + Right (img, mt) -> do ident <- ("rId"++) `fmap` getUniqueId let size = imageSize img let (xpt,ypt) = maybe (120,120) sizeInPoints size @@ -776,19 +776,21 @@ inlineToOpenXML opts (Image alt (src, tit)) = do , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () , graphic ] - let imgext = case imageType img of - Just Png -> ".png" - Just Jpeg -> ".jpeg" - Just Gif -> ".gif" - Just Pdf -> ".pdf" - Just Eps -> ".eps" - Nothing -> takeExtension src + let imgext = case mt >>= extensionFromMimeType of + Just x -> '.':x + Nothing -> case imageType img of + Just Png -> ".png" + Just Jpeg -> ".jpeg" + Just Gif -> ".gif" + Just Pdf -> ".pdf" + Just Eps -> ".eps" + Nothing -> "" if null imgext then -- without an extension there is no rule for content type inlinesToOpenXML opts alt -- return alt to avoid corrupted docx else do let imgpath = "media/" ++ ident ++ imgext - let mbMimeType = getMimeType imgpath + let mbMimeType = mt <|> getMimeType imgpath -- insert mime type to use in constructing [Content_Types].xml modify $ \st -> st{ stImages = M.insert src (ident, imgpath, mbMimeType, imgElt, img) -- cgit v1.2.3 From cf149fcf38d98b1bee79ecd9056fa0f46264e7ce Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 22 Nov 2013 19:41:08 -0800 Subject: Fixed bug with intraword emphasis. Closes #1066. --- src/Text/Pandoc/Readers/Markdown.hs | 3 ++- tests/Tests/Readers/Markdown.hs | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ea49d8c1d..33d1a9620 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1455,6 +1455,7 @@ enclosure c = do -- Parse inlines til you hit one c or a sequence of two cs. -- If one c, emit emph and then parse two. -- If two cs, emit strong and then parse one. +-- Otherwise, emit ccc then the results. three :: Char -> MarkdownParser (F Inlines) three c = do contents <- mconcat <$> many (notFollowedBy (char c) >> inline) @@ -1479,7 +1480,7 @@ one c prefix' = do contents <- mconcat <$> many ( (notFollowedBy (char c) >> inline) <|> try (string [c,c] >> notFollowedBy (char c) >> - two c prefix') ) + two c mempty) ) (char c >> return (B.emph <$> (prefix' <> contents))) <|> return (return (B.str [c]) <> (prefix' <> contents)) diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index ccca147ab..b04ff9a0d 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -136,6 +136,11 @@ tests = [ testGroup "inline code" "`*` {.haskell .special x=\"7\"}" =?> para (codeWith ("",["haskell","special"],[("x","7")]) "*") ] + , testGroup "emph and strong" + [ "two strongs in emph" =: + "***a**b **c**d*" =?> para (emph (strong (str "a") <> str "b" <> space + <> strong (str "c") <> str "d")) + ] , testGroup "raw LaTeX" [ "in URL" =: "\\begin\n" =?> para (text "\\begin") -- cgit v1.2.3 From 526762bf222dbab199f6ff90c925fe18535c698f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 22 Nov 2013 19:51:07 -0800 Subject: ConTeXt writer: Use setupcaption to separate style from content. Instead of adding 'nunumber' every time we place a figure... Closes #1067. --- data/templates | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 2 +- tests/writer.context | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/data/templates b/data/templates index 4bdebc73b..8cadd4f20 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 4bdebc73b0b2025cf01704e7e564088c34d8f86c +Subproject commit 8cadd4f2044c0c25842eeb5a2370a6e3384f4bd4 diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 0379f8b0a..179d9bc5b 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do capt <- inlineListToConTeXt txt - return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <> + return $ blankline $$ "\\placefigure[here]" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst diff --git a/tests/writer.context b/tests/writer.context index 114d00b3c..fb95f5615 100644 --- a/tests/writer.context +++ b/tests/writer.context @@ -30,6 +30,8 @@ \setupitemize[autointro] % prevent orphan list intro \setupitemize[indentnext=no] +\setupcaption[figure][number=no] % don't number figures + \setupthinrules[width=15em] % width of horizontal rules \setupdelimitedtext @@ -842,7 +844,7 @@ or here: <http://example.com/> From \quotation{Voyage dans la Lune} by Georges Melies (1902): -\placefigure[here,nonumber]{lalune}{\externalfigure[lalune.jpg]} +\placefigure[here]{lalune}{\externalfigure[lalune.jpg]} Here is a movie {\externalfigure[movie.jpg]} icon. -- cgit v1.2.3 From 4321a09b7fa5b52e72aef4fa637392d7da4eb808 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 22 Nov 2013 23:36:19 -0800 Subject: EPUB writer: Ensure that same identifier is used throughout. If dc:identifier is given in metadata, we use that; otherwise we use a random uuid. Closes #1044. --- src/Text/Pandoc/Writers/EPUB.hs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index be8de7073..98f043cd8 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -198,7 +198,12 @@ writeEPUB opts doc@(Pandoc meta _) = do let lang = case lookup "lang" (writerVariables opts') of Just x -> x Nothing -> localeLang - uuid <- getRandomUUID + let userNodes = onlyElems $ parseXML $ writerEpubMetadata opts' + let mbIdent = findElement (QName "identifier" Nothing (Just "dc")) + $ unode "dummy" ! [] $ userNodes + uuid <- case mbIdent of + Just id' -> return $ trim $ strContent id' + Nothing -> fmap show getRandomUUID let chapterNode ent = unode "item" ! ([("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), @@ -230,7 +235,7 @@ writeEPUB opts doc@(Pandoc meta _) = do EPUB3 -> "3.0") ,("xmlns","http://www.idpf.org/2007/opf") ,("unique-identifier","BookId")] $ - [ metadataElement version (writerEpubMetadata opts') + [ metadataElement version userNodes uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") @@ -311,7 +316,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ [ unode "meta" ! [("name","dtb:uid") - ,("content", show uuid)] $ () + ,("content", uuid)] $ () , unode "meta" ! [("name","dtb:depth") ,("content", "1")] $ () , unode "meta" ! [("name","dtb:totalPageCount") @@ -384,13 +389,12 @@ writeEPUB opts doc@(Pandoc meta _) = do (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) return $ fromArchive archive -metadataElement :: EPUBVersion -> String -> UUID -> String -> String -> [String] +metadataElement :: EPUBVersion -> [Element] -> String -> String -> String -> [String] -> String -> UTCTime -> Maybe a -> Element -metadataElement version metadataXML uuid lang title authors date currentTime mbCoverImage = - let userNodes = parseXML metadataXML - elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") +metadataElement version userNodes uuid lang title authors date currentTime mbCoverImage = + let elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ - filter isMetadataElement $ onlyElems userNodes + filter isMetadataElement userNodes dublinElements = ["contributor","coverage","creator","date", "description","format","identifier","language","publisher", "relation","rights","source","subject","title","type"] @@ -401,7 +405,7 @@ metadataElement version metadataXML uuid lang title authors date currentTime mbC contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ [ unode "dc:language" lang | not (elt `contains` "language") ] ++ - [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | + [ unode "dc:identifier" ! [("id","BookId")] $ uuid | not (elt `contains` "identifier") ] ++ [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2] $ a | a <- authors, not (elt `contains` "creator") ] ++ -- cgit v1.2.3 From 56277bacea73818a7541b60987e5bc877782baab Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 22 Nov 2013 23:40:46 -0800 Subject: EPUB writer: Don't include node for cover.xhtml if no cover! --- src/Text/Pandoc/Writers/EPUB.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 98f043cd8..ef4c7be23 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -267,8 +267,9 @@ writeEPUB opts doc@(Pandoc meta _) = do , unode "guide" $ [ unode "reference" ! [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () - , unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () + ] ++ + [ unode "reference" ! + [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing ] ] let contentsEntry = mkEntry "content.opf" contentsData -- cgit v1.2.3 From c1ff65e5ef0b5b8d78149cf666cda1de4a0e4fdb Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 23 Nov 2013 14:24:33 -0800 Subject: HTML writer: Handle csl flipflopping spans (csl-no-emph, etc.) --- src/Text/Pandoc/Writers/HTML.hs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 424843539..641652276 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -613,8 +613,22 @@ inlineToHtml opts inline = (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br - (Span attr ils) -> inlineListToHtml opts ils >>= - return . addAttrs opts attr . H.span + (Span (id',classes,kvs) ils) + -> inlineListToHtml opts ils >>= + return . addAttrs opts attr' . H.span + where attr' = (id',classes',kvs') + classes' = filter (`notElem` ["csl-no-emph", + "csl-no-strong", + "csl-no-smallcaps"]) classes + kvs' = if null styles + then kvs + else (("style", concat styles) : kvs) + styles = ["font-style:normal;" + | "csl-no-emph" `elem` classes] + ++ ["font-weight:normal;" + | "csl-no-strong" `elem` classes] + ++ ["font-variant:normal;" + | "csl-no-smallcaps" `elem` classes] (Emph lst) -> inlineListToHtml opts lst >>= return . H.em (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong (Code attr str) -> case hlCode of -- cgit v1.2.3 From b82ef0e29a652b7e1629b6e2e5f56140ed08e633 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 23 Nov 2013 14:41:22 -0800 Subject: LaTeX writer: Handle csl flipflopping spans (csl-no-emph, etc.) --- src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7ff64cf74..f3cbcf19f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -602,7 +602,16 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToLaTeX (Span _ ils) = inlineListToLaTeX ils >>= return . braces +inlineToLaTeX (Span (_,classes,_) ils) = do + let noEmph = "csl-no-emph" `elem` classes + let noStrong = "csl-no-strong" `elem` classes + let noSmallCaps = "csl-no-smallcaps" `elem` classes + ((if noEmph then inCmd "textup" else id) . + (if noStrong then inCmd "textnormal" else id) . + (if noSmallCaps then inCmd "textnormal" else id) . + (if not (noEmph || noStrong || noSmallCaps) + then braces + else id)) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = -- cgit v1.2.3 From e1a9a61774cce807c6db2fff7b8609b2d9dbc678 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 23 Nov 2013 14:52:14 -0800 Subject: Docx writer: Implemented csl flipflopping spans. --- src/Text/Pandoc/Writers/Docx.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index b9c198a78..5c7341b69 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -638,7 +638,12 @@ formattedString str = do inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") -inlineToOpenXML opts (Span _ ils) = inlinesToOpenXML opts ils +inlineToOpenXML opts (Span (_,classes,_) ils) = do + let off x = withTextProp (mknode x [("w:val","0")] ()) + ((if "csl-no-emph" `elem` classes then off "w:i" else id) . + (if "csl-no-strong" `elem` classes then off "w:b" else id) . + (if "csl-no-smallcaps" `elem` classes then off "w:smallCaps" else id)) + $ inlinesToOpenXML opts ils inlineToOpenXML opts (Strong lst) = withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst inlineToOpenXML opts (Emph lst) = -- cgit v1.2.3 From 303e42a94f16e00ecb65fb9de2d282d050a626c1 Mon Sep 17 00:00:00 2001 From: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 24 Nov 2013 12:51:41 +0100 Subject: MediaWiki reader: Accept image links in more languages In some of the Wikipedia versions the local version of 'File' is used (for example 'Archivo' in Spanish) --- src/Text/Pandoc/Readers/MediaWiki.hs | 6 +++++- tests/mediawiki-reader.native | 1 + tests/mediawiki-reader.wiki | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 1c074e3de..7f99af528 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -549,10 +549,14 @@ endline = () <$ try (newline <* notFollowedBy' header <* notFollowedBy anyListStart) +imageIdentifiers :: [MWParser ()] +imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] + where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier"] + image :: MWParser Inlines image = try $ do sym "[[" - sym "File:" <|> sym "Image:" + choice imageIdentifiers fname <- many1 (noneOf "|]") _ <- many (try $ char '|' *> imageOption) caption <- (B.str fname <$ sym "]]") diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 238413445..87e4043f7 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -88,6 +88,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Image [Str "the",Space,Emph [Str "caption"],Space,Str "with",Space,Link [Str "external",Space,Str "link"] ("http://google.com","")] ("example.jpg","fig:the caption with external link")] ,Para [Image [Str "caption"] ("example.jpg","fig:caption")] ,Para [Image [Str "example.jpg"] ("example.jpg","fig:example.jpg")] +,Para [Image [Str "example_es.jpg"] ("example_es.jpg","fig:example_es.jpg")] ,Header 2 ("lists",[],[]) [Str "lists"] ,BulletList [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] diff --git a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki index 26f4ef164..15f586bda 100644 --- a/tests/mediawiki-reader.wiki +++ b/tests/mediawiki-reader.wiki @@ -173,6 +173,8 @@ http://johnmacfarlane.net/pandoc/ [[File:example.jpg]] +[[Archivo:example_es.jpg]] + == lists == * Start each line -- cgit v1.2.3 From 659596876b74a2ce99ec5f7253962b27ca091354 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 29 Nov 2013 18:01:07 -0800 Subject: EPUB writer: Improved metadata handling. * Metadata may now be included in YAML blocks in a markdown document. For example, --- title: - type: main text: My Book - type: subtitle text: An investigation of metadata creator: - role: author text: John Smith - role: editor text: Sarah Jones identifier: - scheme: DOI text: doi:10.234234.234/33 publisher: My Press rights: (c) 2007 John Smith, CC BY-NC ... * Metadata may still be provided using `--epub-metadata`; it will be merged with the metadata in YAML blocks. * meta tags are used instead of opf attributes for EPUB3. --- src/Text/Pandoc/Writers/EPUB.hs | 642 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 591 insertions(+), 51 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index ef4c7be23..4f1de5df0 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE PatternGuards, CPP #-} +{-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} {- Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> @@ -30,7 +30,8 @@ Conversion of 'Pandoc' documents to EPUB. -} module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef -import Data.Maybe ( fromMaybe, isNothing ) +import qualified Data.Map as M +import Data.Maybe ( fromMaybe ) import Data.List ( isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) @@ -40,6 +41,7 @@ import qualified Data.ByteString.Lazy.Char8 as B8 import Text.Pandoc.UTF8 ( fromStringLazy, toString ) import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip +import Control.Applicative ((<$>)) import Data.Time.Clock.POSIX import Data.Time import System.Locale @@ -70,6 +72,216 @@ import Text.Blaze.Html.Renderer.Utf8 (renderHtml) -- in filenames, chapter0003.xhtml. data Chapter = Chapter (Maybe [Int]) [Block] +data EPUBMetadata = EPUBMetadata{ + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: String + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubCoverImage :: Maybe String + } deriving Show + +data Creator = Creator{ + creatorText :: String + , creatorRole :: Maybe String + , creatorFileAs :: Maybe String + } deriving Show + +data Identifier = Identifier{ + identifierText :: String + , identifierScheme :: Maybe String + } deriving Show + +data Title = Title{ + titleText :: String + , titleFileAs :: Maybe String + , titleType :: Maybe String + } deriving Show + +dcName :: String -> QName +dcName n = QName n Nothing (Just "dc") + +dcNode :: Node t => String -> t -> Element +dcNode = node . dcName + +opfName :: String -> QName +opfName n = QName n Nothing (Just "opf") + +plainify :: [Inline] -> String +plainify t = + trimr $ writePlain def{ writerStandalone = False } $ Pandoc nullMeta [Plain t] + +getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata +getEPUBMetadata opts meta = do + let md = metadataFromMeta opts meta + let elts = onlyElems $ parseXML $ writerEpubMetadata opts + let md' = foldr addMetadataFromXML md elts + let addIdentifier m = + if null (epubIdentifier m) + then do + randomId <- fmap show getRandomUUID + return $ m{ epubIdentifier = [Identifier randomId Nothing] } + else return m + let addLanguage m = + if null (epubLanguage m) + then case lookup "lang" (writerVariables opts) of + Just x -> return m{ epubLanguage = x } + Nothing -> do + localeLang <- catch (liftM + (map (\c -> if c == '_' then '-' else c) . + takeWhile (/='.')) $ getEnv "LANG") + (\e -> let _ = (e :: SomeException) in return "en-US") + return m{ epubLanguage = localeLang } + else return m + let fixDate m = + if null (epubDate m) + then do + currentTime <- getCurrentTime + return $ m{ epubDate = showDateTimeISO8601 currentTime } + else return m + let addAuthor m = + if any (\c -> creatorRole c == Just "aut") $ epubCreator m + then return m + else do + let authors' = map plainify $ docAuthors meta + let toAuthor name = Creator{ creatorText = name + , creatorRole = Just "aut" + , creatorFileAs = Nothing } + return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } + addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage + +addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata +addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md + | name == "identifier" = md{ epubIdentifier = + Identifier{ identifierText = strContent e + , identifierScheme = lookupAttr (opfName "scheme") attrs + } : epubIdentifier md } + | name == "title" = md{ epubTitle = + Title{ titleText = strContent e + , titleFileAs = getAttr "file-as" + , titleType = getAttr "type" + } : epubTitle md } + | name == "date" = md{ epubDate = maybe "" id $ normalizeDate $ strContent e } + | name == "language" = md{ epubLanguage = strContent e } + | name == "creator" = md{ epubCreator = + Creator{ creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubCreator md } + | name == "contributor" = md{ epubContributor = + Creator { creatorText = strContent e + , creatorRole = getAttr "role" + , creatorFileAs = getAttr "file-as" + } : epubContributor md } + | name == "subject" = md{ epubSubject = strContent e : epubSubject md } + | name == "description" = md { epubDescription = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "format" = md { epubFormat = Just $ strContent e } + | name == "type" = md { epubType = Just $ strContent e } + | name == "publisher" = md { epubPublisher = Just $ strContent e } + | name == "source" = md { epubSource = Just $ strContent e } + | name == "relation" = md { epubRelation = Just $ strContent e } + | name == "coverage" = md { epubCoverage = Just $ strContent e } + | name == "rights" = md { epubRights = Just $ strContent e } + | otherwise = md + where getAttr n = lookupAttr (opfName n) attrs +addMetadataFromXML _ md = md + +metaValueToString :: MetaValue -> String +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = plainify ils +metaValueToString (MetaBlocks bs) = plainify $ query (:[]) bs +metaValueToString (MetaBool b) = show b +metaValueToString _ = "" + +getList :: String -> Meta -> (MetaValue -> a) -> [a] +getList s meta handleMetaValue = + case lookupMeta s meta of + Just (MetaList xs) -> map handleMetaValue xs + Just mv -> [handleMetaValue mv] + Nothing -> [] + +getIdentifier :: Meta -> [Identifier] +getIdentifier meta = getList "identifier" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Identifier{ identifierText = maybe "" metaValueToString + $ M.lookup "text" m + , identifierScheme = metaValueToString <$> + M.lookup "scheme" m } + handleMetaValue mv = Identifier (metaValueToString mv) Nothing + +getTitle :: Meta -> [Title] +getTitle meta = getList "title" meta handleMetaValue + where handleMetaValue (MetaMap m) = + Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m + , titleFileAs = metaValueToString <$> M.lookup "file-as" m + , titleType = metaValueToString <$> M.lookup "type" m } + handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing + +getCreator :: String -> Meta -> [Creator] +getCreator s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m + , creatorFileAs = metaValueToString <$> M.lookup "file-as" m + , creatorRole = metaValueToString <$> M.lookup "role" m } + handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing + +simpleList :: String -> Meta -> [String] +simpleList s meta = + case lookupMeta s meta of + Just (MetaList xs) -> map metaValueToString xs + Just x -> [metaValueToString x] + Nothing -> [] + +metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata +metadataFromMeta opts meta = EPUBMetadata{ + epubIdentifier = identifiers + , epubTitle = titles + , epubDate = date + , epubLanguage = language + , epubCreator = creators + , epubContributor = contributors + , epubSubject = subjects + , epubDescription = description + , epubType = epubtype + , epubFormat = format + , epubPublisher = publisher + , epubSource = source + , epubRelation = relation + , epubCoverage = coverage + , epubRights = rights + , epubCoverImage = coverImage + } + where identifiers = getIdentifier meta + titles = getTitle meta + date = maybe "" id $ + (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate + language = maybe "" metaValueToString $ + lookupMeta "language" meta `mplus` lookupMeta "lang" meta + creators = getCreator "creator" meta + contributors = getCreator "contributor" meta + subjects = simpleList "subject" meta + description = metaValueToString <$> lookupMeta "description" meta + epubtype = metaValueToString <$> lookupMeta "type" meta + format = metaValueToString <$> lookupMeta "format" meta + publisher = metaValueToString <$> lookupMeta "publisher" meta + source = metaValueToString <$> lookupMeta "source" meta + relation = metaValueToString <$> lookupMeta "relation" meta + coverage = metaValueToString <$> lookupMeta "coverage" meta + rights = metaValueToString <$> lookupMeta "rights" meta + coverImage = fmap (const "cover-image") $ + lookup "epub-cover-image" $ writerVariables opts + -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert @@ -192,18 +404,6 @@ writeEPUB opts doc@(Pandoc meta _) = do let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent) -- contents.opf - localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) . - takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: SomeException) in return "en-US") - let lang = case lookup "lang" (writerVariables opts') of - Just x -> x - Nothing -> localeLang - let userNodes = onlyElems $ parseXML $ writerEpubMetadata opts' - let mbIdent = findElement (QName "identifier" Nothing (Just "dc")) - $ unode "dummy" ! [] $ userNodes - uuid <- case mbIdent of - Just id' -> return $ trim $ strContent id' - Nothing -> fmap show getRandomUUID let chapterNode ent = unode "item" ! ([("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), @@ -221,22 +421,23 @@ writeEPUB opts doc@(Pandoc meta _) = do [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () - let plainify t = trimr $ - writePlain opts'{ writerStandalone = False } $ - Pandoc meta [Plain t] - let plainTitle = plainify $ docTitle meta - let plainAuthors = map plainify $ docAuthors meta + metadata <- getEPUBMetadata opts' meta + let plainTitle = case docTitle meta of + [] -> case epubTitle metadata of + [] -> "UNTITLED" + (x:_) -> titleText x + x -> plainify x + let uuid = case epubIdentifier metadata of + (x:_) -> identifierText x -- use first identifier as UUID + [] -> error "epubIdentifier is null" -- shouldn't happen currentTime <- getCurrentTime - let plainDate = maybe (showDateTimeISO8601 currentTime) id - $ normalizeDate $ stringify $ docDate meta let contentsData = fromStringLazy $ ppTopElement $ unode "package" ! [("version", case version of EPUB2 -> "2.0" EPUB3 -> "3.0") ,("xmlns","http://www.idpf.org/2007/opf") - ,("unique-identifier","BookId")] $ - [ metadataElement version userNodes - uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage + ,("unique-identifier","epub-id-1")] $ + [ metadataElement version metadata currentTime , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () @@ -266,7 +467,8 @@ writeEPUB opts doc@(Pandoc meta _) = do map chapterRefNode chapterEntries) , unode "guide" $ [ unode "reference" ! - [("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ () + [("type","toc"),("title",plainTitle), + ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing @@ -390,32 +592,96 @@ writeEPUB opts doc@(Pandoc meta _) = do (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries)) return $ fromArchive archive -metadataElement :: EPUBVersion -> [Element] -> String -> String -> String -> [String] - -> String -> UTCTime -> Maybe a -> Element -metadataElement version userNodes uuid lang title authors date currentTime mbCoverImage = - let elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") - ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ - filter isMetadataElement userNodes - dublinElements = ["contributor","coverage","creator","date", - "description","format","identifier","language","publisher", - "relation","rights","source","subject","title","type"] - isMetadataElement e = (qPrefix (elName e) == Just "dc" && - qName (elName e) `elem` dublinElements) || - (qPrefix (elName e) == Nothing && - qName (elName e) `elem` ["link","meta"]) - contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) - newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ - [ unode "dc:language" lang | not (elt `contains` "language") ] ++ - [ unode "dc:identifier" ! [("id","BookId")] $ uuid | - not (elt `contains` "identifier") ] ++ - [ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2] - $ a | a <- authors, not (elt `contains` "creator") ] ++ - [ unode "dc:date" date | not (elt `contains` "date") ] ++ - [ unode "meta" ! [("property", "dcterms:modified")] $ - (showDateTimeISO8601 currentTime) | version == EPUB3] ++ - [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () | - not (isNothing mbCoverImage) ] - in elt{ elContent = elContent elt ++ map Elem newNodes } +metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element +metadataElement version md currentTime = + unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes + where mdNodes = identifierNodes ++ titleNodes ++ dateNodes ++ languageNodes + ++ creatorNodes ++ contributorNodes ++ subjectNodes + ++ descriptionNodes ++ typeNodes ++ formatNodes + ++ publisherNodes ++ sourceNodes ++ relationNodes + ++ coverageNodes ++ rightsNodes ++ coverImageNodes + ++ modifiedNodes + withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + ([1..] :: [Int])) + identifierNodes = withIds "epub-id" toIdentifierNode $ + epubIdentifier md + titleNodes = withIds "epub-title" toTitleNode $ epubTitle md + dateNodes = dcTag' "date" $ epubDate md + languageNodes = [dcTag "language" $ epubLanguage md] + creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ + epubCreator md + contributorNodes = withIds "epub-contributor" + (toCreatorNode "contributor") $ epubContributor md + subjectNodes = map (dcTag "subject") $ epubSubject md + descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md + typeNodes = maybe [] (dcTag' "type") $ epubType md + formatNodes = maybe [] (dcTag' "format") $ epubFormat md + publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md + sourceNodes = maybe [] (dcTag' "source") $ epubSource md + relationNodes = maybe [] (dcTag' "relation") $ epubRelation md + coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md + rightsNodes = maybe [] (dcTag' "rights") $ epubRights md + coverImageNodes = maybe [] + (\ci -> [unode "meta" ! [("name","cover"), ("content",ci)] $ ()]) + $ epubCoverImage md + modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ + (showDateTimeISO8601 currentTime) | version == EPUB3 ] + dcTag n s = unode ("dc:" ++ n) s + dcTag' n s = [dcTag n s] + toIdentifierNode id' (Identifier txt scheme) + | version == EPUB2 = [dcNode "identifier" ! + ([("id",id')] ++ maybe [] (\x -> [("opf:scheme", x)]) scheme) $ + txt] + | otherwise = [dcNode "identifier" ! [("id",id')] $ txt] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","identifier-type"), + ("scheme","onix:codelist5")] $ x]) + (schemeToOnix `fmap` scheme) + toCreatorNode s id' creator + | version == EPUB2 = [dcNode s ! + ([("id",id')] ++ + maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ + maybe [] (\x -> [("opf:role",x)]) + (creatorRole creator >>= toRelator)) $ creatorText creator] + | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (creatorFileAs creator) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","role"), + ("scheme","marc:relators")] $ x]) + (creatorRole creator >>= toRelator) + toTitleNode id' title + | version == EPUB2 = [dcNode "title" ! + ([("id",id')] ++ + maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++ + maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $ + titleText title] + | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] + ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","file-as")] $ x]) + (titleFileAs title) ++ + maybe [] (\x -> [unode "meta" ! + [("refines",'#':id'),("property","title-type")] $ x]) + (titleType title) + schemeToOnix "ISBN-10" = "02" + schemeToOnix "GTIN-13" = "03" + schemeToOnix "UPC" = "04" + schemeToOnix "ISMN-10" = "05" + schemeToOnix "DOI" = "06" + schemeToOnix "LCCN" = "13" + schemeToOnix "GTIN-14" = "14" + schemeToOnix "ISBN-13" = "15" + schemeToOnix "Legal deposit number" = "17" + schemeToOnix "URN" = "22" + schemeToOnix "OCLC" = "23" + schemeToOnix "ISMN-13" = "25" + schemeToOnix "ISBN-A" = "26" + schemeToOnix "JP" = "27" + schemeToOnix "OLCC" = "28" + schemeToOnix _ = "01" showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" @@ -527,3 +793,277 @@ replaceRefs refTable = walk replaceOneRef Just url -> Link lab (url,tit) Nothing -> x replaceOneRef x = x + +toRelator :: String -> Maybe String +toRelator x + | x `elem` relators = Just x + | otherwise = lookup (map toLower x) relatorMap + +relators :: [String] +relators = map snd relatorMap + +relatorMap :: [(String, String)] +relatorMap = + [("abridger", "abr") + ,("actor", "act") + ,("adapter", "adp") + ,("addressee", "rcp") + ,("analyst", "anl") + ,("animator", "anm") + ,("annotator", "ann") + ,("appellant", "apl") + ,("appellee", "ape") + ,("applicant", "app") + ,("architect", "arc") + ,("arranger", "arr") + ,("art copyist", "acp") + ,("art director", "adi") + ,("artist", "art") + ,("artistic director", "ard") + ,("assignee", "asg") + ,("associated name", "asn") + ,("attributed name", "att") + ,("auctioneer", "auc") + ,("author", "aut") + ,("author in quotations or text abstracts", "aqt") + ,("author of afterword, colophon, etc.", "aft") + ,("author of dialog", "aud") + ,("author of introduction, etc.", "aui") + ,("autographer", "ato") + ,("bibliographic antecedent", "ant") + ,("binder", "bnd") + ,("binding designer", "bdd") + ,("blurb writer", "blw") + ,("book designer", "bkd") + ,("book producer", "bkp") + ,("bookjacket designer", "bjd") + ,("bookplate designer", "bpd") + ,("bookseller", "bsl") + ,("braille embosser", "brl") + ,("broadcaster", "brd") + ,("calligrapher", "cll") + ,("cartographer", "ctg") + ,("caster", "cas") + ,("censor", "cns") + ,("choreographer", "chr") + ,("cinematographer", "cng") + ,("client", "cli") + ,("collection registrar", "cor") + ,("collector", "col") + ,("collotyper", "clt") + ,("colorist", "clr") + ,("commentator", "cmm") + ,("commentator for written text", "cwt") + ,("compiler", "com") + ,("complainant", "cpl") + ,("complainant-appellant", "cpt") + ,("complainant-appellee", "cpe") + ,("composer", "cmp") + ,("compositor", "cmt") + ,("conceptor", "ccp") + ,("conductor", "cnd") + ,("conservator", "con") + ,("consultant", "csl") + ,("consultant to a project", "csp") + ,("contestant", "cos") + ,("contestant-appellant", "cot") + ,("contestant-appellee", "coe") + ,("contestee", "cts") + ,("contestee-appellant", "ctt") + ,("contestee-appellee", "cte") + ,("contractor", "ctr") + ,("contributor", "ctb") + ,("copyright claimant", "cpc") + ,("copyright holder", "cph") + ,("corrector", "crr") + ,("correspondent", "crp") + ,("costume designer", "cst") + ,("court governed", "cou") + ,("court reporter", "crt") + ,("cover designer", "cov") + ,("creator", "cre") + ,("curator", "cur") + ,("dancer", "dnc") + ,("data contributor", "dtc") + ,("data manager", "dtm") + ,("dedicatee", "dte") + ,("dedicator", "dto") + ,("defendant", "dfd") + ,("defendant-appellant", "dft") + ,("defendant-appellee", "dfe") + ,("degree granting institution", "dgg") + ,("delineator", "dln") + ,("depicted", "dpc") + ,("depositor", "dpt") + ,("designer", "dsr") + ,("director", "drt") + ,("dissertant", "dis") + ,("distribution place", "dbp") + ,("distributor", "dst") + ,("donor", "dnr") + ,("draftsman", "drm") + ,("dubious author", "dub") + ,("editor", "edt") + ,("editor of compilation", "edc") + ,("editor of moving image work", "edm") + ,("electrician", "elg") + ,("electrotyper", "elt") + ,("enacting jurisdiction", "enj") + ,("engineer", "eng") + ,("engraver", "egr") + ,("etcher", "etr") + ,("event place", "evp") + ,("expert", "exp") + ,("facsimilist", "fac") + ,("field director", "fld") + ,("film director", "fmd") + ,("film distributor", "fds") + ,("film editor", "flm") + ,("film producer", "fmp") + ,("filmmaker", "fmk") + ,("first party", "fpy") + ,("forger", "frg") + ,("former owner", "fmo") + ,("funder", "fnd") + ,("geographic information specialist", "gis") + ,("honoree", "hnr") + ,("host", "hst") + ,("host institution", "his") + ,("illuminator", "ilu") + ,("illustrator", "ill") + ,("inscriber", "ins") + ,("instrumentalist", "itr") + ,("interviewee", "ive") + ,("interviewer", "ivr") + ,("inventor", "inv") + ,("issuing body", "isb") + ,("judge", "jud") + ,("jurisdiction governed", "jug") + ,("laboratory", "lbr") + ,("laboratory director", "ldr") + ,("landscape architect", "lsa") + ,("lead", "led") + ,("lender", "len") + ,("libelant", "lil") + ,("libelant-appellant", "lit") + ,("libelant-appellee", "lie") + ,("libelee", "lel") + ,("libelee-appellant", "let") + ,("libelee-appellee", "lee") + ,("librettist", "lbt") + ,("licensee", "lse") + ,("licensor", "lso") + ,("lighting designer", "lgd") + ,("lithographer", "ltg") + ,("lyricist", "lyr") + ,("manufacture place", "mfp") + ,("manufacturer", "mfr") + ,("marbler", "mrb") + ,("markup editor", "mrk") + ,("metadata contact", "mdc") + ,("metal-engraver", "mte") + ,("moderator", "mod") + ,("monitor", "mon") + ,("music copyist", "mcp") + ,("musical director", "msd") + ,("musician", "mus") + ,("narrator", "nrt") + ,("onscreen presenter", "osp") + ,("opponent", "opn") + ,("organizer of meeting", "orm") + ,("originator", "org") + ,("other", "oth") + ,("owner", "own") + ,("panelist", "pan") + ,("papermaker", "ppm") + ,("patent applicant", "pta") + ,("patent holder", "pth") + ,("patron", "pat") + ,("performer", "prf") + ,("permitting agency", "pma") + ,("photographer", "pht") + ,("plaintiff", "ptf") + ,("plaintiff-appellant", "ptt") + ,("plaintiff-appellee", "pte") + ,("platemaker", "plt") + ,("praeses", "pra") + ,("presenter", "pre") + ,("printer", "prt") + ,("printer of plates", "pop") + ,("printmaker", "prm") + ,("process contact", "prc") + ,("producer", "pro") + ,("production company", "prn") + ,("production designer", "prs") + ,("production manager", "pmn") + ,("production personnel", "prd") + ,("production place", "prp") + ,("programmer", "prg") + ,("project director", "pdr") + ,("proofreader", "pfr") + ,("provider", "prv") + ,("publication place", "pup") + ,("publisher", "pbl") + ,("publishing director", "pbd") + ,("puppeteer", "ppt") + ,("radio director", "rdd") + ,("radio producer", "rpc") + ,("recording engineer", "rce") + ,("recordist", "rcd") + ,("redaktor", "red") + ,("renderer", "ren") + ,("reporter", "rpt") + ,("repository", "rps") + ,("research team head", "rth") + ,("research team member", "rtm") + ,("researcher", "res") + ,("respondent", "rsp") + ,("respondent-appellant", "rst") + ,("respondent-appellee", "rse") + ,("responsible party", "rpy") + ,("restager", "rsg") + ,("restorationist", "rsr") + ,("reviewer", "rev") + ,("rubricator", "rbr") + ,("scenarist", "sce") + ,("scientific advisor", "sad") + ,("screenwriter", "aus") + ,("scribe", "scr") + ,("sculptor", "scl") + ,("second party", "spy") + ,("secretary", "sec") + ,("seller", "sll") + ,("set designer", "std") + ,("setting", "stg") + ,("signer", "sgn") + ,("singer", "sng") + ,("sound designer", "sds") + ,("speaker", "spk") + ,("sponsor", "spn") + ,("stage director", "sgd") + ,("stage manager", "stm") + ,("standards body", "stn") + ,("stereotyper", "str") + ,("storyteller", "stl") + ,("supporting host", "sht") + ,("surveyor", "srv") + ,("teacher", "tch") + ,("technical director", "tcd") + ,("television director", "tld") + ,("television producer", "tlp") + ,("thesis advisor", "ths") + ,("transcriber", "trc") + ,("translator", "trl") + ,("type designer", "tyd") + ,("typographer", "tyg") + ,("university place", "uvp") + ,("videographer", "vdg") + ,("witness", "wit") + ,("wood engraver", "wde") + ,("woodcutter", "wdc") + ,("writer of accompanying material", "wam") + ,("writer of added commentary", "wac") + ,("writer of added lyrics", "wal") + ,("writer of added text", "wat") + ] + -- cgit v1.2.3 From 96b678d823b544e3da7dd8531615c52de6164cb8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 15:17:38 -0800 Subject: Allow specification of epub-cover-image in YAML metadata. --- README | 6 +++++- src/Text/Pandoc/Writers/EPUB.hs | 17 ++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/README b/README index 3afbe8b0b..35194769b 100644 --- a/README +++ b/README @@ -541,7 +541,9 @@ Options affecting specific writers `--epub-cover-image=`*FILE* : Use the specified image as the EPUB cover. It is recommended - that the image be less than 1000px in width and height. + that the image be less than 1000px in width and height. Note that + in a markdown source document you can also specify `epub-cover-image` + in a YAML metadata block (see [EPUB Metadata], below). `--epub-metadata=`*FILE* : Look in the specified XML file for metadata for the EPUB. @@ -2818,6 +2820,8 @@ The following fields are recognized: ~ A string value. `rights` ~ A string value. +`epub-cover-image` + ~ A string value (path to cover image). Literate Haskell support ======================== diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 4f1de5df0..a3b01848e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -279,8 +279,7 @@ metadataFromMeta opts meta = EPUBMetadata{ relation = metaValueToString <$> lookupMeta "relation" meta coverage = metaValueToString <$> lookupMeta "coverage" meta rights = metaValueToString <$> lookupMeta "rights" meta - coverImage = fmap (const "cover-image") $ - lookup "epub-cover-image" $ writerVariables opts + coverImage = lookup "epub-cover-image" (writerVariables opts) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options @@ -305,11 +304,11 @@ writeEPUB opts doc@(Pandoc meta _) = do then MathML Nothing else writerHTMLMathMethod opts , writerWrapText = False } - let mbCoverImage = lookup "epub-cover-image" vars + metadata <- getEPUBMetadata opts' meta -- cover page (cpgEntry, cpicEntry) <- - case mbCoverImage of + case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do let coverImage = "cover-image" ++ takeExtension img @@ -421,7 +420,6 @@ writeEPUB opts doc@(Pandoc meta _) = do [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () - metadata <- getEPUBMetadata opts' meta let plainTitle = case docTitle meta of [] -> case epubTitle metadata of [] -> "UNTITLED" @@ -452,7 +450,7 @@ writeEPUB opts doc@(Pandoc meta _) = do map pictureNode (cpicEntry ++ picEntries) ++ map fontNode fontEntries , unode "spine" ! [("toc","ncx")] $ - case mbCoverImage of + case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! [("idref", "cover"),("linear","no")] $ () ] @@ -471,7 +469,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ("href","nav.xhtml")] $ () ] ++ [ unode "reference" ! - [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | mbCoverImage /= Nothing + [("type","cover"),("title","Cover"),("href","cover.xhtml")] $ () | epubCoverImage metadata /= Nothing ] ] let contentsEntry = mkEntry "content.opf" contentsData @@ -526,7 +524,7 @@ writeEPUB opts doc@(Pandoc meta _) = do ,("content", "0")] $ () , unode "meta" ! [("name","dtb:maxPageNumber") ,("content", "0")] $ () - ] ++ case mbCoverImage of + ] ++ case epubCoverImage metadata of Nothing -> [] Just _ -> [unode "meta" ! [("name","cover"), ("content","cover-image")] $ ()] @@ -623,7 +621,8 @@ metadataElement version md currentTime = coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md rightsNodes = maybe [] (dcTag' "rights") $ epubRights md coverImageNodes = maybe [] - (\ci -> [unode "meta" ! [("name","cover"), ("content",ci)] $ ()]) + (const $ [unode "meta" ! [("name","cover"), + ("content","cover-image")] $ ()]) $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ (showDateTimeISO8601 currentTime) | version == EPUB3 ] -- cgit v1.2.3 From 17ef39d1577a7c844f8747294f0e7a0486c1edaa Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 15:25:28 -0800 Subject: Fixed `cover-image` in EPUB YAML metadata. --- README | 4 ++-- src/Text/Pandoc/Writers/EPUB.hs | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/README b/README index 35194769b..2a5ecc6ad 100644 --- a/README +++ b/README @@ -542,7 +542,7 @@ Options affecting specific writers `--epub-cover-image=`*FILE* : Use the specified image as the EPUB cover. It is recommended that the image be less than 1000px in width and height. Note that - in a markdown source document you can also specify `epub-cover-image` + in a markdown source document you can also specify `cover-image` in a YAML metadata block (see [EPUB Metadata], below). `--epub-metadata=`*FILE* @@ -2820,7 +2820,7 @@ The following fields are recognized: ~ A string value. `rights` ~ A string value. -`epub-cover-image` +`cover-image` ~ A string value (path to cover image). Literate Haskell support diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index a3b01848e..9745db5a0 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -279,7 +279,8 @@ metadataFromMeta opts meta = EPUBMetadata{ relation = metaValueToString <$> lookupMeta "relation" meta coverage = metaValueToString <$> lookupMeta "coverage" meta rights = metaValueToString <$> lookupMeta "rights" meta - coverImage = lookup "epub-cover-image" (writerVariables opts) + coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` + (metaValueToString <$> lookupMeta "cover-image" meta) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options -- cgit v1.2.3 From bb0f299165de22e119675aa6ceed0192a80c78e8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 16:16:35 -0800 Subject: ConTeXt writer: Don't hardcode figure/table placement. Instead, let this be set in the template, using `\setupfloat`. Thanks to Aditya Mahajan for the suggestion. --- data/templates | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/data/templates b/data/templates index 8cadd4f20..713a8f63d 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 8cadd4f2044c0c25842eeb5a2370a6e3384f4bd4 +Subproject commit 713a8f63d5589ab9313869e47b03cf7f49e00e98 diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 179d9bc5b..3095cf508 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -130,7 +130,7 @@ blockToConTeXt (Plain lst) = inlineListToConTeXt lst -- title beginning with fig: indicates that the image is a figure blockToConTeXt (Para [Image txt (src,'f':'i':'g':':':_)]) = do capt <- inlineListToConTeXt txt - return $ blankline $$ "\\placefigure[here]" <> braces capt <> + return $ blankline $$ "\\placefigure" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst @@ -205,9 +205,9 @@ blockToConTeXt (Table caption aligns widths heads rows) = do else liftM ($$ "\\HL") $ tableRowToConTeXt heads captionText <- inlineListToConTeXt caption rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable" <> brackets ("here" <> if null caption - then ",none" - else "") + return $ "\\placetable" <> (if null caption + then brackets "none" + else empty) <> braces captionText $$ "\\starttable" <> brackets (text colDescriptors) $$ "\\HL" $$ headers $$ -- cgit v1.2.3 From 7aa4d519686af1416eaf3b380f8584ab89569c41 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 17:00:58 -0800 Subject: ODT writer: Add `draw:name` attribute to `draw:frame` elements. This is reported to be necessary to avoid an error from recent versions of Libre Office when files contain more than one image. Closes #1069. Thanks to wmanley for reporting and diagnosing the problem. --- src/Text/Pandoc/Writers/OpenDocument.hs | 10 ++++++++-- tests/writer.opendocument | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 565f5f869..4ddfd7166 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -64,6 +64,7 @@ data WriterState = , stInDefinition :: Bool , stTight :: Bool , stFirstPara :: Bool + , stImageId :: Int } defaultWriterState :: WriterState @@ -78,6 +79,7 @@ defaultWriterState = , stInDefinition = False , stTight = False , stFirstPara = False + , stImageId = 1 } when :: Bool -> Doc -> Doc @@ -380,7 +382,7 @@ inlineToOpenDocument o ils then return $ preformatted s else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image _ (s,t) <- ils = return $ mkImg s t + | Image _ (s,t) <- ils = mkImg s t | Note l <- ils = mkNote l | otherwise = return empty where @@ -389,7 +391,11 @@ inlineToOpenDocument o ils , ("xlink:href" , s ) , ("office:name", t ) ] . inSpanTags "Definition" - mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $ + mkImg s t = do + id' <- gets stImageId + modify (\st -> st{ stImageId = id' + 1 }) + return $ inTags False "draw:frame" + (("draw:name", "img" ++ show id'):attrsFromTitle t) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) diff --git a/tests/writer.opendocument b/tests/writer.opendocument index b3888e34d..81c793a62 100644 --- a/tests/writer.opendocument +++ b/tests/writer.opendocument @@ -1576,9 +1576,9 @@ link in pointy braces</text:span></text:a>.</text:p> <text:h text:style-name="Heading_20_1" text:outline-level="1">Images</text:h> <text:p text:style-name="First_20_paragraph">From “Voyage dans la Lune” by Georges Melies (1902):</text:p> -<text:p text:style-name="Text_20_body"><draw:frame><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p> +<text:p text:style-name="Text_20_body"><draw:frame draw:name="img1"><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p> <text:p text:style-name="Text_20_body">Here is a movie -<draw:frame><draw:image xlink:href="movie.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame> +<draw:frame draw:name="img2"><draw:image xlink:href="movie.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame> icon.</text:p> <text:p text:style-name="Horizontal_20_Line" /> <text:h text:style-name="Heading_20_1" text:outline-level="1">Footnotes</text:h> -- cgit v1.2.3 From 7f09c1834da9f87e7715f5c9dc52f4b730da8f3f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 17:59:28 -0800 Subject: Markdown writer: Fix rendering of tight sublists. E.g. - foo - bar - baz Previously a spurious blank line was included before the last item. Closes #1050. --- src/Text/Pandoc/Writers/Markdown.hs | 9 ++++++++- tests/Tests/Writers/Markdown.hs | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index eefcd547a..60d474263 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -555,7 +555,14 @@ bulletListItemToMarkdown opts items = do contents <- blockListToMarkdown opts items let sps = replicate (writerTabStop opts - 2) ' ' let start = text ('-' : ' ' : sps) - return $ hang (writerTabStop opts) start $ contents <> cr + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: WriterOptions -- ^ options diff --git a/tests/Tests/Writers/Markdown.hs b/tests/Tests/Writers/Markdown.hs index 99b85dfb7..c2a8f5903 100644 --- a/tests/Tests/Writers/Markdown.hs +++ b/tests/Tests/Writers/Markdown.hs @@ -31,4 +31,8 @@ tests :: [Test] tests = [ "indented code after list" =: (orderedList [ para "one" <> para "two" ] <> codeBlock "test") =?> "1. one\n\n two\n\n<!-- -->\n\n test" + , "list with tight sublist" + =: bulletList [ plain "foo" <> bulletList [ plain "bar" ], + plain "baz" ] + =?> "- foo\n - bar\n- baz\n" ] -- cgit v1.2.3 From 37569f30ed35a45b4765e9cd6c408cd167dd3452 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 19:33:02 -0800 Subject: EPUB writer: Add properties attribute to cover-image item for EPUB v3. --- src/Text/Pandoc/Writers/EPUB.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 9745db5a0..6e1a391af 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -448,7 +448,12 @@ writeEPUB opts doc@(Pandoc meta _) = do [("properties","nav") | epub3 ]) $ () ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ - map pictureNode (cpicEntry ++ picEntries) ++ + (case cpicEntry of + [] -> [] + (x:_) -> [add_attrs + [Attr (unqual "properties") "cover-image" | epub3] + (pictureNode x)]) ++ + map pictureNode picEntries ++ map fontNode fontEntries , unode "spine" ! [("toc","ncx")] $ case epubCoverImage metadata of -- cgit v1.2.3 From 4501344d45947bee79707522efdc77bfdc650bd6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 19:43:12 -0800 Subject: MIME: Add entry for jfif. --- src/Text/Pandoc/MIME.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 1f5f6f862..f41aa98bb 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -220,6 +220,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("jnlp","application/x-java-jnlp-file") ,("jpe","image/jpeg") ,("jpeg","image/jpeg") + ,("jfif","image/jpeg") ,("jpg","image/jpeg") ,("js","application/x-javascript") ,("kar","audio/midi") -- cgit v1.2.3 From a7067ab22b0ce297ccc5271fca59b9f9e787195e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 19:43:24 -0800 Subject: EPUB writer: Simplify imageTypeOf using getMimeType. --- src/Text/Pandoc/Writers/EPUB.hs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 6e1a391af..7be7eb368 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -736,15 +736,9 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . unEntity (x:xs) = x : unEntity xs imageTypeOf :: FilePath -> Maybe String -imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of - "jpg" -> Just "image/jpeg" - "jpeg" -> Just "image/jpeg" - "jfif" -> Just "image/jpeg" - "png" -> Just "image/png" - "gif" -> Just "image/gif" - "svg" -> Just "image/svg+xml" - _ -> Nothing - +imageTypeOf x = case getMimeType x of + Just y@('i':'m':'a':'g':'e':_) -> Just y + _ -> Nothing data IdentState = IdentState{ chapterNumber :: Int, -- cgit v1.2.3 From 0e158d8d8dc277f19d1f7d2686c2672f08410f82 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Nov 2013 20:20:18 -0800 Subject: EPUB writer: Insert "svg" property as needed in opf (EPUB 3). --- src/Text/Pandoc/Writers/EPUB.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 7be7eb368..b23f43a6e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -401,15 +401,21 @@ writeEPUB opts doc@(Pandoc meta _) = do let chapterEntries = zipWith chapToEntry [1..] chapters -- incredibly inefficient (TODO): - let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsMathML ent = epub3 && + "<math" `isInfixOf` (B8.unpack $ fromEntry ent) + let containsSVG ent = epub3 && + "<svg" `isInfixOf` (B8.unpack $ fromEntry ent) + let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] -- contents.opf let chapterNode ent = unode "item" ! ([("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", "application/xhtml+xml")] - ++ [("properties","mathml") | epub3 && - containsMathML ent]) $ () + ++ case props ent of + [] -> [] + xs -> [("properties", unwords xs)]) + $ () let chapterRefNode ent = unode "itemref" ! [("idref", takeBaseName $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! -- cgit v1.2.3 From 82813b55852c99c2e4d179083c119937c39d5398 Mon Sep 17 00:00:00 2001 From: Shaun Attfield <heurist+git@gmail.com> Date: Sun, 1 Dec 2013 10:19:08 +0200 Subject: normalizeDate: Allow dates with year only (%Y) --- src/Text/Pandoc/Shared.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 8dcd88148..7592b7659 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -270,7 +270,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day) where parsetimeWith = parseTime defaultTimeLocale formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", - "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"] + "%d %B %Y", "%b. %d, %Y", "%B %d, %Y", "%Y"] -- -- Pandoc block and inline list processing -- cgit v1.2.3 From 1f6238f3ba1999988fbb2b25b091171451647d4e Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 1 Dec 2013 10:16:56 -0800 Subject: EPUB writer: Allow partial dates: YYYY, YYYY-MM. Improves on #1074, since now we don't default to January 1. --- src/Text/Pandoc/Writers/EPUB.hs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index b23f43a6e..59c90b16d 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -56,7 +56,7 @@ import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) -import Data.Char ( toLower ) +import Data.Char ( toLower, isDigit ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) #if MIN_VERSION_base(4,6,0) @@ -171,7 +171,8 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md , titleFileAs = getAttr "file-as" , titleType = getAttr "type" } : epubTitle md } - | name == "date" = md{ epubDate = maybe "" id $ normalizeDate $ strContent e } + | name == "date" = md{ epubDate = maybe "" id $ normalizeDate' + $ strContent e } | name == "language" = md{ epubLanguage = strContent e } | name == "creator" = md{ epubCreator = Creator{ creatorText = strContent e @@ -265,7 +266,7 @@ metadataFromMeta opts meta = EPUBMetadata{ where identifiers = getIdentifier meta titles = getTitle meta date = maybe "" id $ - (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate + (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate' language = maybe "" metaValueToString $ lookupMeta "language" meta `mplus` lookupMeta "lang" meta creators = getCreator "creator" meta @@ -799,6 +800,16 @@ replaceRefs refTable = walk replaceOneRef Nothing -> x replaceOneRef x = x +-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM +normalizeDate' :: String -> Maybe String +normalizeDate' xs = + let xs' = trim xs in + case xs' of + [y1,y2,y3,y4] | all isDigit [y1,y2,y3,y4] -> Just xs' -- YYYY + [y1,y2,y3,y4,'-',m1,m2] | all isDigit [y1,y2,y3,y4,m1,m2] -- YYYY-MM + -> Just xs' + _ -> normalizeDate xs' + toRelator :: String -> Maybe String toRelator x | x `elem` relators = Just x -- cgit v1.2.3 From bd96f2bdcce55bfe5a7b753d29b3f4a45986f211 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 3 Dec 2013 10:32:03 -0800 Subject: EPUB writer: Allow 'stylesheet' in metadata. The value is a path to the stylesheet. --- README | 2 ++ src/Text/Pandoc/Writers/EPUB.hs | 33 ++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/README b/README index 2a5ecc6ad..11cbf189f 100644 --- a/README +++ b/README @@ -2822,6 +2822,8 @@ The following fields are recognized: ~ A string value. `cover-image` ~ A string value (path to cover image). +`stylesheet` + ~ A string value (path to CSS stylesheet). Literate Haskell support ======================== diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 59c90b16d..32b0c3c32 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -38,7 +38,7 @@ import Text.Printf (printf) import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Text.Pandoc.UTF8 ( fromStringLazy, toString ) +import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.SelfContained ( makeSelfContained ) import Codec.Archive.Zip import Control.Applicative ((<$>)) @@ -89,8 +89,13 @@ data EPUBMetadata = EPUBMetadata{ , epubCoverage :: Maybe String , epubRights :: Maybe String , epubCoverImage :: Maybe String + , epubStylesheet :: Maybe Stylesheet } deriving Show +data Stylesheet = StylesheetPath FilePath + | StylesheetContents String + deriving Show + data Creator = Creator{ creatorText :: String , creatorRole :: Maybe String @@ -262,6 +267,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverage = coverage , epubRights = rights , epubCoverImage = coverImage + , epubStylesheet = stylesheet } where identifiers = getIdentifier meta titles = getTitle meta @@ -282,6 +288,10 @@ metadataFromMeta opts meta = EPUBMetadata{ rights = metaValueToString <$> lookupMeta "rights" meta coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) + stylesheet = (StylesheetContents <$> + lookup "epub-stylesheet" (writerVariables opts)) `mplus` + ((StylesheetPath . metaValueToString) <$> + lookupMeta "stylesheet" meta) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: WriterOptions -- ^ Writer options @@ -437,7 +447,7 @@ writeEPUB opts doc@(Pandoc meta _) = do (x:_) -> identifierText x -- use first identifier as UUID [] -> error "epubIdentifier is null" -- shouldn't happen currentTime <- getCurrentTime - let contentsData = fromStringLazy $ ppTopElement $ + let contentsData = UTF8.fromStringLazy $ ppTopElement $ unode "package" ! [("version", case version of EPUB2 -> "2.0" EPUB3 -> "3.0") @@ -525,7 +535,7 @@ writeEPUB opts doc@(Pandoc meta _) = do [ unode "navLabel" $ unode "text" (plainify $ docTitle meta) , unode "content" ! [("src","title_page.xhtml")] $ () ] - let tocData = fromStringLazy $ ppTopElement $ + let tocData = UTF8.fromStringLazy $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ @@ -557,7 +567,7 @@ writeEPUB opts doc@(Pandoc meta _) = do (_:_) -> [unode "ol" ! [("class","toc")] $ subs] let navtag = if epub3 then "nav" else "div" - let navData = fromStringLazy $ ppTopElement $ + let navData = UTF8.fromStringLazy $ ppTopElement $ unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml") ,("xmlns:epub","http://www.idpf.org/2007/ops")] $ [ unode "head" $ @@ -571,10 +581,10 @@ writeEPUB opts doc@(Pandoc meta _) = do let navEntry = mkEntry "nav.xhtml" navData -- mimetype - let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip" + let mimetypeEntry = mkEntry "mimetype" $ UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = fromStringLazy $ ppTopElement $ + let containerData = UTF8.fromStringLazy $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ @@ -583,18 +593,19 @@ writeEPUB opts doc@(Pandoc meta _) = do let containerEntry = mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = fromStringLazy $ ppTopElement $ + let apple = UTF8.fromStringLazy $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ unode "option" ! [("name","specified-fonts")] $ "true" let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- stylesheet - stylesheet <- case writerEpubStylesheet opts of - Just s -> return s - Nothing -> toString `fmap` + stylesheet <- case epubStylesheet metadata of + Just (StylesheetPath fp) -> UTF8.readFile fp + Just (StylesheetContents s) -> return s + Nothing -> UTF8.toString `fmap` readDataFile (writerUserDataDir opts) "epub.css" - let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet + let stylesheetEntry = mkEntry "stylesheet.css" $ UTF8.fromStringLazy stylesheet -- construct archive let archive = foldr addEntryToArchive emptyArchive -- cgit v1.2.3 From fdaeec0c48d742489ddf0ec0c0261ca9c53f989b Mon Sep 17 00:00:00 2001 From: Jose Luis Duran <jlduran@gmail.com> Date: Mon, 2 Dec 2013 09:55:58 +0000 Subject: Add booktabs package for LaTeX tables [ci skip] --- README | 2 +- data/templates | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 10 ++--- tests/tables.latex | 90 ++++++++++++++++++++-------------------- 4 files changed, 52 insertions(+), 52 deletions(-) (limited to 'src/Text') diff --git a/README b/README index 2a5ecc6ad..3ba1e364e 100644 --- a/README +++ b/README @@ -108,7 +108,7 @@ to PDF: Production of a PDF requires that a LaTeX engine be installed (see `--latex-engine`, below), and assumes that the following LaTeX packages are available: `amssymb`, `amsmath`, `ifxetex`, `ifluatex`, `listings` (if the -`--listings` option is used), `fancyvrb`, `longtable`, `url`, +`--listings` option is used), `fancyvrb`, `longtable`, `booktabs`, `url`, `graphicx`, `hyperref`, `ulem`, `babel` (if the `lang` variable is set), `fontspec` (if `xelatex` or `lualatex` is used as the LaTeX engine), `xltxtra` and `xunicode` (if `xelatex` is used). diff --git a/data/templates b/data/templates index 713a8f63d..f643a076d 160000 --- a/data/templates +++ b/data/templates @@ -1 +1 @@ -Subproject commit 713a8f63d5589ab9313869e47b03cf7f49e00e98 +Subproject commit f643a076d8c2b0b21391fd6aa1dedb2dd84c7e63 diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f3cbcf19f..a2e0b016f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -453,12 +453,12 @@ blockToLaTeX (Header level (id',classes,_) lst) = blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else ($$ "\\hline\\noalign{\\medskip}") `fmap` + else ($$ "\\midrule\\endhead") `fmap` (tableRowToLaTeX True aligns widths) heads captionText <- inlineListToLaTeX caption let capt = if isEmpty captionText then empty - else text "\\noalign{\\medskip}" + else text "\\addlinespace" $$ text "\\caption" <> braces captionText rows' <- mapM (tableRowToLaTeX False aligns widths) rows let colDescriptors = text $ concat $ map toColDescriptor aligns @@ -466,10 +466,10 @@ blockToLaTeX (Table caption aligns widths heads rows) = do return $ "\\begin{longtable}[c]" <> braces ("@{}" <> colDescriptors <> "@{}") -- the @{} removes extra space at beginning and end - $$ "\\hline\\noalign{\\medskip}" + $$ "\\toprule\\addlinespace" $$ headers $$ vcat rows' - $$ "\\hline" + $$ "\\bottomrule" $$ capt $$ "\\end{longtable}" @@ -506,7 +506,7 @@ tableRowToLaTeX header aligns widths cols = do (w * scaleFactor))) <> (halign a <> cr <> c <> cr) <> "\\end{minipage}" let cells = zipWith3 toCell widths aligns renderedCells - return $ hsep (intersperse "&" cells) $$ "\\\\\\noalign{\\medskip}" + return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . diff --git a/tests/tables.latex b/tests/tables.latex index c27e10461..1a87c4f71 100644 --- a/tests/tables.latex +++ b/tests/tables.latex @@ -1,59 +1,59 @@ Simple table with caption: \begin{longtable}[c]{@{}rlcl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace Right & Left & Center & Default -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline -\noalign{\medskip} +\\\addlinespace +\bottomrule +\addlinespace \caption{Demonstration of simple table syntax.} \end{longtable} Simple table without caption: \begin{longtable}[c]{@{}rlcl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace Right & Left & Center & Default -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} Simple table indented two spaces: \begin{longtable}[c]{@{}rlcl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace Right & Left & Center & Default -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline -\noalign{\medskip} +\\\addlinespace +\bottomrule +\addlinespace \caption{Demonstration of simple table syntax.} \end{longtable} Multiline table with caption: \begin{longtable}[c]{@{}clrl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace \begin{minipage}[b]{0.13\columnwidth}\centering Centered Header \end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright @@ -63,8 +63,8 @@ Right Aligned \end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead \begin{minipage}[t]{0.13\columnwidth}\centering First \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -74,7 +74,7 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} -\\\noalign{\medskip} +\\\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering Second \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -84,16 +84,16 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} -\\\noalign{\medskip} -\hline -\noalign{\medskip} +\\\addlinespace +\bottomrule +\addlinespace \caption{Here's the caption. It may span multiple lines.} \end{longtable} Multiline table without caption: \begin{longtable}[c]{@{}clrl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace \begin{minipage}[b]{0.13\columnwidth}\centering Centered Header \end{minipage} & \begin{minipage}[b]{0.12\columnwidth}\raggedright @@ -103,8 +103,8 @@ Right Aligned \end{minipage} & \begin{minipage}[b]{0.30\columnwidth}\raggedright Default aligned \end{minipage} -\\\noalign{\medskip} -\hline\noalign{\medskip} +\\\addlinespace +\midrule\endhead \begin{minipage}[t]{0.13\columnwidth}\centering First \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -114,7 +114,7 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} -\\\noalign{\medskip} +\\\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering Second \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -124,27 +124,27 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} Table without column headers: \begin{longtable}[c]{@{}rlcr@{}} -\hline\noalign{\medskip} +\toprule\addlinespace 12 & 12 & 12 & 12 -\\\noalign{\medskip} +\\\addlinespace 123 & 123 & 123 & 123 -\\\noalign{\medskip} +\\\addlinespace 1 & 1 & 1 & 1 -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} Multiline table without column headers: \begin{longtable}[c]{@{}clrl@{}} -\hline\noalign{\medskip} +\toprule\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering First \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -154,7 +154,7 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Example of a row that spans multiple lines. \end{minipage} -\\\noalign{\medskip} +\\\addlinespace \begin{minipage}[t]{0.13\columnwidth}\centering Second \end{minipage} & \begin{minipage}[t]{0.12\columnwidth}\raggedright @@ -164,6 +164,6 @@ row \end{minipage} & \begin{minipage}[t]{0.30\columnwidth}\raggedright Here's another one. Note the blank line between rows. \end{minipage} -\\\noalign{\medskip} -\hline +\\\addlinespace +\bottomrule \end{longtable} -- cgit v1.2.3 From 5314df51f368853f7ee4bf8f75eedf43afc0400e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 4 Dec 2013 10:00:40 -0800 Subject: Stop parsing "list lines" when we hit a block tag. This fixes exponential slowdown in certain input, e.g. a series of lists followed by `</div>`. --- src/Text/Pandoc/Readers/Markdown.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 33d1a9620..4cb75d86c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -732,7 +732,8 @@ listLine = try $ do notFollowedBy' (do indentSpaces many (spaceChar) listStart) - chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline + chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 (satisfy (/='<')) + <|> (notFollowedBy' (htmlTag isBlockTag) >> count 1 anyChar)) newline return $ concat chunks -- parse raw text for one list item, excluding start marker and continuations @@ -759,6 +760,7 @@ listContinuationLine :: MarkdownParser String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart + notFollowedBy' $ try $ skipMany spaceChar >> htmlTag (~== TagClose "div") optional indentSpaces result <- anyLine return $ result ++ "\n" -- cgit v1.2.3 From def05d3504bfc90c31c1621438f7a51c95079ad2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 6 Dec 2013 16:43:59 -0800 Subject: HTML reader: Parse LaTeX math if appropriate options are set. * Moved inlineMath, displayMath from Markdown reader to Parsing. * Export them from Parsing. (API change.) * Generalize their types. --- src/Text/Pandoc/Parsing.hs | 35 +++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/HTML.hs | 9 ++++++++- src/Text/Pandoc/Readers/LaTeX.hs | 3 ++- src/Text/Pandoc/Readers/Markdown.hs | 33 --------------------------------- 4 files changed, 45 insertions(+), 35 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 9687d7712..e15854333 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -48,6 +48,8 @@ module Text.Pandoc.Parsing ( (>>~), romanNumeral, emailAddress, uri, + mathInline, + mathDisplay, withHorizDisplacement, withRaw, escaped, @@ -455,6 +457,39 @@ uri = try $ do let uri' = scheme ++ ":" ++ fromEntities str' return (uri', escapeURI uri') +mathInlineWith :: String -> String -> Parser [Char] st String +mathInlineWith op cl = try $ do + string op + notFollowedBy space + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' + +mathDisplayWith :: String -> String -> Parser [Char] st String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + -- | Applies a parser, returns tuple of its results and its horizontal -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d691c9878..e758f712f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -467,7 +467,13 @@ pBlank = try $ do pTagContents :: Parser [Char] ParserState Inline pTagContents = - pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad + Math InlineMath `fmap` mathInline + <|> Math DisplayMath `fmap` mathDisplay + <|> pStr + <|> pSpace + <|> smartPunctuation pTagContents + <|> pSymbol + <|> pBad pStr :: Parser [Char] ParserState Inline pStr = do @@ -482,6 +488,7 @@ isSpecial '"' = True isSpecial '\'' = True isSpecial '.' = True isSpecial '-' = True +isSpecial '$' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 75e29ebb9..509cb5d74 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,7 +38,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, + mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4cb75d86c..11168bc09 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1408,39 +1408,6 @@ math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . B.math <$> (mathInline >>= applyMacros')) -mathDisplay :: MarkdownParser String -mathDisplay = - (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathDisplayWith "\\[" "\\]") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathDisplayWith "\\\\[" "\\\\]") - -mathDisplayWith :: String -> String -> MarkdownParser String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) - -mathInline :: MarkdownParser String -mathInline = - (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathInlineWith "\\(" "\\)") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathInlineWith "\\\\(" "\\\\)") - -mathInlineWith :: String -> String -> MarkdownParser String -mathInlineWith op cl = try $ do - string op - notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) - notFollowedBy digit -- to prevent capture of $5 - return $ concat words' - -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char -- cgit v1.2.3 From 4a1446705575ade1cf8ecaf19dec877bdd0ffb93 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 6 Dec 2013 17:31:47 -0800 Subject: Text.Pandoc: Don't default to pandocExtensions for all writers. In particular, we don't want to default to math parsing for the HTML writer. --- src/Text/Pandoc.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 703bb876a..3ae81db00 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -267,7 +267,10 @@ getDefaultExtensions "markdown_strict" = strictExtensions getDefaultExtensions "markdown_phpextra" = phpMarkdownExtraExtensions getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions -getDefaultExtensions _ = pandocExtensions +getDefaultExtensions "markdown" = pandocExtensions +getDefaultExtensions "plain" = pandocExtensions +getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] +getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] -- | Retrieve reader based on formatSpec (format+extensions). getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc) -- cgit v1.2.3 From e5a7c31a32b2b53ef5073355b70dc17ecf1d16af Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 7 Dec 2013 17:12:52 -0800 Subject: Markdown reader: Fixed bug with literal `</div>` in lists. Closes #1078. --- src/Text/Pandoc/Readers/Markdown.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 11168bc09..c32c5ed86 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -732,8 +732,9 @@ listLine = try $ do notFollowedBy' (do indentSpaces many (spaceChar) listStart) - chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 (satisfy (/='<')) - <|> (notFollowedBy' (htmlTag isBlockTag) >> count 1 anyChar)) newline + notFollowedBy' $ htmlTag (~== TagClose "div") + chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) + newline return $ concat chunks -- parse raw text for one list item, excluding start marker and continuations @@ -760,7 +761,7 @@ listContinuationLine :: MarkdownParser String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart - notFollowedBy' $ try $ skipMany spaceChar >> htmlTag (~== TagClose "div") + notFollowedBy' $ htmlTag (~== TagClose "div") optional indentSpaces result <- anyLine return $ result ++ "\n" -- cgit v1.2.3 From e2c4156c20f113b6f965ccd9313c5aa062cc198f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 7 Dec 2013 19:41:42 -0800 Subject: Small performance improvement in list parsing. --- src/Text/Pandoc/Readers/Markdown.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c32c5ed86..a948d5ad3 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -733,8 +733,9 @@ listLine = try $ do many (spaceChar) listStart) notFollowedBy' $ htmlTag (~== TagClose "div") - chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) - newline + chunks <- manyTill (liftM snd (htmlTag isCommentTag) + <|> many1 (satisfy (/='\n')) + <|> count 1 anyChar) newline return $ concat chunks -- parse raw text for one list item, excluding start marker and continuations -- cgit v1.2.3 From 8e255fad98baf2448d8c1866d7c03c5d110a505f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 7 Dec 2013 19:56:54 -0800 Subject: Another small performance improvement. --- src/Text/Pandoc/Readers/Markdown.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a948d5ad3..e77dda8d7 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -733,9 +733,11 @@ listLine = try $ do many (spaceChar) listStart) notFollowedBy' $ htmlTag (~== TagClose "div") - chunks <- manyTill (liftM snd (htmlTag isCommentTag) - <|> many1 (satisfy (/='\n')) - <|> count 1 anyChar) newline + chunks <- manyTill + ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') + <|> liftM snd (htmlTag isCommentTag) + <|> count 1 anyChar + ) newline return $ concat chunks -- parse raw text for one list item, excluding start marker and continuations -- cgit v1.2.3 From f9662957704ebfe83d7764dd64151d37c476c0b0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Mon, 9 Dec 2013 20:31:47 -0800 Subject: Don't use tilde code blocks with braced attributes in gfm output. A consequence of this change is that the backtick form will be preferred in general if both are enabled. I think that is good, as it is much more widespread than the tilde form. Closes #1084. --- src/Text/Pandoc/Writers/Markdown.hs | 18 +++++++++--------- tests/lhs-test.markdown | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 60d474263..c0b189b75 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -381,13 +381,11 @@ blockToMarkdown opts (CodeBlock (_,classes,_) str) isEnabled Ext_literate_haskell opts = return $ prefixed "> " (text str) <> blankline blockToMarkdown opts (CodeBlock attribs str) = return $ - case attribs of - x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts -> - tildes <> " " <> attrs <> cr <> text str <> - cr <> tildes <> blankline - (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts -> - backticks <> " " <> text cls <> cr <> text str <> - cr <> backticks <> blankline + case attribs == nullAttr of + False | isEnabled Ext_backtick_code_blocks opts -> + backticks <> attrs <> cr <> text str <> cr <> backticks <> blankline + | isEnabled Ext_fenced_code_blocks opts -> + tildes <> attrs <> cr <> text str <> cr <> tildes <> blankline _ -> nest (writerTabStop opts) (text str) <> blankline where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of [] -> "~~~~" @@ -396,8 +394,10 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ | otherwise -> replicate (n+1) '~' backticks = text "```" attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ attrsToMarkdown attribs - else empty + then nowrap $ " " <> attrsToMarkdown attribs + else case attribs of + (_,[cls],_) -> " " <> text cls + _ -> empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks diff --git a/tests/lhs-test.markdown b/tests/lhs-test.markdown index 47ec920d3..75a253bf4 100644 --- a/tests/lhs-test.markdown +++ b/tests/lhs-test.markdown @@ -4,11 +4,11 @@ lhs test `unsplit` is an arrow that takes a pair of values and combines them to return a single value: -~~~~ {.sourceCode .literate .haskell} +``` {.sourceCode .literate .haskell} unsplit :: (Arrow a) => (b -> c -> d) -> a (b, c) d unsplit = arr . uncurry -- arr (\op (x,y) -> x `op` y) -~~~~ +``` `(***)` combines two arrows into a new arrow by running the two arrows on a pair of values (one arrow on the first item of the pair and one arrow on the -- cgit v1.2.3 From 142f81889b6ede73f965afa01ac67427e1335e9d Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Mon, 9 Dec 2013 22:35:22 -0800 Subject: Added `withSocketsDo` around http conduit code in `openURL`. This should address #1080, but further testing on Windows is needed before we can close the bug. --- src/Text/Pandoc/Shared.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 7592b7659..ce71881e6 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -121,6 +121,7 @@ import Data.ByteString.Lazy (toChunks) import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, responseBody, responseHeaders) import Network.HTTP.Types.Header ( hContentType) +import Network (withSocketsDo) #else import Network.URI (parseURI) import Network.HTTP (findHeader, rspBody, @@ -644,7 +645,7 @@ openURL u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u in return $ Right (decodeLenient contents, Just mime) #ifdef HTTP_CONDUIT - | otherwise = E.try $ do + | otherwise = withSocketsDo $ E.try $ do req <- parseUrl u resp <- withManager $ httpLbs req return (BS.concat $ toChunks $ responseBody resp, -- cgit v1.2.3 From 6d0cd9203ce968b96ffa8fc2fbae5a50c99b125b Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 12 Dec 2013 22:34:13 -0800 Subject: Markdown reader: Fixed regression in title blocks. If author field was empty, date was being ignored. Closes #1089. --- src/Text/Pandoc/Readers/Markdown.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index e77dda8d7..8014d480c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -215,10 +215,10 @@ pandocTitleBlock = try $ do author' <- author date' <- date return $ - ( if B.isNull title' then id else B.setMeta "title" title' - . if null author' then id else B.setMeta "author" author' - . if B.isNull date' then id else B.setMeta "date" date' ) - nullMeta + (if B.isNull title' then id else B.setMeta "title" title') + . (if null author' then id else B.setMeta "author" author') + . (if B.isNull date' then id else B.setMeta "date" date') + $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } yamlMetaBlock :: MarkdownParser (F Blocks) -- cgit v1.2.3 From ca3c292f30c04bd24287d554f08a7911c808a4e2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 13 Dec 2013 11:10:04 -0800 Subject: EPUB writer: Fixed bug with `--epub-stylesheet`. Now the contents of `writerEpubStylesheet` (set by `--epub-stylesheet`) should again work, and take precedence over a stylesheet specified in the metadata. --- src/Text/Pandoc/Writers/EPUB.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 32b0c3c32..36ead0b8f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -288,8 +288,7 @@ metadataFromMeta opts meta = EPUBMetadata{ rights = metaValueToString <$> lookupMeta "rights" meta coverImage = lookup "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) - stylesheet = (StylesheetContents <$> - lookup "epub-stylesheet" (writerVariables opts)) `mplus` + stylesheet = (StylesheetContents <$> writerEpubStylesheet opts) `mplus` ((StylesheetPath . metaValueToString) <$> lookupMeta "stylesheet" meta) -- cgit v1.2.3 From 5adbe7b36592ba8779563bba791ad4724fde927c Mon Sep 17 00:00:00 2001 From: Jeff Arnold <jeffrey.arnold@gmail.com> Date: Fri, 13 Dec 2013 19:16:04 -0500 Subject: LaTeX reader: add support for Verb macro --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 509cb5d74..736f2ff1c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -455,6 +455,7 @@ inlineCommands = M.fromList $ , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) , ("verb", doverb) , ("lstinline", doverb) + , ("Verb", doverb) , ("texttt", (code . stringify . toList) <$> tok) , ("url", (unescapeURL <$> braced) >>= \url -> pure (link url "" (str url))) -- cgit v1.2.3 From 2f00f5c7c22ef879df3a21f1c3da967d106628e9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 15 Dec 2013 12:27:29 -0800 Subject: Properly handle script blocks in strict mode. (That is, markdown-markdown_in_html_blocks.) Previously a spurious `<p>` tag was being added. Closes #1093. --- src/Text/Pandoc/Readers/Markdown.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8014d480c..b2e88d47e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -902,7 +902,9 @@ plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline -- htmlElement :: MarkdownParser String -htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) +htmlElement = rawVerbatimBlock + <|> strictHtmlBlock + <|> liftM snd (htmlTag isBlockTag) htmlBlock :: MarkdownParser (F Blocks) htmlBlock = do -- cgit v1.2.3 From 826443926f85b72840b49cd7973e91ed80a09b5d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 16 Dec 2013 13:58:54 -0800 Subject: Docbook reader: Avoid failure if tbody contains no tr or row elements. --- src/Text/Pandoc/Readers/DocBook.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index fc29988d5..03c6140ac 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -806,7 +806,9 @@ parseBlock (Elem e) = (x >= '0' && x <= '9') || x == '.') w Nothing -> 0 :: Double - let numrows = maximum $ map length bodyrows + let numrows = case bodyrows of + [] -> 0 + xs -> maximum $ map length xs let aligns = case colspecs of [] -> replicate numrows AlignDefault cs -> map toAlignment cs -- cgit v1.2.3 From 0132f6fcb75e58774846e92bac6df2afd7a50e6f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 17 Dec 2013 16:03:43 -0800 Subject: LaTeX reader: Support babel-style quoting: `` "`..."' ``. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 736f2ff1c..0020a8f26 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -173,6 +173,8 @@ double_quote :: LP Inlines double_quote = ( quoted' doubleQuoted (try $ string "``") (void $ try $ string "''") <|> quoted' doubleQuoted (string "“") (void $ char '”') + -- the following is used by babel for localized quotes: + <|> quoted' doubleQuoted (try $ string "\"`") (void $ try $ string "\"'") <|> quoted' doubleQuoted (string "\"") (void $ char '"') ) -- cgit v1.2.3 From a3f6f2827c9acb0e68ac533745b3f4dc8836ee24 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Tue, 17 Dec 2013 20:10:09 -0800 Subject: LaTeX writer: Factored out function for table cell creation. --- src/Text/Pandoc/Writers/LaTeX.hs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index a2e0b016f..b7beacd91 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -490,24 +490,28 @@ tableRowToLaTeX :: Bool -> [[Block]] -> State WriterState Doc tableRowToLaTeX header aligns widths cols = do - renderedCells <- mapM blockListToLaTeX cols - let valign = text $ if header then "[b]" else "[t]" - let halign x = case x of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" -- scale factor compensates for extra space between columns -- so the whole table isn't larger than columnwidth let scaleFactor = 0.97 ** fromIntegral (length aligns) - let toCell 0 _ c = c - toCell w a c = "\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" - (w * scaleFactor))) <> - (halign a <> cr <> c <> cr) <> "\\end{minipage}" - let cells = zipWith3 toCell widths aligns renderedCells + let widths' = map (scaleFactor *) widths + cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" +tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) + -> State WriterState Doc +tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX blocks +tableCellToLaTeX header (width, align, blocks) = do + cellContents <- blockListToLaTeX blocks + let valign = text $ if header then "[b]" else "[t]" + let halign = case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + return $ "\\begin{minipage}" <> valign <> + braces (text (printf "%.2f\\columnwidth" width)) <> + (halign <> cr <> cellContents <> cr) <> "\\end{minipage}" + listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . (nest 2) -- cgit v1.2.3 From 8053ba2123699fd89900b2a9c5656a72d3a2fe85 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Tue, 17 Dec 2013 20:53:59 -0800 Subject: LaTeX writer: Better treatment of footnotes in tables. Notes now appear in the regular sequence, rather than in the table cell. (This was a regression in 1.10.) --- src/Text/Pandoc/Writers/LaTeX.hs | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index b7beacd91..7612be100 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -50,6 +50,8 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX, data WriterState = WriterState { stInNote :: Bool -- true if we're in a note + , stInMinipage :: Bool -- true if in minipage + , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter , stVerbInNote :: Bool -- true if document has verbatim text in note @@ -70,7 +72,7 @@ data WriterState = writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, + WriterState { stInNote = False, stInMinipage = False, stNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, @@ -501,16 +503,31 @@ tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) -> State WriterState Doc tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX blocks tableCellToLaTeX header (width, align, blocks) = do + modify $ \st -> st{ stInMinipage = True, stNotes = [] } cellContents <- blockListToLaTeX blocks + notes <- gets stNotes + modify $ \st -> st{ stInMinipage = False, stNotes = [] } let valign = text $ if header then "[b]" else "[t]" let halign = case align of AlignLeft -> "\\raggedright" AlignRight -> "\\raggedleft" AlignCenter -> "\\centering" AlignDefault -> "\\raggedright" - return $ "\\begin{minipage}" <> valign <> - braces (text (printf "%.2f\\columnwidth" width)) <> - (halign <> cr <> cellContents <> cr) <> "\\end{minipage}" + return $ ("\\begin{minipage}" <> valign <> + braces (text (printf "%.2f\\columnwidth" width)) <> + (halign <> cr <> cellContents <> cr) <> "\\end{minipage}") + $$ case notes of + [] -> empty + ns -> (case length ns of + n | n > 1 -> "\\addtocounter" <> + braces "footnote" <> + braces (text $ show $ 1 - n) + | otherwise -> empty) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . @@ -713,14 +730,20 @@ inlineToLaTeX (Image _ (source, _)) = do source'' <- stringToLaTeX URLString source' return $ "\\includegraphics" <> braces (text source'') inlineToLaTeX (Note contents) = do + inMinipage <- gets stInMinipage modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents modify (\s -> s {stInNote = False}) let optnl = case reverse contents of (CodeBlock _ _ : _) -> cr _ -> empty - return $ "\\footnote" <> braces (nest 2 contents' <> optnl) - -- note: a \n before } needed when note ends with a Verbatim environment + let noteContents = nest 2 contents' <> optnl + modify $ \st -> st{ stNotes = noteContents : stNotes st } + return $ + if inMinipage + then "\\footnotemark{}" + -- note: a \n before } needed when note ends with a Verbatim environment + else "\\footnote" <> braces noteContents citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) -- cgit v1.2.3 From 1ed2c467c9442934c060257f6e191a5a3d6c1e38 Mon Sep 17 00:00:00 2001 From: Henry de Valence <hdevalence@hdevalence.ca> Date: Thu, 19 Dec 2013 17:06:27 -0500 Subject: HLint: Use all Replace `and . map` with `all`. --- src/Text/Pandoc/Shared.hs | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index ce71881e6..cd2f7e24d 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -532,7 +532,7 @@ headerShift n = walk shift -- | Detect if a list is tight. isTightList :: [[Block]] -> Bool -isTightList = and . map firstIsPlain +isTightList = all firstIsPlain where firstIsPlain (Plain _ : _) = True firstIsPlain _ = False diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7612be100..47b769c48 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -443,7 +443,7 @@ blockToLaTeX (DefinitionList lst) = do incremental <- gets stIncremental let inc = if incremental then "[<+->]" else "" items <- mapM defListItemToLaTeX lst - let spacing = if and $ map isTightList (map snd lst) + let spacing = if all isTightList (map snd lst) then text "\\itemsep1pt\\parskip0pt\\parsep0pt" else empty return $ text ("\\begin{description}" ++ inc) $$ spacing $$ vcat items $$ @@ -764,9 +764,9 @@ citationsToNatbib cits | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits = citeCommand "citep" p s ks where - noPrefix = and . map (null . citationPrefix) - noSuffix = and . map (null . citationSuffix) - ismode m = and . map (((==) m) . citationMode) + noPrefix = all (null . citationPrefix) + noSuffix = all (null . citationSuffix) + ismode m = all (((==) m) . citationMode) p = citationPrefix $ head $ cits s = citationSuffix $ last $ cits ks = intercalate ", " $ map citationId cits -- cgit v1.2.3 From 0c5e7cf8cb4fe6959d7e89880e8925afe6625414 Mon Sep 17 00:00:00 2001 From: Henry de Valence <hdevalence@hdevalence.ca> Date: Thu, 19 Dec 2013 20:19:24 -0500 Subject: HLint: use `elem` and `notElem` Replaces long conditional chains with calls to `elem` and `notElem`. --- pandoc.hs | 2 +- src/Text/Pandoc/Parsing.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 5 ++--- src/Text/Pandoc/Readers/Markdown.hs | 8 ++++---- src/Text/Pandoc/Shared.hs | 12 ++++-------- src/Text/Pandoc/Writers/Markdown.hs | 11 ++++++----- src/Text/Pandoc/Writers/Org.hs | 2 +- 7 files changed, 20 insertions(+), 24 deletions(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index cada3347d..ccd3e57fb 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -656,7 +656,7 @@ options = (ReqArg (\arg opt -> do let b = takeBaseName arg - if (b == "pdflatex" || b == "lualatex" || b == "xelatex") + if b `elem` ["pdflatex", "lualatex", "xelatex"] then return opt { optLaTeXEngine = arg } else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.") "PROGRAM") diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index e15854333..2f21e1253 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -271,7 +271,7 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. nonspaceChar :: Parser [Char] st Char -nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' +nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r'] -- | Skips zero or more spaces or tabs. skipSpaces :: Parser [Char] st () @@ -1062,7 +1062,7 @@ doubleQuoteStart :: Parser [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" - notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) + notFollowedBy . satisfy $ flip elem [' ', '\t', '\n'] doubleQuoteEnd :: Parser [Char] st () doubleQuoteEnd = do diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 0020a8f26..51271edc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -874,9 +874,8 @@ verbatimEnv = do (_,r) <- withRaw $ do controlSeq "begin" name <- braced - guard $ name == "verbatim" || name == "Verbatim" || - name == "lstlisting" || name == "minted" || - name == "alltt" + guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", + "minted", "alltt"] verbEnv name rest <- getInput return (r,rest) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b2e88d47e..f483ab059 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -789,8 +789,8 @@ listItem start = try $ do orderedList :: MarkdownParser (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - unless ((style == DefaultStyle || style == Decimal || style == Example) && - (delim == DefaultDelim || delim == Period)) $ + unless (style `elem` [DefaultStyle, Decimal, Example] && + delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists items <- fmap sequence $ many1 $ listItem @@ -925,8 +925,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> - t == "pre" || t == "style" || t == "script") + (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem + ["pre", "style", "script"]) (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index cd2f7e24d..3446f4343 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -564,14 +564,10 @@ makeMeta title authors date = -- | Render HTML tags. renderTags' :: [Tag String] -> String renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } + renderOptions{ optMinimize = matchTags ["hr", "br", "img", + "meta", "link"] + , optRawTag = matchTags ["script", "style"] } + where matchTags = \tags -> flip elem tags . map toLower -- -- File handling diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index c0b189b75..278e5cc9d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -338,7 +338,7 @@ blockToMarkdown opts (RawBlock f str) else return $ if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" - | f == "latex" || f == "tex" || f == "markdown" = do + | f `elem` ["latex", "tex", "markdown"] = do st <- get if stPlain st then return empty @@ -628,10 +628,11 @@ getReference label (src, tit) = do Nothing -> do let label' = case find ((== label) . fst) (stRefs st) of Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst (stRefs st)))) [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" + case find (\n -> notElem [Str (show n)] + (map fst (stRefs st))) + [1..(10000 :: Integer)] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" Nothing -> label modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st }) return label' diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 51083f52b..d318c5f6a 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -129,7 +129,7 @@ blockToOrg (Para inlines) = do blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = +blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] = return $ text str blockToOrg (RawBlock _ _) = return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline -- cgit v1.2.3 From c35f5ba42df094cef5f69a191315385a0a1e12b0 Mon Sep 17 00:00:00 2001 From: Henry de Valence <hdevalence@hdevalence.ca> Date: Thu, 19 Dec 2013 20:28:53 -0500 Subject: HLint: Remove lambdas. --- src/Text/Pandoc/Writers/Shared.hs | 3 +-- tests/Tests/Walk.hs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 9cb08803c..33091ea94 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -65,8 +65,7 @@ metaToJSON opts blockWriter inlineWriter (Meta metamap) renderedMap <- Traversable.mapM (metaValueToJSON blockWriter inlineWriter) metamap - return $ M.foldWithKey (\key val obj -> defField key val obj) - baseContext renderedMap + return $ M.foldWithKey defField baseContext renderedMap | otherwise = return (Object H.empty) metaValueToJSON :: Monad m diff --git a/tests/Tests/Walk.hs b/tests/Tests/Walk.hs index f6aa1beae..34350e28a 100644 --- a/tests/Tests/Walk.hs +++ b/tests/Tests/Walk.hs @@ -21,11 +21,11 @@ tests = [ testGroup "Walk" p_walk :: (Typeable a, Walkable a Pandoc) => (a -> a) -> Pandoc -> Bool -p_walk f = (\(d :: Pandoc) -> everywhere (mkT f) d == walk f d) +p_walk f d = everywhere (mkT f) d == walk f d p_query :: (Eq a, Typeable a1, Monoid a, Walkable a1 Pandoc) => (a1 -> a) -> Pandoc -> Bool -p_query f = (\(d :: Pandoc) -> everything mappend (mempty `mkQ` f) d == query f d) +p_query f d = everything mappend (mempty `mkQ` f) d == query f d inlineTrans :: Inline -> Inline inlineTrans (Str xs) = Str $ map toUpper xs -- cgit v1.2.3 From f6d151889c8fff303be8ee8a4f9be67a04de9210 Mon Sep 17 00:00:00 2001 From: Henry de Valence <hdevalence@hdevalence.ca> Date: Thu, 19 Dec 2013 20:43:25 -0500 Subject: HLint: redundant parens Remove parens enclosing a single element. --- pandoc.hs | 6 ++---- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Readers/Textile.hs | 2 +- src/Text/Pandoc/Writers/HTML.hs | 2 +- src/Text/Pandoc/Writers/MediaWiki.hs | 2 +- src/Text/Pandoc/Writers/Textile.hs | 2 +- tests/Tests/Readers/LaTeX.hs | 2 +- tests/Tests/Readers/Markdown.hs | 4 ++-- 8 files changed, 10 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index ccd3e57fb..574c89771 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1034,12 +1034,10 @@ main = do variables' <- case mathMethod of LaTeXMathML Nothing -> do - s <- readDataFileUTF8 datadir - ("LaTeXMathML.js") + s <- readDataFileUTF8 datadir "LaTeXMathML.js" return $ ("mathml-script", s) : variables MathML Nothing -> do - s <- readDataFileUTF8 datadir - ("MathMLinHTML.js") + s <- readDataFileUTF8 datadir "MathMLinHTML.js" return $ ("mathml-script", s) : variables _ -> return variables diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index f483ab059..166c524ef 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -730,7 +730,7 @@ listStart = bulletListStart <|> (anyOrderedListStart >> return ()) listLine :: MarkdownParser String listLine = try $ do notFollowedBy' (do indentSpaces - many (spaceChar) + many spaceChar listStart) notFollowedBy' $ htmlTag (~== TagClose "div") chunks <- manyTill diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 23e07f621..93658cdea 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -594,7 +594,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> Inline) -- ^ Inline constructor -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) -simpleInline border construct = surrounded border (inlineWithAttribute) >>= +simpleInline border construct = surrounded border inlineWithAttribute >>= return . construct . normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 641652276..2c6435457 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -524,7 +524,7 @@ blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- if null term then return mempty - else liftM (H.dt) $ inlineListToHtml opts term + else liftM H.dt $ inlineListToHtml opts term defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . blockListToHtml opts) defs return $ mconcat $ nl opts : term' : nl opts : diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 61741a61e..83fefaa29 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeMediaWiki :: WriterOptions -> Pandoc -> String writeMediaWiki opts document = evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) + WriterState { stNotes = False, stListLevel = [], stUseTags = False } -- | Return MediaWiki representation of document. pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 7c102cc86..95aedf780 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -51,7 +51,7 @@ data WriterState = WriterState { writeTextile :: WriterOptions -> Pandoc -> String writeTextile opts document = evalState (pandocToTextile opts document) - (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) + WriterState { stNotes = [], stListLevel = [], stUseTags = False } -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String diff --git a/tests/Tests/Readers/LaTeX.hs b/tests/Tests/Readers/LaTeX.hs index c1efd1b68..8ff23ebc1 100644 --- a/tests/Tests/Readers/LaTeX.hs +++ b/tests/Tests/Readers/LaTeX.hs @@ -21,7 +21,7 @@ tests = [ testGroup "basic" [ "simple" =: "word" =?> para "word" , "space" =: - "some text" =?> para ("some text") + "some text" =?> para "some text" , "emphasized" =: "\\emph{emphasized}" =?> para (emph "emphasized") ] diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index b04ff9a0d..492680a35 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -171,13 +171,13 @@ tests = [ testGroup "inline code" , testGroup "smart punctuation" [ test markdownSmart "quote before ellipses" ("'...hi'" - =?> para (singleQuoted ("…hi"))) + =?> para (singleQuoted "…hi")) , test markdownSmart "apostrophe before emph" ("D'oh! A l'*aide*!" =?> para ("D’oh! A l’" <> emph "aide" <> "!")) , test markdownSmart "apostrophe in French" ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»" - =?> para ("À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»")) + =?> para "À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»") ] , testGroup "mixed emphasis and strong" [ "emph and strong emph alternating" =: -- cgit v1.2.3 From c8fc0a03748ceb4dbea502297ece89eac9d73aff Mon Sep 17 00:00:00 2001 From: Henry de Valence <hdevalence@hdevalence.ca> Date: Thu, 19 Dec 2013 20:46:11 -0500 Subject: HLint: use /= --- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 47b769c48..1deacecb4 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -190,7 +190,7 @@ stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && not (ctx == CodeString) + let ligatures = writerTeXLigatures opts && (ctx /= CodeString) let isUrl = ctx == URLString when (x == '€') $ modify $ \st -> st{ stUsesEuro = True } -- cgit v1.2.3 From 3d70059a4896c66cab59832a3ed9eb8334b84a2f Mon Sep 17 00:00:00 2001 From: Henry de Valence <hdevalence@hdevalence.ca> Date: Thu, 19 Dec 2013 21:07:09 -0500 Subject: HLint: use fromMaybe Replace uses of `maybe x id` with `fromMaybe x`. --- src/Text/Pandoc/PDF.hs | 3 ++- src/Text/Pandoc/Readers/DocBook.hs | 7 ++++--- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- src/Text/Pandoc/Readers/Markdown.hs | 4 ++-- src/Text/Pandoc/Readers/MediaWiki.hs | 7 ++++--- src/Text/Pandoc/Writers/Docx.hs | 6 ++++-- src/Text/Pandoc/Writers/EPUB.hs | 12 ++++++------ src/Text/Pandoc/Writers/HTML.hs | 4 ++-- src/Text/Pandoc/Writers/LaTeX.hs | 3 ++- src/Text/Pandoc/Writers/ODT.hs | 3 ++- 10 files changed, 30 insertions(+), 23 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index e8683b98f..360338f8f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -41,6 +41,7 @@ import System.Directory import System.Environment import Control.Monad (unless) import Data.List (isInfixOf) +import Data.Maybe (fromMaybe) import qualified Data.ByteString.Base64 as B64 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition @@ -87,7 +88,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do res <- fetchItem baseURL src case res of Right (contents, Just mime) -> do - let ext = maybe (takeExtension src) id $ + let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime let basename = UTF8.toString $ B64.encode $ UTF8.fromString src let fname = tmpdir </> basename <.> ext diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 03c6140ac..56cb16b20 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,6 +12,7 @@ import Data.Char (isSpace) import Control.Monad.State import Control.Applicative ((<$>)) import Data.List (intersperse) +import Data.Maybe (fromMaybe) {- @@ -683,7 +684,7 @@ parseBlock (Elem e) = "lowerroman" -> LowerRoman "upperroman" -> UpperRoman _ -> Decimal - let start = maybe 1 id $ + let start = fromMaybe 1 $ (attrValue "override" <$> filterElement (named "listitem") e) >>= safeRead orderedListWith (start,listStyle,DefaultDelim) @@ -779,7 +780,7 @@ parseBlock (Elem e) = caption <- case filterChild isCaption e of Just t -> getInlines t Nothing -> return mempty - let e' = maybe e id $ filterChild (named "tgroup") e + let e' = fromMaybe e $ filterChild (named "tgroup") e let isColspec x = named "colspec" x || named "col" x let colspecs = case filterChild (named "colgroup") e' of Just c -> filterChildren isColspec c @@ -801,7 +802,7 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of - Just w -> maybe 0 id + Just w -> fromMaybe 0 $ safeRead $ '0': filter (\x -> (x >= '0' && x <= '9') || x == '.') w diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e758f712f..4b44a3a21 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -207,7 +207,7 @@ pHeader = try $ do let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] let level = read (drop 1 tagtype) contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof) - let ident = maybe "" id $ lookup "id" attr + let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] return $ if bodyTitle @@ -257,7 +257,7 @@ pCol = try $ do skipMany pBlank return $ case lookup "width" attribs of Just x | not (null x) && last x == '%' -> - maybe 0.0 id $ safeRead ('0':'.':init x) + fromMaybe 0.0 $ safeRead ('0':'.':init x) _ -> 0.0 pColgroup :: TagParser [Double] diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 166c524ef..3a5d29b4e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1722,7 +1722,7 @@ spanHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) - let ident = maybe "" id $ lookup "id" attrs + let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.spanWith (ident, classes, keyvals) <$> contents @@ -1732,7 +1732,7 @@ divHtml = try $ do guardEnabled Ext_markdown_in_html_blocks (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" []) contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div")) - let ident = maybe "" id $ lookup "id" attrs + let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 1c074e3de..8d8ea0199 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -54,6 +54,7 @@ import Data.Sequence (viewl, ViewL(..), (<|)) import qualified Data.Foldable as F import qualified Data.Map as M import Data.Char (isDigit, isSpace) +import Data.Maybe (fromMaybe) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options @@ -204,7 +205,7 @@ table = do tableStart styles <- option [] parseAttrs <* blankline let tableWidth = case lookup "width" styles of - Just w -> maybe 1.0 id $ parseWidth w + Just w -> fromMaybe 1.0 $ parseWidth w Nothing -> 1.0 caption <- option mempty tableCaption optional rowsep @@ -285,7 +286,7 @@ tableCell = try $ do Just "center" -> AlignCenter _ -> AlignDefault let width = case lookup "width" attrs of - Just xs -> maybe 0.0 id $ parseWidth xs + Just xs -> fromMaybe 0.0 $ parseWidth xs Nothing -> 0.0 return ((align, width), bs) @@ -387,7 +388,7 @@ orderedList = spaces items <- many (listItem '#' <|> li) optional (htmlTag (~== TagClose "ol")) - let start = maybe 1 id $ safeRead $ fromAttrib "start" tag + let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items definitionList :: MWParser Blocks diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 5c7341b69..32ba7715a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -29,6 +29,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where +import Data.Maybe (fromMaybe) import Data.List ( intercalate, groupBy ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL @@ -130,7 +131,8 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, maybe "application/octet-stream" id mbMimeType) + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) let overrides = map mkOverrideNode [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -322,7 +324,7 @@ mkNum markers marker numid = NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = maybe 0 id $ M.lookup marker markers + where absnumid = fromMaybe 0 $ M.lookup marker markers mkAbstractNum :: (ListMarker,Int) -> IO Element mkAbstractNum (marker,numid) = do diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 36ead0b8f..4daa9609e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -176,8 +176,8 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md , titleFileAs = getAttr "file-as" , titleType = getAttr "type" } : epubTitle md } - | name == "date" = md{ epubDate = maybe "" id $ normalizeDate' - $ strContent e } + | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate' + $ strContent e } | name == "language" = md{ epubLanguage = strContent e } | name == "creator" = md{ epubCreator = Creator{ creatorText = strContent e @@ -271,7 +271,7 @@ metadataFromMeta opts meta = EPUBMetadata{ } where identifiers = getIdentifier meta titles = getTitle meta - date = maybe "" id $ + date = fromMaybe "" $ (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate' language = maybe "" metaValueToString $ lookupMeta "language" meta `mplus` lookupMeta "lang" meta @@ -297,7 +297,7 @@ writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString writeEPUB opts doc@(Pandoc meta _) = do - let version = maybe EPUB2 id (writerEpubVersion opts) + let version = fromMaybe EPUB2 (writerEpubVersion opts) let epub3 = version == EPUB3 epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content @@ -401,7 +401,7 @@ writeEPUB opts doc@(Pandoc meta _) = do chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) $ renderHtml $ writeHtml opts'{ writerNumberOffset = - maybe [] id mbnum } + fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> Pandoc (setMeta "title" (fromList xs) nullMeta) bs @@ -436,7 +436,7 @@ writeEPUB opts doc@(Pandoc meta _) = do let fontNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), - ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () let plainTitle = case docTitle meta of [] -> case epubTitle metadata of [] -> "UNTITLED" diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 2c6435457..129776363 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -45,7 +45,7 @@ import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.String ( fromString ) -import Data.Maybe ( catMaybes ) +import Data.Maybe ( catMaybes, fromMaybe ) import Control.Monad.State import Text.Blaze.Html hiding(contents) import Text.Blaze.Internal(preEscapedString) @@ -118,7 +118,7 @@ pandocToHtml opts (Pandoc meta blocks) = do let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta - let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel blocks) $ writerSlideLevel opts let sects = hierarchicalize $ if writerSlideVariant opts == NoSlides then blocks diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 1deacecb4..a76d6d82b 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -40,6 +40,7 @@ import Network.URI ( isURI, unEscapeString ) import Data.List ( (\\), isSuffixOf, isInfixOf, isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) +import Data.Maybe ( fromMaybe ) import Control.Applicative ((<|>)) import Control.Monad.State import Text.Pandoc.Pretty @@ -240,7 +241,7 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents toSlides :: [Block] -> State WriterState [Block] toSlides bs = do opts <- gets stOptions - let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts + let slideLevel = fromMaybe (getSlideLevel bs) $ writerSlideLevel opts let bs' = prepSlides slideLevel bs concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index cc0a06243..25cd5ae13 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -31,6 +31,7 @@ Conversion of 'Pandoc' documents to ODT. module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef import Data.List ( isPrefixOf ) +import Data.Maybe ( fromMaybe ) import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip @@ -127,7 +128,7 @@ transformPic opts entriesRef (Image lab (src,_)) = do return $ Emph lab Right (img, _) -> do let size = imageSize img - let (w,h) = maybe (0,0) id $ sizeInPoints `fmap` size + let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size let tit' = show w ++ "x" ++ show h entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src -- cgit v1.2.3 From d6ec6cf9cf5731977fa0f476cabef4786edd5665 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 1 Jan 2014 09:01:05 -0800 Subject: Docx writer: Fixed problem with some modified reference docx files. Include `word/_rels/settings.xml.rels` if it exists, as well as other `rels` files besides the ones pandoc generates explicitly. --- src/Text/Pandoc/Writers/Docx.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 32ba7715a..67d202010 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where import Data.Maybe (fromMaybe) -import Data.List ( intercalate, groupBy ) +import Data.List ( intercalate, groupBy, isPrefixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -262,6 +262,11 @@ writeDocx opts doc@(Pandoc meta _) = do fontTableEntry <- entryFromArchive "word/fontTable.xml" settingsEntry <- entryFromArchive "word/settings.xml" webSettingsEntry <- entryFromArchive "word/webSettings.xml" + let miscRels = [ f | f <- filesInArchive refArchive + , "word/_rels/" `isPrefixOf` f + , f /= "word/_rels/document.xml.rels" + , f /= "word/_rels/footnotes.xml.rels" ] + miscRelEntries <- mapM entryFromArchive miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -269,7 +274,7 @@ writeDocx opts doc@(Pandoc meta _) = do footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : - imageEntries + imageEntries ++ miscRelEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From f3ee82373b4ad8e955db12d3c2c2159a2bea53a0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 1 Jan 2014 09:22:37 -0800 Subject: HTML reader: Parse name/content pairs from meta tags as metadata. Closes #1106. --- src/Text/Pandoc/Readers/HTML.hs | 11 ++++++++++- tests/html-reader.native | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4b44a3a21..506fe7770 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -76,9 +76,18 @@ pBody :: TagParser [Block] pBody = pInTags "body" block pHead :: TagParser [Block] -pHead = pInTags "head" $ pTitle <|> ([] <$ pAnyTag) +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag) where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) + pMetaTag = do + mt <- pSatisfy (~== TagOpen "meta" []) + let name = fromAttrib "name" mt + if null name + then return [] + else do + let content = fromAttrib "content" mt + updateState $ B.setMeta name (B.text content) + return [] block :: TagParser [Block] block = choice diff --git a/tests/html-reader.native b/tests/html-reader.native index 794512426..e80905729 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Str ".",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] ,HorizontalRule ,Header 1 ("",[],[]) [Str "Headers"] -- cgit v1.2.3 From e3d48da6271a37129ae5bdb6cb57f006de8c5bfc Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 2 Jan 2014 15:22:50 -0800 Subject: Moved fixDisplayMath from Docx writer to Writer.Shared. --- src/Text/Pandoc/Writers/Docx.hs | 27 ++------------------------- src/Text/Pandoc/Writers/Shared.hs | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 25 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 67d202010..25739f7c8 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where import Data.Maybe (fromMaybe) -import Data.List ( intercalate, groupBy, isPrefixOf ) +import Data.List ( intercalate, isPrefixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -43,6 +43,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.ImageSize import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Writers.Shared (fixDisplayMath) import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) @@ -817,27 +818,3 @@ parseXml refArchive relpath = case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of Just d -> return d Nothing -> fail $ relpath ++ " missing in reference docx" - -isDisplayMath :: Inline -> Bool -isDisplayMath (Math DisplayMath _) = True -isDisplayMath _ = False - -stripLeadingTrailingSpace :: [Inline] -> [Inline] -stripLeadingTrailingSpace = go . reverse . go . reverse - where go (Space:xs) = xs - go xs = xs - -fixDisplayMath :: Block -> Block -fixDisplayMath (Plain lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath (Para lst) - | any isDisplayMath lst && not (all isDisplayMath lst) = - -- chop into several paragraphs so each displaymath is its own - Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ - groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || - not (isDisplayMath x || isDisplayMath y)) lst -fixDisplayMath x = x diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 33091ea94..604aac1c9 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -34,6 +34,7 @@ module Text.Pandoc.Writers.Shared ( , setField , defField , tagWithAttrs + , fixDisplayMath ) where import Text.Pandoc.Definition @@ -46,6 +47,7 @@ import qualified Data.Map as M import qualified Data.Text as T import Data.Aeson (FromJSON(..), fromJSON, ToJSON (..), Value(Object), Result(..)) import qualified Data.Traversable as Traversable +import Data.List ( groupBy ) -- | Create JSON value for template from a 'Meta' and an association list -- of variables, specified at the command line or in the writer. @@ -136,3 +138,28 @@ tagWithAttrs tag (ident,classes,kvs) = hsep ,hsep (map (\(k,v) -> text k <> "=" <> doubleQuotes (text (escapeStringForXML v))) kvs) ] <> ">" + +isDisplayMath :: Inline -> Bool +isDisplayMath (Math DisplayMath _) = True +isDisplayMath _ = False + +stripLeadingTrailingSpace :: [Inline] -> [Inline] +stripLeadingTrailingSpace = go . reverse . go . reverse + where go (Space:xs) = xs + go xs = xs + +-- Put display math in its own block (for ODT/DOCX). +fixDisplayMath :: Block -> Block +fixDisplayMath (Plain lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Plain . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath (Para lst) + | any isDisplayMath lst && not (all isDisplayMath lst) = + -- chop into several paragraphs so each displaymath is its own + Div ("",["math"],[]) $ map (Para . stripLeadingTrailingSpace) $ + groupBy (\x y -> (isDisplayMath x && isDisplayMath y) || + not (isDisplayMath x || isDisplayMath y)) lst +fixDisplayMath x = x -- cgit v1.2.3 From ac100f27249e4c572e687d027330ca2cc53cb1ed Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 2 Jan 2014 15:23:16 -0800 Subject: OpenDocument writer: Fixed RawInline, RawBlock so they don't escape. --- src/Text/Pandoc/Writers/OpenDocument.hs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 4ddfd7166..0029c3296 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -285,8 +285,12 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs - | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b - | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Plain b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + | Para b <- bs = if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b | Div _ xs <- bs = blocksToOpenDocument o xs | Header i _ b <- bs = setFirstPara >> (inHeaderTags i =<< inlinesToOpenDocument o b) @@ -298,8 +302,8 @@ blockToOpenDocument o bs | Table c a w h r <- bs = setFirstPara >> table c a w h r | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock f s <- bs = if f == "opendocument" - then preformatted s + | RawBlock f s <- bs = if f == Format "opendocument" + then return $ text s else return empty | Null <- bs = return empty | otherwise = return empty @@ -378,8 +382,8 @@ inlineToOpenDocument o ils | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s | Math t s <- ils = inlinesToOpenDocument o (readTeXMath' t s) | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline f s <- ils = if f == "opendocument" - then return $ preformatted s + | RawInline f s <- ils = if f == Format "opendocument" + then return $ text s else return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l | Image _ (s,t) <- ils = mkImg s t -- cgit v1.2.3 From 33955fd2efd640becfddb262c4443509521cb6cc Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 2 Jan 2014 15:23:40 -0800 Subject: ODT writer: Use mathml for proper rendering of formulas. Note: LibreOffice's support for this seems a bit buggy. But it should be better than what we had before. --- src/Text/Pandoc/Writers/ODT.hs | 50 +++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 25cd5ae13..c3652d65d 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -30,8 +30,10 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef -import Data.List ( isPrefixOf ) +import Data.List ( isPrefixOf, isSuffixOf ) import Data.Maybe ( fromMaybe ) +import Text.XML.Light.Output +import Text.TeXMath import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip @@ -41,13 +43,14 @@ import Text.Pandoc.ImageSize ( imageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Walk +import Text.Pandoc.Writers.Shared ( fixDisplayMath ) import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E import Data.Time.Clock.POSIX ( getPOSIXTime ) -import System.FilePath ( takeExtension ) +import System.FilePath ( takeExtension, takeDirectory ) -- | Produce an ODT file from a Pandoc document. writeODT :: WriterOptions -- ^ Writer options @@ -61,9 +64,9 @@ writeODT opts doc@(Pandoc meta _) = do Just f -> B.readFile f Nothing -> (B.fromChunks . (:[])) `fmap` readDataFile datadir "reference.odt" - -- handle pictures + -- handle formulas and pictures picEntriesRef <- newIORef ([] :: [Entry]) - doc' <- walkM (transformPic opts picEntriesRef) doc + doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime @@ -73,7 +76,11 @@ writeODT opts doc@(Pandoc meta _) = do $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of - Nothing -> empty + Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp + then selfClosingTag "manifest:file-entry" + [("manifest:media-type","application/vnd.oasis.opendocument.formula") + ,("manifest:full-path",fp)] + else empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp) @@ -81,6 +88,8 @@ writeODT opts doc@(Pandoc meta _) = do ] let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] + let formulas = [ takeDirectory ent ++ "/" | ent <- filesInArchive archive, + "Formula-" `isPrefixOf` ent, takeExtension ent == ".xml" ] let manifestEntry = toEntry "META-INF/manifest.xml" epochtime $ fromStringLazy $ render Nothing $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" @@ -92,6 +101,7 @@ writeODT opts doc@(Pandoc meta _) = do [("manifest:media-type","application/vnd.oasis.opendocument.text") ,("manifest:full-path","/")] $$ vcat ( map toFileEntry $ files ) + $$ vcat ( map toFileEntry $ formulas ) ) ) let archive' = addEntryToArchive manifestEntry archive @@ -119,8 +129,8 @@ writeODT opts doc@(Pandoc meta _) = do $ addEntryToArchive metaEntry archive' return $ fromArchive archive'' -transformPic :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline -transformPic opts entriesRef (Image lab (src,_)) = do +transformPicMath :: WriterOptions -> IORef [Entry] -> Inline -> IO Inline +transformPicMath opts entriesRef (Image lab (src,_)) = do res <- fetchItem (writerSourceURL opts) src case res of Left (_ :: E.SomeException) -> do @@ -137,5 +147,29 @@ transformPic opts entriesRef (Image lab (src,_)) = do let entry = toEntry newsrc epochtime $ toLazy img modifyIORef entriesRef (entry:) return $ Image lab (newsrc, tit') -transformPic _ _ x = return x +transformPicMath _ entriesRef (Math t math) = do + entries <- readIORef entriesRef + let dt = if t == InlineMath then DisplayInline else DisplayBlock + case texMathToMathML dt math of + Left _ -> return $ Math t math + Right r -> do + let conf = useShortEmptyTags (const False) defaultConfigPP + let mathml = ppcTopElement conf r + epochtime <- floor `fmap` getPOSIXTime + let dirname = "Formula-" ++ show (length entries) ++ "/" + let fname = dirname ++ "content.xml" + let entry = toEntry fname epochtime (fromStringLazy mathml) + modifyIORef entriesRef (entry:) + return $ RawInline (Format "opendocument") $ render Nothing $ + inTags False "draw:frame" [("text:anchor-type", + if t == DisplayMath + then "paragraph" + else "as-char") + ,("style:vertical-pos", "middle") + ,("style:vertical-rel", "text")] $ + selfClosingTag "draw:object" [("xlink:href", dirname) + , ("xlink:type", "simple") + , ("xlink:show", "embed") + , ("xlink:actuate", "onLoad")] +transformPicMath _ _ x = return x -- cgit v1.2.3 From b2db6979fedc5f66d45596c9564a8313af90b9fa Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 2 Jan 2014 19:32:13 -0800 Subject: Use isHeaderBlock from Shared rather than defining it anew... --- src/Text/Pandoc/Writers/FB2.hs | 6 ++---- src/Text/Pandoc/Writers/Texinfo.hs | 6 +----- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index adbe948be..803617f95 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -44,7 +44,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers) +import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock) import Text.Pandoc.Walk -- | Data to be written at the end of the document: @@ -157,9 +157,7 @@ renderSection level (ttl, body) = do else cMapM blockToXml body return $ el "section" (title ++ content) where - hasSubsections = any isHeader - isHeader (Header _ _ _) = True - isHeader _ = False + hasSubsections = any isHeaderBlock -- | Only <p> and <empty-line> are allowed within <title> in FB2. formatTitle :: [Inline] -> [Content] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index d62e50880..bf3df8035 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -293,7 +293,7 @@ blockListToTexinfo (x:xs) = do case x of Header level _ _ -> do -- We need need to insert a menu for this node. - let (before, after) = break isHeader xs + let (before, after) = break isHeaderBlock xs before' <- blockListToTexinfo before let menu = if level < 4 then collectNodes (level + 1) after @@ -315,10 +315,6 @@ blockListToTexinfo (x:xs) = do xs' <- blockListToTexinfo xs return $ x' $$ xs' -isHeader :: Block -> Bool -isHeader (Header _ _ _) = True -isHeader _ = False - collectNodes :: Int -> [Block] -> [Block] collectNodes _ [] = [] collectNodes level (x:xs) = -- cgit v1.2.3 From 4e7aadb903548bb08c9132c93699071e52f61d76 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 2 Jan 2014 19:59:33 -0800 Subject: HTML writer: With --toc, headers no longer link to themselves. Closes #1081. --- src/Text/Pandoc/Writers/HTML.hs | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 129776363..c8a7e802b 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -475,28 +475,21 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (ident,_,_) lst) = do +blockToHtml opts (Header level (_,_,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents - let revealSlash = ['/' | writerSlideVariant opts == RevealJsSlides] - let contents'' = if writerTableOfContents opts && not (null ident) - then H.a ! A.href (toValue $ - '#' : revealSlash ++ - writerIdentifierPrefix opts ++ - ident) $ contents' - else contents' return $ case level of - 1 -> H.h1 contents'' - 2 -> H.h2 contents'' - 3 -> H.h3 contents'' - 4 -> H.h4 contents'' - 5 -> H.h5 contents'' - 6 -> H.h6 contents'' - _ -> H.p contents'' + 1 -> H.h1 contents' + 2 -> H.h2 contents' + 3 -> H.h3 contents' + 4 -> H.h4 contents' + 5 -> H.h5 contents' + 6 -> H.h6 contents' + _ -> H.p contents' blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst return $ unordList opts contents -- cgit v1.2.3 From 452a140d0cd808abbdfbb1f91dc35280b1f90794 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 2 Jan 2014 21:09:39 -0800 Subject: Pretty: Added nestle. API change, minor version bump to 1.12.3. --- pandoc.cabal | 2 +- src/Text/Pandoc/Pretty.hs | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 976b71785..c794822bc 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.12.2.1 +Version: 1.12.3 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index faf2a6797..033511832 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -60,6 +60,7 @@ module Text.Pandoc.Pretty ( , hsep , vcat , vsep + , nestle , chomp , inside , braces @@ -72,7 +73,7 @@ module Text.Pandoc.Pretty ( ) where -import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex) +import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex, viewl, ViewL(..)) import Data.Foldable (toList) import Data.List (intercalate) import Data.Monoid @@ -186,6 +187,14 @@ vcat = foldr ($$) empty vsep :: [Doc] -> Doc vsep = foldr ($+$) empty +-- | Removes leading blank lines from a 'Doc'. +nestle :: Doc -> Doc +nestle (Doc d) = Doc $ go d + where go x = case viewl x of + (BlankLine :< rest) -> go rest + (NewLine :< rest) -> go rest + _ -> x + -- | Chomps trailing blank space off of a 'Doc'. chomp :: Doc -> Doc chomp d = Doc (fromList dl') -- cgit v1.2.3 From e7e76dbdd8b07b1fd80bda1599d58b7fbc7cf4bd Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 2 Jan 2014 21:10:14 -0800 Subject: RST writer: Ensure no blank line after def in definition list. Closes #992. --- src/Text/Pandoc/Writers/RST.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 1a62f7250..37bb66632 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -287,7 +287,7 @@ definitionListItemToRST (label, defs) = do label' <- inlineListToRST label contents <- liftM vcat $ mapM blockListToRST defs tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $$ nest tabstop (contents <> cr) + return $ label' $$ nest tabstop (nestle contents <> cr) -- | Convert list of Pandoc block elements to RST. blockListToRST :: [Block] -- ^ List of block elements -- cgit v1.2.3 From 2dd6d892fa310135cece1d8e6d6bac7b538f4e76 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 6 Jan 2014 10:17:31 -0800 Subject: HTML writer: Omit footnotes from TOC entries. Otherwise we get doubled footnotes when headers have notes! --- src/Text/Pandoc/Writers/HTML.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c8a7e802b..805bb57f1 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -749,7 +749,9 @@ inlineToHtml opts inline = else [A.title $ toValue tit]) return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl - (Note contents) -> do + (Note contents) + | writerIgnoreNotes opts -> return mempty + | otherwise -> do st <- get let notes = stNotes st let number = (length notes) + 1 -- cgit v1.2.3 From ba6a26b25871a3912556f1a17330d2363b3f3db5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 6 Jan 2014 12:12:21 -0800 Subject: EPUB writer: Avoid duplicate notes when headings contain notes. This arose because the headings are copied into the metadata "title" field, and the note gets rendered twice. We strip the note now before putting the heading in "title". --- src/Text/Pandoc/Writers/EPUB.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 4daa9609e..d2dd7da2e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -397,6 +397,10 @@ writeEPUB opts doc@(Pandoc meta _) = do let chapters = evalState (toChapters blocks'') [] + let removeNote :: Inline -> Inline + removeNote (Note _) = Str "" + removeNote x = x + let chapToEntry :: Int -> Chapter -> Entry chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) $ renderHtml @@ -404,7 +408,9 @@ writeEPUB opts doc@(Pandoc meta _) = do fromMaybe [] mbnum } $ case bs of (Header _ _ xs : _) -> - Pandoc (setMeta "title" (fromList xs) nullMeta) bs + -- remove notes or we get doubled footnotes + Pandoc (setMeta "title" (walk removeNote $ fromList xs) + nullMeta) bs _ -> Pandoc nullMeta bs -- cgit v1.2.3 From d97b1fd14cd2b0f96fcf3bcc10cfc2540fbbe624 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 6 Jan 2014 21:51:11 -0800 Subject: EPUB writer: Strip out footnotes from toc entries. --- src/Text/Pandoc/Writers/EPUB.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index d2dd7da2e..a48300939 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -124,7 +124,12 @@ opfName n = QName n Nothing (Just "opf") plainify :: [Inline] -> String plainify t = - trimr $ writePlain def{ writerStandalone = False } $ Pandoc nullMeta [Plain t] + trimr $ writePlain def{ writerStandalone = False } + $ Pandoc nullMeta [Plain $ walk removeNote t] + +removeNote :: Inline -> Inline +removeNote (Note _) = Str "" +removeNote x = x getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata getEPUBMetadata opts meta = do @@ -397,10 +402,6 @@ writeEPUB opts doc@(Pandoc meta _) = do let chapters = evalState (toChapters blocks'') [] - let removeNote :: Inline -> Inline - removeNote (Note _) = Str "" - removeNote x = x - let chapToEntry :: Int -> Chapter -> Entry chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num) $ renderHtml -- cgit v1.2.3 From 002c8ce14ca15bfbc800e9628cf09babf3d96acd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 7 Jan 2014 09:01:32 -0800 Subject: Fixed small regression in docx writer. --- src/Text/Pandoc/Writers/Docx.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 25739f7c8..2a834c2da 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to docx. -} module Text.Pandoc.Writers.Docx ( writeDocx ) where import Data.Maybe (fromMaybe) -import Data.List ( intercalate, isPrefixOf ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BL8 @@ -265,6 +265,7 @@ writeDocx opts doc@(Pandoc meta _) = do webSettingsEntry <- entryFromArchive "word/webSettings.xml" let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f + , ".xml.rels" `isSuffixOf` f , f /= "word/_rels/document.xml.rels" , f /= "word/_rels/footnotes.xml.rels" ] miscRelEntries <- mapM entryFromArchive miscRels @@ -815,6 +816,8 @@ br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] parseXml :: Archive -> String -> IO Element parseXml refArchive relpath = - case (findEntryByPath relpath refArchive >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of - Just d -> return d + case findEntryByPath relpath refArchive of + Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of + Just d -> return d + Nothing -> fail $ relpath ++ " corrupt in reference docx" Nothing -> fail $ relpath ++ " missing in reference docx" -- cgit v1.2.3 From 2c7bf41d26dfe293f32830d92d5bfd13f2cf2b89 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 7 Jan 2014 10:32:47 -0800 Subject: Added wmf and emf mime types. --- src/Text/Pandoc/MIME.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index f41aa98bb..44989ee94 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -147,6 +147,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("dxr","application/x-director") ,("emb","chemical/x-embl-dl-nucleotide") ,("embl","chemical/x-embl-dl-nucleotide") + ,("emf","image/x-emf") ,("eml","message/rfc822") ,("ent","chemical/x-ncbi-asn1-ascii") ,("eot","application/vnd.ms-fontobject") @@ -465,6 +466,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("wm","video/x-ms-wm") ,("wma","audio/x-ms-wma") ,("wmd","application/x-ms-wmd") + ,("wmf","image/x-wmf") ,("wml","text/vnd.wap.wml") ,("wmlc","application/vnd.wap.wmlc") ,("wmls","text/vnd.wap.wmlscript") -- cgit v1.2.3 From d9eff99f27bb6426534651c9ff85b2b4354e6d16 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 7 Jan 2014 23:39:49 -0800 Subject: Markdown reader: Allow hard line breaks in table cells. The \-newline form must be used; the two-space+newline form won't work, since in a table cell nearly every line ends with two spaces. --- src/Text/Pandoc/Readers/Markdown.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 3a5d29b4e..88d0bf439 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1126,12 +1126,12 @@ multilineTableHeader headless = try $ do then liftM (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine else return $ transpose $ map - (\ln -> tail $ splitStringByIndices (init indices) ln) + (tail . splitStringByIndices (init indices)) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" - else map unwords rawHeadsList + else map (unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) $ map trim rawHeads @@ -1188,7 +1188,7 @@ gridTableHeader headless = try $ do -- RST does not have a notion of alignments let rawHeads = if headless then replicate (length dashes) "" - else map unwords $ transpose + else map (unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent heads <- fmap sequence $ mapM (parseFromString parseBlocks . trim) rawHeads return (heads, aligns, indices) -- cgit v1.2.3 From aada7b495bf4af9912603b3b7649dd0d63f9b5fc Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Wed, 8 Jan 2014 12:04:08 -0800 Subject: fetchItem: Handle image URLs beginning with '//'. --- src/Text/Pandoc/Shared.hs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 3446f4343..714402e42 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -91,7 +91,8 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha, isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M -import Network.URI ( escapeURIString, isURI, unEscapeString ) +import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, + unEscapeString, parseURIReference ) import System.Directory import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension ) @@ -623,9 +624,13 @@ fetchItem :: Maybe String -> String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem sourceURL s | isURI s = openURL s - | otherwise = case sourceURL of - Just u -> openURL (u ++ "/" ++ s) - Nothing -> E.try readLocalFile + | otherwise = + case sourceURL >>= parseURIReference of + Just u -> case parseURIReference s of + Just s' -> openURL $ show $ + s' `nonStrictRelativeTo` u + Nothing -> openURL $ show u ++ "/" ++ s + Nothing -> E.try readLocalFile where readLocalFile = do let mime = case takeExtension s of ".gz" -> getMimeType $ dropExtension s -- cgit v1.2.3 From 3bf8012bf6e6965a68de76d5bb46782086393da7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Wed, 8 Jan 2014 19:33:14 -0800 Subject: Text.Pandoc.ImageSize: Parse EXIF format JPGs. Note: For now we just assign them all 72 dpi. It wasn't clear to me how to extract the resolution information. At least the aspect ratio will be right, and 72 dpi is the most common setting. Closes #976. --- src/Text/Pandoc/ImageSize.hs | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 9b0850efb..e2a8b8283 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -53,7 +53,8 @@ imageType :: ByteString -> Maybe ImageType imageType img = case B.take 4 img of "\x89\x50\x4e\x47" -> return Png "\x47\x49\x46\x38" -> return Gif - "\xff\xd8\xff\xe0" -> return Jpeg + "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF + "\xff\xd8\xff\xe1" -> return Jpeg -- Exif "%PDF" -> return Pdf "%!PS" | (B.take 4 $ B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" @@ -139,8 +140,14 @@ gifSize img = do jpegSize :: ByteString -> Maybe ImageSize jpegSize img = do let (hdr, rest) = B.splitAt 4 img - guard $ hdr == "\xff\xd8\xff\xe0" guard $ B.length rest >= 14 + case hdr of + "\xff\xd8\xff\xe0" -> jfifSize rest + "\xff\xd8\xff\xe1" -> exifSize rest + _ -> mzero + +jfifSize :: ByteString -> Maybe ImageSize +jfifSize rest = do let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral $ unpack $ B.take 5 $ B.drop 9 $ rest let factor = case dpiDensity of @@ -149,11 +156,11 @@ jpegSize img = do _ -> const 72 let dpix = factor (shift dpix1 8 + dpix2) let dpiy = factor (shift dpiy1 8 + dpiy2) - (w,h) <- findJpegSize rest + (w,h) <- findJfifSize rest return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy } -findJpegSize :: ByteString -> Maybe (Integer,Integer) -findJpegSize bs = do +findJfifSize :: ByteString -> Maybe (Integer,Integer) +findJfifSize bs = do let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs case B.uncons bs' of Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do @@ -165,8 +172,23 @@ findJpegSize bs = do [c1,c2] -> do let len = shift c1 8 + c2 -- skip variables - findJpegSize $ B.drop len bs'' + findJfifSize $ B.drop len bs'' _ -> fail "JPEG parse error" Nothing -> fail "Did not find length record" +exifSize :: ByteString -> Maybe ImageSize +exifSize rest = do + let bs' = B.takeWhile (/='\xff') $ B.drop 8 rest -- exif data + let (_,bs'') = B.breakSubstring "\xa0\x02" bs' -- width + let rawWidth = B.take 2 $ B.drop 10 bs'' + let (_,bs''') = B.breakSubstring "\xa0\x03" bs' -- height + let rawHeight = B.take 2 $ B.drop 10 bs''' + let tonum bs = case map fromIntegral $ unpack bs of + [x,y] -> Just $ shift x 8 + y + _ -> Nothing + case (tonum rawWidth, tonum rawHeight) of + (Just w, Just h) -> + return $ ImageSize { pxX = w, pxY = h, dpiX = 72, dpiY = 72 } + _ -> fail "Could not determine exif image size" + -- some day, figure out how to parse dpi from exif -- cgit v1.2.3 From 5c8c380a7997156964a5402974f6f464233aab9b Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 9 Jan 2014 11:16:17 -0800 Subject: Better exif parsing, including image resolution. This introduces a dependency on binary >= 0.6, but we depend on binary >= 0.5 via zip-archive anyway. Closes #976. --- pandoc.cabal | 3 +- src/Text/Pandoc/ImageSize.hs | 225 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 212 insertions(+), 16 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index a1a4c9b40..94d382c4f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -232,7 +232,8 @@ Library attoparsec >= 0.10 && < 0.11, yaml >= 0.8.3 && < 0.9, vector >= 0.10 && < 0.11, - hslua >= 0.3 && < 0.4 + hslua >= 0.3 && < 0.4, + binary >= 0.6 && < 0.8 Build-Tools: alex, happy if flag(http-conduit) Build-Depends: http-conduit >= 1.9 && < 2.1, diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e2a8b8283..467205220 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -32,9 +32,14 @@ module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize, sizeInPixels, sizeInPoints ) where import Data.ByteString (ByteString, unpack) import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Lazy as BL +import Control.Applicative import Control.Monad import Data.Bits +import Data.Binary +import Data.Binary.Get import Text.Pandoc.Shared (safeRead) +import qualified Data.Map as M -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -143,7 +148,7 @@ jpegSize img = do guard $ B.length rest >= 14 case hdr of "\xff\xd8\xff\xe0" -> jfifSize rest - "\xff\xd8\xff\xe1" -> exifSize rest + "\xff\xd8\xff\xe1" -> exifSize $ B.takeWhile (/= '\xff') rest _ -> mzero jfifSize :: ByteString -> Maybe ImageSize @@ -177,18 +182,208 @@ findJfifSize bs = do Nothing -> fail "Did not find length record" exifSize :: ByteString -> Maybe ImageSize -exifSize rest = do - let bs' = B.takeWhile (/='\xff') $ B.drop 8 rest -- exif data - let (_,bs'') = B.breakSubstring "\xa0\x02" bs' -- width - let rawWidth = B.take 2 $ B.drop 10 bs'' - let (_,bs''') = B.breakSubstring "\xa0\x03" bs' -- height - let rawHeight = B.take 2 $ B.drop 10 bs''' - let tonum bs = case map fromIntegral $ unpack bs of - [x,y] -> Just $ shift x 8 + y - _ -> Nothing - case (tonum rawWidth, tonum rawHeight) of - (Just w, Just h) -> - return $ ImageSize { pxX = w, pxY = h, dpiX = 72, dpiY = 72 } - _ -> fail "Could not determine exif image size" - -- some day, figure out how to parse dpi from exif +exifSize = -- runGet ((Just <$> exifHeader) `mplus` return Nothing) . + runGet (Just <$> exifHeader) . + BL.fromChunks . (:[]) +exifHeader :: Get ImageSize +exifHeader = do + _app1DataSize <- getWord16be + exifHdr <- getWord32be + unless (exifHdr == 0x45786966) $ fail "Did not find exif header" + zeros <- getWord16be + unless (zeros == 0) $ fail "Expected zeros after exif header" + -- beginning of tiff header -- we read whole thing to use + -- in getting data from offsets: + tiffHeader <- lookAhead getRemainingLazyByteString + byteAlign <- getWord16be + let bigEndian = byteAlign == 0x4d4d + let (getWord16, getWord32, getWord64) = + if bigEndian + then (getWord16be, getWord32be, getWord64be) + else (getWord16le, getWord32le, getWord64le) + let getRational = do + num <- getWord32 + den <- getWord32 + return $ fromIntegral num / fromIntegral den + tagmark <- getWord16 + unless (tagmark == 0x002a) $ fail "Failed alignment sanity check" + ifdOffset <- getWord32 + skip (fromIntegral ifdOffset - 8) -- skip to IDF + numentries <- getWord16 + let ifdEntry = do + tag <- getWord16 >>= \t -> + maybe (fail $ "Unknown tag type " ++ show t) return + (M.lookup t tagTypeTable) + dataFormat <- getWord16 + numComponents <- getWord32 + (fmt, bytesPerComponent) <- + case dataFormat of + 1 -> return (UnsignedByte . runGet getWord8, 1) + 2 -> return (AsciiString, 1) + 3 -> return (UnsignedShort . runGet getWord16, 2) + 4 -> return (UnsignedLong . runGet getWord32, 4) + 5 -> return (UnsignedRational . runGet getRational, 8) + 6 -> return (SignedByte . runGet getWord8, 1) + 7 -> return (Undefined . runGet getWord8, 1) + 8 -> return (SignedShort . runGet getWord16, 2) + 9 -> return (SignedLong . runGet getWord32, 4) + 10 -> return (SignedRational . runGet getRational, 8) + 11 -> return (SingleFloat . runGet getWord32 {- TODO -}, 4) + 12 -> return (DoubleFloat . runGet getWord64 {- TODO -}, 8) + _ -> fail $ "Unknown data format " ++ show dataFormat + let totalBytes = fromIntegral $ numComponents * bytesPerComponent + payload <- if totalBytes <= 4 -- data is right here + then (fmt . BL.fromChunks . (:[])) <$> + (getByteString totalBytes <* + skip (4 - totalBytes)) + else do -- get data from offset + offs <- getWord32 + return $ fmt $ BL.take (fromIntegral totalBytes) $ + BL.drop (fromIntegral offs) tiffHeader + return (tag, payload) + entries <- sequence $ replicate (fromIntegral numentries) ifdEntry + subentries <- case lookup ExifOffset entries of + Just (UnsignedLong offset) -> do + pos <- bytesRead + skip (fromIntegral offset - (fromIntegral pos - 8)) + numsubentries <- getWord16 + sequence $ + replicate (fromIntegral numsubentries) ifdEntry + _ -> return [] + let allentries = entries ++ subentries + (width, height) <- case (lookup ExifImageWidth allentries, + lookup ExifImageHeight allentries) of + (Just (UnsignedLong w), Just (UnsignedLong h)) -> + return (fromIntegral w, fromIntegral h) + _ -> fail "Could not determine image width, height" + let resfactor = case lookup ResolutionUnit allentries of + Just (UnsignedShort 1) -> (100 / 254) + _ -> 1 + let xres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) + $ lookup XResolution allentries + let yres = maybe 72 (\(UnsignedRational x) -> floor $ x * resfactor) + $ lookup YResolution allentries + return $ ImageSize{ + pxX = width + , pxY = height + , dpiX = xres + , dpiY = yres } + +data DataFormat = UnsignedByte Word8 + | AsciiString BL.ByteString + | UnsignedShort Word16 + | UnsignedLong Word32 + | UnsignedRational Rational + | SignedByte Word8 + | Undefined Word8 + | SignedShort Word16 + | SignedLong Word32 + | SignedRational Rational + | SingleFloat Word32 + | DoubleFloat Word64 + deriving (Show) + +data TagType = ImageDescription + | Make + | Model + | Orientation + | XResolution + | YResolution + | ResolutionUnit + | Software + | DateTime + | WhitePoint + | PrimaryChromaticities + | YCbCrCoefficients + | YCbCrPositioning + | ReferenceBlackWhite + | Copyright + | ExifOffset + | ExposureTime + | FNumber + | ExposureProgram + | ISOSpeedRatings + | ExifVersion + | DateTimeOriginal + | DateTimeDigitized + | ComponentConfiguration + | CompressedBitsPerPixel + | ShutterSpeedValue + | ApertureValue + | BrightnessValue + | ExposureBiasValue + | MaxApertureValue + | SubjectDistance + | MeteringMode + | LightSource + | Flash + | FocalLength + | MakerNote + | UserComment + | FlashPixVersion + | ColorSpace + | ExifImageWidth + | ExifImageHeight + | RelatedSoundFile + | ExifInteroperabilityOffset + | FocalPlaneXResolution + | FocalPlaneYResolution + | FocalPlaneResolutionUnit + | SensingMethod + | FileSource + | SceneType + deriving (Show, Eq, Ord) + +tagTypeTable :: M.Map Word16 TagType +tagTypeTable = M.fromList + [ (0x010e, ImageDescription) + , (0x010f, Make) + , (0x0110, Model) + , (0x0112, Orientation) + , (0x011a, XResolution) + , (0x011b, YResolution) + , (0x0128, ResolutionUnit) + , (0x0131, Software) + , (0x0132, DateTime) + , (0x013e, WhitePoint) + , (0x013f, PrimaryChromaticities) + , (0x0211, YCbCrCoefficients) + , (0x0213, YCbCrPositioning) + , (0x0214, ReferenceBlackWhite) + , (0x8298, Copyright) + , (0x8769, ExifOffset) + , (0x829a, ExposureTime) + , (0x829d, FNumber) + , (0x8822, ExposureProgram) + , (0x8827, ISOSpeedRatings) + , (0x9000, ExifVersion) + , (0x9003, DateTimeOriginal) + , (0x9004, DateTimeDigitized) + , (0x9101, ComponentConfiguration) + , (0x9102, CompressedBitsPerPixel) + , (0x9201, ShutterSpeedValue) + , (0x9202, ApertureValue) + , (0x9203, BrightnessValue) + , (0x9204, ExposureBiasValue) + , (0x9205, MaxApertureValue) + , (0x9206, SubjectDistance) + , (0x9207, MeteringMode) + , (0x9208, LightSource) + , (0x9209, Flash) + , (0x920a, FocalLength) + , (0x927c, MakerNote) + , (0x9286, UserComment) + , (0xa000, FlashPixVersion) + , (0xa001, ColorSpace) + , (0xa002, ExifImageWidth) + , (0xa003, ExifImageHeight) + , (0xa004, RelatedSoundFile) + , (0xa005, ExifInteroperabilityOffset) + , (0xa20e, FocalPlaneXResolution) + , (0xa20f, FocalPlaneYResolution) + , (0xa210, FocalPlaneResolutionUnit) + , (0xa217, SensingMethod) + , (0xa300, FileSource) + , (0xa301, SceneType) + ] -- cgit v1.2.3 From b9b1546ed238ca8fd4d65a6e02fa7ecbf9a4be65 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 9 Jan 2014 11:25:24 -0800 Subject: Markdown parser: be more permissive about citation keys. Keys may now start with an underscore as well as a letter. Underscores do not count as internal punctuation, but are treated like alphanumerics, so "key:_2008" will work, as it did not before. (This change was necessary to use keys generated by zotero.) Closes #1111, closes #1011. --- src/Text/Pandoc/Readers/Markdown.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 88d0bf439..3feafd362 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1811,9 +1811,10 @@ citeKey = try $ do guard $ lastStrPos /= Just pos suppress_author <- option False (char '-' >> return True) char '@' - first <- letter - let internal p = try $ p >>~ lookAhead (letter <|> digit) - rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_+?<>~/") + first <- letter <|> char '_' + let regchar = satisfy (\c -> isAlphaNum c || c == '_') + let internal p = try $ p >>~ lookAhead regchar + rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") let key = first:rest return (suppress_author, key) -- cgit v1.2.3 From b4b16d57865b1d951be79f04f8ad41fb70544077 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Thu, 9 Jan 2014 22:50:51 -0800 Subject: Minor improvement to exif parser. --- src/Text/Pandoc/ImageSize.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 467205220..d1aacff1c 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -234,8 +234,8 @@ exifHeader = do _ -> fail $ "Unknown data format " ++ show dataFormat let totalBytes = fromIntegral $ numComponents * bytesPerComponent payload <- if totalBytes <= 4 -- data is right here - then (fmt . BL.fromChunks . (:[])) <$> - (getByteString totalBytes <* + then fmt <$> + (getLazyByteString (fromIntegral totalBytes) <* skip (4 - totalBytes)) else do -- get data from offset offs <- getWord32 -- cgit v1.2.3 From a1abb3eeea2321654a8450725ff6c0d1a18ee0c7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 14 Jan 2014 10:12:33 -0800 Subject: Allow binary 0.5. Version bump to 1.12.3.1. --- changelog | 4 ++++ pandoc.cabal | 4 ++-- src/Text/Pandoc/ImageSize.hs | 9 ++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/changelog b/changelog index 4cbc60388..fde6873ec 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,7 @@ +pandoc (1.12.3.1) + + * Relaxed version constraint on binary, allowing the use of binary 0.5. + pandoc (1.12.3) * The `--bibliography` option now sets the `biblio-files` variable. diff --git a/pandoc.cabal b/pandoc.cabal index c8ff9738a..0199996be 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,5 +1,5 @@ Name: pandoc -Version: 1.12.3 +Version: 1.12.3.1 Cabal-Version: >= 1.10 Build-Type: Custom License: GPL @@ -233,7 +233,7 @@ Library yaml >= 0.8.3 && < 0.9, vector >= 0.10 && < 0.11, hslua >= 0.3 && < 0.4, - binary >= 0.6 && < 0.8 + binary >= 0.5 && < 0.8 Build-Tools: alex, happy if flag(http-conduit) Build-Depends: http-conduit >= 1.9 && < 2.1, diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index d1aacff1c..14575244d 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -182,9 +182,12 @@ findJfifSize bs = do Nothing -> fail "Did not find length record" exifSize :: ByteString -> Maybe ImageSize -exifSize = -- runGet ((Just <$> exifHeader) `mplus` return Nothing) . - runGet (Just <$> exifHeader) . - BL.fromChunks . (:[]) +exifSize = runGet (Just <$> exifHeader) . BL.fromChunks . (:[]) +-- NOTE: It would be nicer to do +-- runGet ((Just <$> exifHeader) <|> return Nothing) +-- which would prevent pandoc from raising an error when an exif header can't +-- be parsed. But we only get an Alternative instance for Get in binary 0.6, +-- and binary 0.5 ships with ghc 7.6. exifHeader :: Get ImageSize exifHeader = do -- cgit v1.2.3 From 6c59f060a769c7f312137d5c1a49cf878ea5b2a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 20 Jan 2014 11:09:44 -0800 Subject: HTML reader: Fixed bug reading inline math with `$$`. See #225. --- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 506fe7770..d1e4d0024 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -476,8 +476,8 @@ pBlank = try $ do pTagContents :: Parser [Char] ParserState Inline pTagContents = - Math InlineMath `fmap` mathInline - <|> Math DisplayMath `fmap` mathDisplay + Math DisplayMath `fmap` mathDisplay + <|> Math InlineMath `fmap` mathInline <|> pStr <|> pSpace <|> smartPunctuation pTagContents -- cgit v1.2.3 From 9f3b2f6f5d06a4cf3142ffc74c8de4c1cc2bd928 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 22 Jan 2014 22:07:13 -0800 Subject: Fixed mediawiki ordered list parsing. Closes #1122. --- src/Text/Pandoc/Readers/MediaWiki.hs | 25 +++++++++++++++---------- tests/mediawiki-reader.wiki | 4 ++-- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 8d8ea0199..794890eb6 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -149,9 +149,16 @@ inlinesInTags tag = try $ do blocksInTags :: String -> MWParser Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) + let closer = if tag == "li" + then htmlTag (~== TagClose "li") + <|> lookAhead ( + htmlTag (~== TagOpen "li" []) + <|> htmlTag (~== TagClose "ol") + <|> htmlTag (~== TagClose "ul")) + else htmlTag (~== TagClose tag) if '/' `elem` raw -- self-closing tag then return mempty - else mconcat <$> manyTill block (htmlTag (~== TagClose tag)) + else mconcat <$> manyTill block closer charsInTags :: String -> MWParser [Char] charsInTags tag = try $ do @@ -381,15 +388,13 @@ bulletList = B.bulletList <$> orderedList :: MWParser Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) - <|> (B.orderedList <$> (htmlTag (~== TagOpen "ul" []) *> spaces *> - many (listItem '#' <|> li) <* - optional (htmlTag (~== TagClose "ul")))) - <|> do (tag,_) <- htmlTag (~== TagOpen "ol" []) - spaces - items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) - let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag - return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items + <|> try + (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + spaces + items <- many (listItem '#' <|> li) + optional (htmlTag (~== TagClose "ol")) + let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag + return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) definitionList :: MWParser Blocks definitionList = B.definitionList <$> many1 defListItem diff --git a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki index 26f4ef164..c0c22bec6 100644 --- a/tests/mediawiki-reader.wiki +++ b/tests/mediawiki-reader.wiki @@ -232,11 +232,11 @@ ends the list. <li>list item A2</li> </ol> -<ul> +<ol> #abc #def #ghi -</ul> +</ol> <ol start="9"> <li>Amsterdam</li> -- cgit v1.2.3 From a333d9788e0f510f681ae1b5f0f246434ee15d62 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 24 Jan 2014 16:00:02 -0800 Subject: ImageSize: Avoid use of lookAhead, which is not in binary >= 0.6. Closes #1124. --- src/Text/Pandoc/ImageSize.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 14575244d..3c9623b3c 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -182,15 +182,16 @@ findJfifSize bs = do Nothing -> fail "Did not find length record" exifSize :: ByteString -> Maybe ImageSize -exifSize = runGet (Just <$> exifHeader) . BL.fromChunks . (:[]) +exifSize bs = runGet (Just <$> exifHeader bl) bl + where bl = BL.fromChunks [bs] -- NOTE: It would be nicer to do -- runGet ((Just <$> exifHeader) <|> return Nothing) -- which would prevent pandoc from raising an error when an exif header can't -- be parsed. But we only get an Alternative instance for Get in binary 0.6, -- and binary 0.5 ships with ghc 7.6. -exifHeader :: Get ImageSize -exifHeader = do +exifHeader :: BL.ByteString -> Get ImageSize +exifHeader hdr = do _app1DataSize <- getWord16be exifHdr <- getWord32be unless (exifHdr == 0x45786966) $ fail "Did not find exif header" @@ -198,7 +199,7 @@ exifHeader = do unless (zeros == 0) $ fail "Expected zeros after exif header" -- beginning of tiff header -- we read whole thing to use -- in getting data from offsets: - tiffHeader <- lookAhead getRemainingLazyByteString + let tiffHeader = BL.drop 8 hdr byteAlign <- getWord16be let bigEndian = byteAlign == 0x4d4d let (getWord16, getWord32, getWord64) = -- cgit v1.2.3 From 3127ab2b5ede10224f9f86722125573d34d7d3dd Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Tue, 4 Feb 2014 10:05:52 -0800 Subject: Slight code reorganization in endline. --- src/Text/Pandoc/Readers/Markdown.hs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 3feafd362..8a41cef49 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1540,16 +1540,14 @@ endline :: MarkdownParser (F Inlines) endline = try $ do newline notFollowedBy blankline + -- parse potential list-starts differently if in a list: + st <- getState + when (stateParserContext st == ListItemState) $ notFollowedBy listStart guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header guardEnabled Ext_backtick_code_blocks >> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) - -- parse potential list-starts differently if in a list: - st <- getState - when (stateParserContext st == ListItemState) $ do - notFollowedBy' bulletListStart - notFollowedBy' anyOrderedListStart (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) <|> (return $ return B.space) -- cgit v1.2.3 From 286781f8014cd40ad741e52b254904ffa7dc2855 Mon Sep 17 00:00:00 2001 From: Merijn Verstraaten <merijn@inconsistent.nl> Date: Thu, 6 Feb 2014 23:10:59 +0100 Subject: Removed RenderState datatype context. Reasoning: - It's not Haskell2010 - It breaks some tools - Doesn't actually do anything - RenderState doesn't even have a Monoid instance --- src/Text/Pandoc/Pretty.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 033511832..5331587ce 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -81,8 +81,7 @@ import Data.String import Control.Monad.State import Data.Char (isSpace) -data Monoid a => - RenderState a = RenderState{ +data RenderState a = RenderState{ output :: [a] -- ^ In reverse order , prefix :: String , usePrefix :: Bool -- cgit v1.2.3 From 3f0fe345f9aa69d1faf36e6a6f913013f21b3749 Mon Sep 17 00:00:00 2001 From: Vaclav Zeman <vhaisman@gmail.com> Date: Sat, 8 Feb 2014 13:40:04 +0100 Subject: Use \/ to avoid en-dash ligature instead of -{}-. This is to fix LuaLaTeX output. The -{}- sequence does not avoid the ligature with LuaLaTeX but \/ does. --- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 72b0bde6d..dbb9b477a 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -201,7 +201,7 @@ stringToLaTeX ctx (x:xs) = do '_' | not isUrl -> "\\_" ++ rest '#' -> "\\#" ++ rest '-' -> case xs of -- prevent adjacent hyphens from forming ligatures - ('-':_) -> "-{}" ++ rest + ('-':_) -> "-\\/" ++ rest _ -> '-' : rest '~' | not isUrl -> "\\textasciitilde{}" ++ rest '^' -> "\\^{}" ++ rest -- cgit v1.2.3 From fe246ce01c4c523b7391d58d910af09bf3bac6e6 Mon Sep 17 00:00:00 2001 From: Merijn Verstraaten <merijn@inconsistent.nl> Date: Sat, 15 Feb 2014 17:51:33 +0100 Subject: Enhanced Pandoc's support for rST roles. rST parser now supports: - All built-in rST roles - New role definition - Role inheritance Issues/TODO: - Silently ignores illegal fields on roles - Silently drops class annotations for roles - Only supports :format: fields with a single format for :raw: roles, requires a change to Text.Pandoc.Definition.Format to support multiple formats. - Allows direct use of :raw: role, rST only allows indirect (i.e., inherited use of :raw:). --- src/Text/Pandoc/Parsing.hs | 2 + src/Text/Pandoc/Readers/RST.hs | 91 +++++++++++++++++++++++++++++++++++++----- tests/rst-reader.native | 10 +++++ tests/rst-reader.rst | 24 +++++++++++ 4 files changed, 117 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 2f21e1253..0713f4a96 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -853,6 +853,7 @@ data ParserState = ParserState stateHasChapters :: Bool, -- ^ True if \chapter encountered stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role + stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles stateWarnings :: [String] -- ^ Warnings generated by the parser } @@ -915,6 +916,7 @@ defaultParserState = stateHasChapters = False, stateMacros = [], stateRstDefaultRole = "title-reference", + stateRstCustomRoles = M.empty, stateWarnings = []} getOption :: (ReaderOptions -> a) -> Parser s ParserState a diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index c12a1493a..a46a3a6c6 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -36,12 +36,13 @@ import Text.Pandoc.Builder (setMeta, fromList) import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Options -import Control.Monad ( when, liftM, guard, mzero ) +import Control.Monad ( when, liftM, guard, mzero, mplus ) import Data.List ( findIndex, intersperse, intercalate, transpose, sort, deleteFirstsBy, isSuffixOf ) +import Data.Maybe (fromMaybe) import qualified Data.Map as M import Text.Printf ( printf ) -import Control.Applicative ((<$>), (<$), (<*), (*>)) +import Control.Applicative ((<$>), (<$), (<*), (*>), (<*>)) import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) @@ -530,7 +531,7 @@ directive' = do let body' = body ++ "\n\n" case label of "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) - "role" -> return mempty + "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields "container" -> parseFromString parseBlocks body' "replace" -> B.para <$> -- consumed by substKey parseFromString (trimInlines . mconcat <$> many inline) @@ -591,7 +592,38 @@ directive' = do Nothing -> B.image src "" alt _ -> return mempty --- Can contain haracter codes as decimal numbers or +-- TODO: +-- - Silently ignores illegal fields +-- - Silently drops classes +-- - Only supports :format: fields with a single format for :raw: roles, +-- change Text.Pandoc.Definition.Format to fix +addNewRole :: String -> [(String, String)] -> RSTParser Blocks +addNewRole roleString fields = do + (role, parentRole) <- parseFromString inheritedRole roleString + customRoles <- stateRstCustomRoles <$> getState + baseRole <- case M.lookup parentRole customRoles of + Just (base, _, _) -> return base + Nothing -> return parentRole + + let fmt = if baseRole == "raw" then lookup "format" fields else Nothing + annotate = maybe id addLanguage $ + if baseRole == "code" + then lookup "language" fields + else Nothing + + updateState $ \s -> s { + stateRstCustomRoles = + M.insert role (baseRole, fmt, (,) parentRole . annotate) customRoles + } + + return $ B.singleton Null + where + addLanguage lang (ident, classes, keyValues) = + (ident, "sourceCode" : lang : classes, keyValues) + inheritedRole = + (,) <$> roleNameEndingIn (char '(') <*> roleNameEndingIn (char ')') + +-- Can contain character codes as decimal numbers or -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u -- or as XML-style hexadecimal character entities, e.g. ᨫ -- or text, which is used as-is. Comments start with .. @@ -930,17 +962,56 @@ strong = B.strong . trimInlines . mconcat <$> -- Note, this doesn't precisely implement the complex rule in -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- but it should be good enough for most purposes +-- +-- TODO: +-- - Classes are silently discarded in addNewRole +-- - Lacks sensible implementation for title-reference (which is the default) +-- - Allows direct use of the :raw: role, rST only allows inherited use. interpretedRole :: RSTParser Inlines interpretedRole = try $ do (role, contents) <- roleBefore <|> roleAfter - case role of - "sup" -> return $ B.superscript $ B.str contents - "sub" -> return $ B.subscript $ B.str contents - "math" -> return $ B.math contents - _ -> return $ B.str contents --unknown + renderRole contents Nothing role nullAttr + +renderRole :: String -> Maybe String -> String -> Attr -> RSTParser Inlines +renderRole contents fmt role attr = case role of + "sup" -> return $ B.superscript $ B.str contents + "superscript" -> return $ B.superscript $ B.str contents + "sub" -> return $ B.subscript $ B.str contents + "subscript" -> return $ B.subscript $ B.str contents + "emphasis" -> return $ B.emph $ B.str contents + "strong" -> return $ B.strong $ B.str contents + "rfc-reference" -> return $ rfcLink contents + "RFC" -> return $ rfcLink contents + "pep-reference" -> return $ pepLink contents + "PEP" -> return $ pepLink contents + "literal" -> return $ B.str contents + "math" -> return $ B.math contents + "title-reference" -> titleRef contents + "title" -> titleRef contents + "t" -> titleRef contents + "code" -> return $ B.codeWith attr contents + "raw" -> return $ B.rawInline (fromMaybe "" fmt) contents + custom -> do + customRole <- stateRstCustomRoles <$> getState + case M.lookup custom customRole of + Just (_, newFmt, inherit) -> let + fmtStr = fmt `mplus` newFmt + (newRole, newAttr) = inherit attr + in renderRole contents fmtStr newRole newAttr + Nothing -> return $ B.str contents -- Undefined role + where + titleRef ref = return $ B.str ref -- FIXME: Not a sensible behaviour + rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) + where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" + pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) + where padNo = replicate (4 - length pepNo) '0' ++ pepNo + pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + +roleNameEndingIn :: RSTParser Char -> RSTParser String +roleNameEndingIn end = many1Till (letter <|> char '-') end roleMarker :: RSTParser String -roleMarker = char ':' *> many1Till (letter <|> char '-') (char ':') +roleMarker = char ':' *> roleNameEndingIn (char ':') roleBefore :: RSTParser (String,String) roleBefore = try $ do diff --git a/tests/rst-reader.native b/tests/rst-reader.native index 497810f39..fd48bc60c 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -319,5 +319,15 @@ Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Sp ,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."] ,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."] ,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."] +,Null +,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."] +,Null +,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["sourceCode","haskell"],[]) "fmap id [1,2..10]",Str "."] +,Null +,Null +,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["sourceCode","python"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."] +,Null +,Null +,Para [Str "Different",Space,Str "indirect",Space,Str "C",Space,Code ("",["sourceCode","c"],[]) "int x = 15;",Str "."] ,Header 2 ("literal-symbols",[],[]) [Str "Literal",Space,Str "symbols"] ,Para [Str "2*2",Space,Str "=",Space,Str "4*1"]] diff --git a/tests/rst-reader.rst b/tests/rst-reader.rst index 748bfe0a5..930bf2ed2 100644 --- a/tests/rst-reader.rst +++ b/tests/rst-reader.rst @@ -599,6 +599,30 @@ Reset default-role to the default default. And now `some-invalid-string-3231231` is nonsense. +.. role:: html(raw) + :format: html + +And now with :html:`<b>inline</b> <span id="test">HTML</span>`. + +.. role:: haskell(code) + :language: haskell + +And some inline haskell :haskell:`fmap id [1,2..10]`. + +.. role:: indirect(code) + +.. role:: python(indirect) + :language: python + +Indirect python role :python:`[x*x for x in [1,2,3,4,5]]`. + +.. role:: different-indirect(code) + :language: c + +.. role:: c(different-indirect) + +Different indirect C :c:`int x = 15;`. + Literal symbols --------------- -- cgit v1.2.3 From 66fd9bf7595cd97ccd24b0d64748c852dc604ce8 Mon Sep 17 00:00:00 2001 From: Merijn Verstraaten <merijn@inconsistent.nl> Date: Sat, 15 Feb 2014 17:57:08 +0100 Subject: Clarified field values in RstCustomRoles. --- src/Text/Pandoc/Parsing.hs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 0713f4a96..2bc351db3 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -854,6 +854,10 @@ data ParserState = ParserState stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String, -- ^ Current rST default interpreted text role stateRstCustomRoles :: M.Map String (String, Maybe String, Attr -> (String, Attr)), -- ^ Current rST custom text roles + -- Triple represents: 1) Base role, 2) Optional format (only for :raw: + -- roles), 3) Source language annotation for code (could be used to + -- annotate role classes too). + stateWarnings :: [String] -- ^ Warnings generated by the parser } -- cgit v1.2.3 From f6a020a906e5b03defa9924819b7f8cf2f626277 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Mon, 17 Feb 2014 15:18:52 -0800 Subject: HTML writer: Fixed bug with unnumbered section headings. Unnumbered section headings (with class 'unnumbered') were getting numbers. This commit fixes the bug. --- src/Text/Pandoc/Writers/HTML.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 805bb57f1..3ac2a836f 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -475,10 +475,11 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level (_,_,_) lst) = do +blockToHtml opts (Header level (_,classes,_) lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts && not (null secnum) + && "unnumbered" `notElem` classes then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> strToHtml " " >> contents else contents -- cgit v1.2.3 From f3a062d5f928ca46376f4fda0a04172d54540454 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 19 Feb 2014 09:11:15 -0800 Subject: Make rst figures true figures. Closes #1168. Thanks to CasperVector. --- src/Text/Pandoc/Readers/RST.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a46a3a6c6..127eae167 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -581,7 +581,7 @@ directive' = do "figure" -> do (caption, legend) <- parseFromString extractCaption body' let src = escapeURI $ trim top - return $ B.para (B.image src "" caption) <> legend + return $ B.para (B.image src "fig:" caption) <> legend "image" -> do let src = escapeURI $ trim top let alt = B.str $ maybe "image" trim $ lookup "alt" fields -- cgit v1.2.3 From a826d3936dcdefe630715007917dc676eb60861d Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 21 Feb 2014 17:32:02 -0800 Subject: Fixed bug in reference link parsing in markdown_mmd. The bug was triggered by: Link to [Google][]. Link to [twitter][]. [Google]: http://google.com [twitter]: http://twitter.com --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8a41cef49..dd6fa733a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -354,7 +354,7 @@ referenceKey = try $ do tit <- option "" referenceTitle -- currently we just ignore MMD-style link/image attributes _kvs <- option [] $ guardEnabled Ext_link_attributes - >> many (spnl >> keyValAttr) + >> many (try $ spnl >> keyValAttr) blanklines let target = (escapeURI $ trimr src, tit) st <- getState -- cgit v1.2.3 From 19b127b8986fb1a35e14c26808cf8247d6d6f3c4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 23 Feb 2014 20:35:07 -0800 Subject: PDF: Use ; for TEXINPUTS separator on Windows. Closes #1151, I hope. Testing needed. --- pandoc.cabal | 2 ++ src/Text/Pandoc/PDF.hs | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index b99eb5420..bbf963672 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -243,6 +243,8 @@ Library cpp-options: -DEMBED_DATA_FILES -- Build-Tools: hsb2hs -- not yet recognized by cabal other-modules: Text.Pandoc.Data + if os(windows) + Cpp-options: -D_WINDOWS Ghc-Options: -rtsopts -Wall -fno-warn-unused-do-bind Ghc-Prof-Options: -auto-all -caf-all -rtsopts Default-Language: Haskell98 diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 360338f8f..39442854d 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -149,7 +149,12 @@ runTeXProgram program runsLeft tmpDir source = do let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", "-output-directory", tmpDir, file] env' <- getEnvironment - let texinputs = maybe (tmpDir ++ ":") ((tmpDir ++ ":") ++) +#ifdef _WINDOWS + let sep = ";" +#else + let sep = ":" +#endif + let texinputs = maybe (tmpDir ++ sep) ((tmpDir ++ sep) ++) $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] -- cgit v1.2.3 From 69f7b1dbf3423960b148d6d11ad9f25fa6cc81a9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Feb 2014 22:43:58 -0800 Subject: Added readerTrace to ReaderOptions, --trace command line opt. This is to debug backtracking-related parsing bugs. So far it is only implemented for markdown, but it would be good to extend it to latex and html readers. --- pandoc.hs | 9 +++++++++ src/Text/Pandoc/Options.hs | 2 ++ src/Text/Pandoc/Readers/Markdown.hs | 12 +++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index e49b3b9cf..677101746 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -162,6 +162,7 @@ data Opt = Opt , optAscii :: Bool -- ^ Use ascii characters only in html , optTeXLigatures :: Bool -- ^ Use TeX ligatures for quotes/dashes , optDefaultImageExtension :: String -- ^ Default image extension + , optTrace :: Bool -- ^ Print debug information } -- | Defaults for command-line options. @@ -217,6 +218,7 @@ defaultOpts = Opt , optAscii = False , optTeXLigatures = True , optDefaultImageExtension = "" + , optTrace = False } -- | A list of functions, each transforming the options data structure @@ -758,6 +760,11 @@ options = (\opt -> return opt { optHTMLMathMethod = GladTeX })) "" -- "Use gladtex for HTML math" + , Option "" ["trace"] + (NoArg + (\opt -> return opt { optTrace = True })) + "" -- "Turn on diagnostic tracing in readers." + , Option "" ["dump-args"] (NoArg (\opt -> return opt { optDumpArgs = True })) @@ -952,6 +959,7 @@ main = do , optAscii = ascii , optTeXLigatures = texLigatures , optDefaultImageExtension = defaultImageExtension + , optTrace = trace } = opts when dumpArgs $ @@ -1074,6 +1082,7 @@ main = do , readerIndentedCodeClasses = codeBlockClasses , readerApplyMacros = not laTeXOutput , readerDefaultImageExtension = defaultImageExtension + , readerTrace = trace } let writerOptions = def { writerStandalone = standalone', diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 5f65abdde..38220f542 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -210,6 +210,7 @@ data ReaderOptions = ReaderOptions{ , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks , readerDefaultImageExtension :: String -- ^ Default extension for images + , readerTrace :: Bool -- ^ Print debugging info } deriving (Show, Read) instance Default ReaderOptions @@ -225,6 +226,7 @@ instance Default ReaderOptions , readerApplyMacros = True , readerIndentedCodeClasses = [] , readerDefaultImageExtension = "" + , readerTrace = False } -- diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index dd6fa733a..c73c8f610 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -60,6 +60,8 @@ import System.FilePath (takeExtension, addExtension) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set +import Text.Printf (printf) +import Debug.Trace (trace) type MarkdownParser = Parser [Char] ParserState @@ -440,7 +442,10 @@ parseBlocks :: MarkdownParser (F Blocks) parseBlocks = mconcat <$> manyTill block eof block :: MarkdownParser (F Blocks) -block = choice [ mempty <$ blanklines +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- choice [ mempty <$ blanklines , codeBlockFenced , yamlMetaBlock , guardEnabled Ext_latex_macros *> (macro >>= return . return) @@ -465,6 +470,11 @@ block = choice [ mempty <$ blanklines , para , plain ] <?> "block" + when tr $ do + st <- getState + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList $ runF res st)) (return ()) + return res -- -- header blocks -- cgit v1.2.3 From 581075a0ca7dcdae119e3634ff98a65e79e23256 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 26 Feb 2014 22:24:50 -0800 Subject: Markdown reader: small efficiency improvement. Switched `notFollewdBy' rawHtmlBlocks` -> `notFollowedBy' (htmlTag isBlockTag)`, which is more efficient. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c73c8f610..d74b32bed 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1384,7 +1384,7 @@ ltSign :: MarkdownParser (F Inlines) ltSign = do guardDisabled Ext_raw_html <|> guardDisabled Ext_markdown_in_html_blocks - <|> (notFollowedBy' rawHtmlBlocks >> return ()) + <|> (notFollowedBy' (htmlTag isBlockTag) >> return ()) char '<' return $ return $ B.str "<" -- cgit v1.2.3 From a208a972c318c1cb37551d9662a61b7c7ab35510 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 26 Feb 2014 22:46:38 -0800 Subject: Markdown parser: avoid backtracking when closing `</div>` not found. --- src/Text/Pandoc/Readers/Markdown.hs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d74b32bed..9feb46f97 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1738,12 +1738,19 @@ spanHtml = try $ do divHtml :: MarkdownParser (F Blocks) divHtml = try $ do guardEnabled Ext_markdown_in_html_blocks - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "div" []) - contents <- mconcat <$> manyTill block (htmlTag (~== TagClose "div")) - let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents + (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) + bls <- option "" (blankline >> option "" blanklines) + contents <- mconcat <$> + many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) + closed <- option False (True <$ htmlTag (~== TagClose "div")) + if closed + then do + let ident = fromMaybe "" $ lookup "id" attrs + let classes = maybe [] words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ B.divWith (ident, classes, keyvals) <$> contents + else -- avoid backtracing + return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents rawHtmlInline :: MarkdownParser (F Inlines) rawHtmlInline = do -- cgit v1.2.3 From 4d0bf3c5d685cbee3b13f562503a572af803ab95 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 26 Feb 2014 22:53:12 -0800 Subject: Markdown reader: Improved parsing of nested divs. Formerly a closing div tag would be missed if it came right after other block-level tags. --- src/Text/Pandoc/Readers/Markdown.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 9feb46f97..0ea7f9ac5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -954,6 +954,8 @@ rawHtmlBlocks = do htmlBlocks <- many1 $ try $ do s <- rawVerbatimBlock <|> try ( do (t,raw) <- htmlTag isBlockTag + guard $ t ~/= TagOpen "div" [] && + t ~/= TagClose "div" exts <- getOption readerExtensions -- if open tag, need markdown="1" if -- markdown_attributes extension is set -- cgit v1.2.3 From 80511f1b34d082742d78d9745469eb8c63592a9c Mon Sep 17 00:00:00 2001 From: mb21 <mb21@server.fake> Date: Sun, 1 Dec 2013 21:11:39 +0100 Subject: InDesign ICML Writer --- pandoc.cabal | 1 + src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Writers/ICML.hs | 525 +++++++ tests/Tests/Old.hs | 2 +- tests/tables.icml | 748 ++++++++++ tests/writer.icml | 3023 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 4301 insertions(+), 1 deletion(-) create mode 100644 src/Text/Pandoc/Writers/ICML.hs create mode 100644 tests/tables.icml create mode 100644 tests/writer.icml (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index a4b8ac61b..e279a2cc9 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -270,6 +270,7 @@ Library Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.OPML, Text.Pandoc.Writers.HTML, + Text.Pandoc.Writers.ICML, Text.Pandoc.Writers.LaTeX, Text.Pandoc.Writers.ConTeXt, Text.Pandoc.Writers.OpenDocument, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 703bb876a..2c90fd09b 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -85,6 +85,7 @@ module Text.Pandoc , writeTexinfo , writeHtml , writeHtmlString + , writeICML , writeDocbook , writeOPML , writeOpenDocument @@ -133,6 +134,7 @@ import Text.Pandoc.Writers.ODT import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.EPUB import Text.Pandoc.Writers.FB2 +import Text.Pandoc.Writers.ICML import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OPML import Text.Pandoc.Writers.OpenDocument @@ -226,6 +228,7 @@ writers = [ ,("html" , PureStringWriter writeHtmlString) ,("html5" , PureStringWriter $ \o -> writeHtmlString o{ writerHtml5 = True }) + ,("icml" , PureStringWriter writeICML) ,("s5" , PureStringWriter $ \o -> writeHtmlString o{ writerSlideVariant = S5Slides , writerTableOfContents = False }) diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs new file mode 100644 index 000000000..19d486b25 --- /dev/null +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -0,0 +1,525 @@ +{-# LANGUAGE OverloadedStrings #-} + +{- | + Module : Text.Pandoc.Writers.ICML + Copyright : Copyright (C) 2013 github.com/mb21 + License : GNU GPL, version 2 or above + + Stability : alpha + +Conversion of 'Pandoc' documents to Adobe InCopy ICML, a stand-alone XML format +which is a subset of the zipped IDML format for which the documentation is +available here: http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/sdk/cs6/idml/idml-specification.pdf +InCopy is the companion word-processor to Adobe InDesign and ICML documents can be integrated +into InDesign with File -> Place. +-} +module Text.Pandoc.Writers.ICML (writeICML) where +import Text.Pandoc.Definition +import Text.Pandoc.XML +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Shared (splitBy) +import Text.Pandoc.Options +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Pretty +import Data.List (isPrefixOf, isInfixOf, stripPrefix) +import Data.Text as Text (breakOnAll, pack) +import Data.Monoid (mappend) +import Control.Monad.State +import qualified Data.Set as Set + +type Style = [String] +type Hyperlink = [(Int, String)] + +data WriterState = WriterState{ + blockStyles :: Set.Set String + , inlineStyles :: Set.Set String + , links :: Hyperlink + , listDepth :: Int + , maxListDepth :: Int + } + +type WS a = State WriterState a + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + blockStyles = Set.empty + , inlineStyles = Set.empty + , links = [] + , listDepth = 1 + , maxListDepth = 0 + } + +-- inline names (appear in InDesign's character styles pane) +emphName :: String +strongName :: String +strikeoutName :: String +superscriptName :: String +subscriptName :: String +smallCapsName :: String +codeName :: String +linkName :: String +emphName = "Italic" +strongName = "Bold" +strikeoutName = "Strikeout" +superscriptName = "Superscript" +subscriptName = "Subscript" +smallCapsName = "SmallCaps" +codeName = "Code" +linkName = "Link" + +-- block element names (appear in InDesign's paragraph styles pane) +paragraphName :: String +codeBlockName :: String +rawBlockName :: String +blockQuoteName :: String +orderedListName :: String +bulletListName :: String +defListTermName :: String +defListDefName :: String +headerName :: String +tableName :: String +tableHeaderName :: String +tableCaptionName :: String +alignLeftName :: String +alignRightName :: String +alignCenterName :: String +firstListItemName :: String +beginsWithName :: String +lowerRomanName :: String +upperRomanName :: String +lowerAlphaName :: String +upperAlphaName :: String +subListParName :: String +footnoteName :: String +paragraphName = "Paragraph" +codeBlockName = "CodeBlock" +rawBlockName = "Rawblock" +blockQuoteName = "Blockquote" +orderedListName = "NumList" +bulletListName = "BulList" +defListTermName = "DefListTerm" +defListDefName = "DefListDef" +headerName = "Header" +tableName = "TablePar" +tableHeaderName = "TableHeader" +tableCaptionName = "TableCaption" +alignLeftName = "LeftAlign" +alignRightName = "RightAlign" +alignCenterName = "CenterAlign" +firstListItemName = "first" +beginsWithName = "beginsWith-" +lowerRomanName = "lowerRoman" +upperRomanName = "upperRoman" +lowerAlphaName = "lowerAlpha" +upperAlphaName = "upperAlpha" +subListParName = "subParagraph" +footnoteName = "Footnote" + + +-- | Convert Pandoc document to string in ICML format. +writeICML :: WriterOptions -> Pandoc -> String +writeICML opts (Pandoc meta blocks) = + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + render' = render colwidth + renderMeta f s = Just $ render' $ fst $ runState (f opts [] s) defaultWriterState + Just metadata = metaToJSON opts + (renderMeta blocksToICML) + (renderMeta inlinesToICML) + meta + (doc, st) = runState (blocksToICML opts [] blocks) defaultWriterState + main = render' doc + context = defField "body" main + $ defField "charStyles" (render' $ charStylesToDoc st) + $ defField "parStyles" (render' $ parStylesToDoc st) + $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) + $ metadata + in if writerStandalone opts + then renderTemplate' (writerTemplate opts) context + else main + +-- | Auxilary functions for parStylesToDoc and charStylesToDoc. +contains :: String -> (String, (String, String)) -> [(String, String)] +contains s rule = + if isInfixOf (fst rule) s + then [snd rule] + else [] + +-- | The monospaced font to use as default. +monospacedFont :: Doc +monospacedFont = inTags False "AppliedFont" [("type", "string")] $ text "Courier New" + +-- | How much to indent blockquotes etc. +defaultIndent :: Int +defaultIndent = 20 + +-- | How much to indent numbered lists before the number. +defaultListIndent :: Int +defaultListIndent = 10 + +-- other constants +lineSeparator :: String +lineSeparator = "
" + +-- | Convert a WriterState with its block styles to the ICML listing of Paragraph Styles. +parStylesToDoc :: WriterState -> Doc +parStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ blockStyles st + where + makeStyle s = + let countSubStrs sub str = length $ Text.breakOnAll (Text.pack sub) (Text.pack str) + attrs = concat $ map (contains s) $ [ + (defListTermName, ("BulletsAndNumberingListType", "BulletList")) + , (defListTermName, ("FontStyle", "Bold")) + , (tableHeaderName, ("FontStyle", "Bold")) + , (alignLeftName, ("Justification", "LeftAlign")) + , (alignRightName, ("Justification", "RightAlign")) + , (alignCenterName, ("Justification", "CenterAlign")) + , (headerName++"1", ("PointSize", "36")) + , (headerName++"2", ("PointSize", "30")) + , (headerName++"3", ("PointSize", "24")) + , (headerName++"4", ("PointSize", "18")) + , (headerName++"5", ("PointSize", "14")) + ] + -- what is the most nested list type, if any? + (isBulletList, isOrderedList) = findList $ reverse $ splitBy (==' ') s + where + findList [] = (False, False) + findList (x:xs) | x == bulletListName = (True, False) + | x == orderedListName = (False, True) + | otherwise = findList xs + nBuls = countSubStrs bulletListName s + nOrds = countSubStrs orderedListName s + attrs' = numbering ++ listType ++ indent ++ attrs + where + numbering | isOrderedList = [("NumberingExpression", "^#.^t"), ("NumberingLevel", show nOrds)] + | otherwise = [] + listType | isOrderedList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "NumberedList")] + | isBulletList && (not $ isInfixOf subListParName s) + = [("BulletsAndNumberingListType", "BulletList")] + | otherwise = [] + indent = [("LeftIndent", show indt)] + where + nBlockQuotes = countSubStrs blockQuoteName s + nDefLists = countSubStrs defListDefName s + indt = max 0 $ defaultListIndent*(nBuls + nOrds - 1) + defaultIndent*(nBlockQuotes + nDefLists) + props = inTags True "Properties" [] $ (basedOn $$ tabList $$ numbForm) + where + font = if isInfixOf codeBlockName s + then monospacedFont + else empty + basedOn = inTags False "BasedOn" [("type", "object")] (text "$ID/NormalParagraphStyle") $$ font + tabList = if isBulletList + then inTags True "TabList" [("type","list")] $ inTags True "ListItem" [("type","record")] + $ vcat [ + inTags False "Alignment" [("type","enumeration")] $ text "LeftAlign" + , inTags False "AlignmentCharacter" [("type","string")] $ text "." + , selfClosingTag "Leader" [("type","string")] + , inTags False "Position" [("type","unit")] $ text + $ show $ defaultListIndent * (nBuls + nOrds) + ] + else empty + makeNumb name = inTags False "NumberingFormat" [("type", "string")] (text name) + numbForm | isInfixOf lowerRomanName s = makeNumb "i, ii, iii, iv..." + | isInfixOf upperRomanName s = makeNumb "I, II, III, IV..." + | isInfixOf lowerAlphaName s = makeNumb "a, b, c, d..." + | isInfixOf upperAlphaName s = makeNumb "A, B, C, D..." + | otherwise = empty + in inTags True "ParagraphStyle" ([("Self", "ParagraphStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a WriterState with its inline styles to the ICML listing of Character Styles. +charStylesToDoc :: WriterState -> Doc +charStylesToDoc st = vcat $ map makeStyle $ Set.toAscList $ inlineStyles st + where + makeStyle s = + let attrs = concat $ map (contains s) [ + (strikeoutName, ("StrikeThru", "true")) + , (superscriptName, ("Position", "Superscript")) + , (subscriptName, ("Position", "Subscript")) + , (smallCapsName, ("Capitalization", "SmallCaps")) + ] + attrs' | isInfixOf emphName s && isInfixOf strongName s = ("FontStyle", "Bold Italic") : attrs + | isInfixOf strongName s = ("FontStyle", "Bold") : attrs + | isInfixOf emphName s = ("FontStyle", "Italic") : attrs + | otherwise = attrs + props = inTags True "Properties" [] $ + inTags False "BasedOn" [("type", "object")] (text "$ID/NormalCharacterStyle") $$ font + where + font = + if isInfixOf codeName s + then monospacedFont + else empty + in inTags True "CharacterStyle" ([("Self", "CharacterStyle/"++s), ("Name", s)] ++ attrs') props + +-- | Convert a list of (identifier, url) pairs to the ICML listing of hyperlinks. +hyperlinksToDoc :: Hyperlink -> Doc +hyperlinksToDoc [] = empty +hyperlinksToDoc (x:xs) = hyp x $$ hyperlinksToDoc xs + where + hyp (ident, url) = hdest $$ hlink + where + hdest = selfClosingTag "HyperlinkURLDestination" + [("Self", "HyperlinkURLDestination/"++url), ("Name","link"), ("DestinationURL",url), ("DestinationUniqueKey","1")] + hlink = inTags True "Hyperlink" [("Self","uf-"++show ident), ("Name",url), + ("Source","htss-"++show ident), ("Visible","true"), ("DestinationUniqueKey","1")] + $ inTags True "Properties" [] + $ inTags False "BorderColor" [("type","enumeration")] (text "Black") + $$ (inTags False "Destination" [("type","object")] + $ text $ "HyperlinkURLDestination/"++(escapeStringForXML url)) + + +-- | Convert a list of Pandoc blocks to ICML. +blocksToICML :: WriterOptions -> Style -> [Block] -> WS Doc +blocksToICML opts style lst = vcat `fmap` mapM (blockToICML opts style) lst + +-- | Convert a Pandoc block element to ICML. +blockToICML :: WriterOptions -> Style -> Block -> WS Doc +blockToICML opts style (Plain lst) = parStyle opts style lst +blockToICML opts style (Para lst) = parStyle opts (paragraphName:style) lst +blockToICML opts style (CodeBlock _ str) = parStyle opts (codeBlockName:style) $ [Str str] +blockToICML opts style (RawBlock _ str) = parStyle opts (rawBlockName:style) $ [Str str] +blockToICML opts style (BlockQuote blocks) = blocksToICML opts (blockQuoteName:style) blocks +blockToICML opts style (OrderedList attribs lst) = listItemsToICML opts orderedListName style (Just attribs) lst +blockToICML opts style (BulletList lst) = listItemsToICML opts bulletListName style Nothing lst +blockToICML opts style (DefinitionList lst) = vcat `fmap` mapM (definitionListItemToICML opts style) lst +blockToICML opts style (Header lvl _ lst) = + let stl = (headerName ++ show lvl):style + in parStyle opts stl lst +blockToICML _ _ HorizontalRule = return empty -- we could insert a page break instead +blockToICML opts style (Table caption aligns widths headers rows) = + let style' = tableName : style + noHeader = all null headers + nrHeaders = if noHeader + then "0" + else "1" + nrRows = length rows + nrCols = if null rows + then 0 + else length $ head rows + rowsToICML [] _ = return empty + rowsToICML (col:rest) rowNr = + liftM2 ($$) (colsToICML col rowNr (0::Int)) $ rowsToICML rest (rowNr+1) + colsToICML [] _ _ = return empty + colsToICML (cell:rest) rowNr colNr = do + let stl = if rowNr == 0 && not noHeader + then tableHeaderName:style' + else style' + alig = aligns !! colNr + stl' | alig == AlignLeft = alignLeftName : stl + | alig == AlignRight = alignRightName : stl + | alig == AlignCenter = alignCenterName : stl + | otherwise = stl + c <- blocksToICML opts stl' cell + let cl = return $ inTags True "Cell" + [("Name", show colNr ++":"++ show rowNr), ("AppliedCellStyle","CellStyle/Cell")] c + liftM2 ($$) cl $ colsToICML rest rowNr (colNr+1) + in do + let tabl = if noHeader + then rows + else headers:rows + cells <- rowsToICML tabl (0::Int) + let colWidths w = if w > 0 + then [("SingleColumnWidth",show $ 500 * w)] + else [] + let tupToDoc tup = selfClosingTag "Column" $ [("Name",show $ fst tup)] ++ (colWidths $ snd tup) + let colDescs = vcat $ map tupToDoc $ zip [0..nrCols-1] widths + let tableDoc = return $ inTags True "Table" [ + ("AppliedTableStyle","TableStyle/Table") + , ("HeaderRowCount", nrHeaders) + , ("BodyRowCount", show nrRows) + , ("ColumnCount", show nrCols) + ] (colDescs $$ cells) + liftM2 ($$) tableDoc $ parStyle opts (tableCaptionName:style) caption +blockToICML opts style (Div _ lst) = blocksToICML opts style lst +blockToICML _ _ Null = return empty + +-- | Convert a list of lists of blocks to ICML list items. +listItemsToICML :: WriterOptions -> String -> Style -> Maybe ListAttributes -> [[Block]] -> WS Doc +listItemsToICML _ _ _ _ [] = return empty +listItemsToICML opts listType style attribs (first:rest) = do + st <- get + put st{ listDepth = 1 + listDepth st} + let stl = listType:style + let f = listItemToICML opts stl True attribs first + let r = map (listItemToICML opts stl False attribs) rest + docs <- sequence $ f:r + s <- get + let maxD = max (maxListDepth s) (listDepth s) + put s{ listDepth = 1, maxListDepth = maxD } + return $ vcat docs + +-- | Convert a list of blocks to ICML list items. +listItemToICML :: WriterOptions -> Style -> Bool-> Maybe ListAttributes -> [Block] -> WS Doc +listItemToICML opts style isFirst attribs item = + let makeNumbStart (Just (beginsWith, numbStl, _)) = + let doN DefaultStyle = [] + doN LowerRoman = [lowerRomanName] + doN UpperRoman = [upperRomanName] + doN LowerAlpha = [lowerAlphaName] + doN UpperAlpha = [upperAlphaName] + doN _ = [] + bw = if beginsWith > 1 + then [beginsWithName ++ show beginsWith] + else [] + in doN numbStl ++ bw + makeNumbStart Nothing = [] + stl = if isFirst + then firstListItemName:style + else style + stl' = makeNumbStart attribs ++ stl + in if length item > 1 + then do + let insertTab (Para lst) = blockToICML opts (subListParName:style) $ Para $ (Str "\t"):lst + insertTab block = blockToICML opts style block + f <- blockToICML opts stl' $ head item + r <- fmap vcat $ mapM insertTab $ tail item + return $ f $$ r + else blocksToICML opts stl' item + +definitionListItemToICML :: WriterOptions -> Style -> ([Inline],[[Block]]) -> WS Doc +definitionListItemToICML opts style (term,defs) = do + term' <- parStyle opts (defListTermName:style) term + defs' <- vcat `fmap` mapM (blocksToICML opts (defListDefName:style)) defs + return $ term' $$ defs' + + +-- | Convert a list of inline elements to ICML. +inlinesToICML :: WriterOptions -> Style -> [Inline] -> WS Doc +inlinesToICML opts style lst = vcat `fmap` mapM (inlineToICML opts style) (mergeSpaces lst) + +-- | Convert an inline element to ICML. +inlineToICML :: WriterOptions -> Style -> Inline -> WS Doc +inlineToICML _ style (Str str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Emph lst) = inlinesToICML opts (emphName:style) lst +inlineToICML opts style (Strong lst) = inlinesToICML opts (strongName:style) lst +inlineToICML opts style (Strikeout lst) = inlinesToICML opts (strikeoutName:style) lst +inlineToICML opts style (Superscript lst) = inlinesToICML opts (superscriptName:style) lst +inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:style) lst +inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst +inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"] +inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"] +inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst] +inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str +inlineToICML _ style Space = charStyle style space +inlineToICML _ style LineBreak = charStyle style $ text lineSeparator +inlineToICML _ style (Math _ str) = charStyle style $ text $ escapeStringForXML str --InDesign doesn't really do math +inlineToICML _ style (RawInline _ str) = charStyle style $ text $ escapeStringForXML str +inlineToICML opts style (Link lst (url, title)) = do + content <- inlinesToICML opts (linkName:style) lst + state $ \st -> + let ident = if null $ links st + then 1::Int + else 1 + (fst $ head $ links st) + newst = st{ links = (ident, url):(links st) } + cont = inTags True "HyperlinkTextSource" + [("Self","htss-"++show ident), ("Name",title), ("Hidden","false")] content + in (cont, newst) +inlineToICML opts style (Image alt target) = imageICML opts style alt target +inlineToICML opts style (Note lst) = footnoteToICML opts style lst +inlineToICML opts style (Span _ lst) = inlinesToICML opts style lst + +-- | Convert a list of block elements to an ICML footnote. +footnoteToICML :: WriterOptions -> Style -> [Block] -> WS Doc +footnoteToICML opts style lst = + let insertTab (Para ls) = blockToICML opts (footnoteName:style) $ Para $ (Str "\t"):ls + insertTab block = blockToICML opts (footnoteName:style) block + in do + contents <- mapM insertTab lst + let number = inTags True "ParagraphStyleRange" [] $ + inTags True "CharacterStyleRange" [] $ inTagsSimple "Content" "<?ACE 4?>" + return $ inTags True "CharacterStyleRange" + [("AppliedCharacterStyle","$ID/NormalCharacterStyle"), ("Position","Superscript")] + $ inTags True "Footnote" [] $ number $$ vcat contents + +-- | Auxiliary function to merge Space elements into the adjacent Strs. +mergeSpaces :: [Inline] -> [Inline] +mergeSpaces ((Str s):(Space:((Str s'):xs))) = mergeSpaces $ Str(s++" "++s') : xs +mergeSpaces (Space:((Str s):xs)) = mergeSpaces $ Str (" "++s) : xs +mergeSpaces ((Str s):(Space:xs)) = mergeSpaces $ Str (s++" ") : xs +mergeSpaces (x:xs) = x : (mergeSpaces xs) +mergeSpaces [] = [] + +-- | Wrap a list of inline elements in an ICML Paragraph Style +parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc +parStyle opts style lst = + let slipIn x y = if null y + then x + else x ++ " > " ++ y + stlStr = foldr slipIn [] $ reverse style + stl = if null stlStr + then "" + else "ParagraphStyle/" ++ stlStr + attrs = ("AppliedParagraphStyle", stl) + attrs' = if firstListItemName `elem` style + then let ats = attrs : [("NumberingContinue", "false")] + begins = filter (isPrefixOf beginsWithName) style + in if null begins + then ats + else let i = maybe "" id $ stripPrefix beginsWithName $ head begins + in ("NumberingStartAt", i) : ats + else [attrs] + in do + content <- inlinesToICML opts [] lst + let cont = inTags True "ParagraphStyleRange" attrs' + $ mappend content $ selfClosingTag "Br" [] + state $ \st -> (cont, st{ blockStyles = Set.insert stlStr $ blockStyles st }) + +-- | Wrap a Doc in an ICML Character Style. +charStyle :: Style -> Doc -> WS Doc +charStyle style content = + let (stlStr, attrs) = styleToStrAttr style + doc = inTags True "CharacterStyleRange" attrs $ inTagsSimple "Content" $ flush content + in do + state $ \st -> + let styles = if null stlStr + then st + else st{ inlineStyles = Set.insert stlStr $ inlineStyles st } + in (doc, styles) + +-- | Transform a Style to a tuple of String (eliminating duplicates and ordered) and corresponding attribute. +styleToStrAttr :: Style -> (String, [(String, String)]) +styleToStrAttr style = + let stlStr = unwords $ Set.toAscList $ Set.fromList style + stl = if null style + then "$ID/NormalCharacterStyle" + else "CharacterStyle/" ++ stlStr + attrs = [("AppliedCharacterStyle", stl)] + in (stlStr, attrs) + +-- | Assemble an ICML Image. +imageICML :: WriterOptions -> Style -> [Inline] -> Target -> WS Doc +imageICML _ style _ (linkURI, _) = + let imgWidth = 300::Int --TODO: set width, height dynamically as in Docx.hs + imgHeight = 200::Int + scaleFact = show (1::Double) --TODO: set scaling factor so image is scaled exactly to imgWidth x imgHeight + hw = show $ imgWidth `div` 2 + hh = show $ imgHeight `div` 2 + qw = show $ imgWidth `div` 4 + qh = show $ imgHeight `div` 4 + (stlStr, attrs) = styleToStrAttr style + props = inTags True "Properties" [] $ inTags True "PathGeometry" [] + $ inTags True "GeometryPathType" [("PathOpen","false")] + $ inTags True "PathPointArray" [] + $ vcat [ + selfClosingTag "PathPointType" [("Anchor", "-"++qw++" -"++qh), + ("LeftDirection", "-"++qw++" -"++qh), ("RightDirection", "-"++qw++" -"++qh)] + , selfClosingTag "PathPointType" [("Anchor", "-"++qw++" "++qh), + ("LeftDirection", "-"++qw++" "++qh), ("RightDirection", "-"++qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" "++qh), + ("LeftDirection", qw++" "++qh), ("RightDirection", qw++" "++qh)] + , selfClosingTag "PathPointType" [("Anchor", qw++" -"++qh), + ("LeftDirection", qw++" -"++qh), ("RightDirection", qw++" -"++qh)] + ] + image = inTags True "Image" + [("Self","ue6"), ("ItemTransform", scaleFact++" 0 0 "++scaleFact++" -"++qw++" -"++qh)] + $ vcat [ + inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + $$ selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0"), ("Right", hw), ("Bottom", hh)] + , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", linkURI)] + ] + doc = inTags True "CharacterStyleRange" attrs + $ inTags True "Rectangle" [("Self","uec"), ("ItemTransform", "1 0 0 1 "++qw++" -"++qh)] + $ (props $$ image) + in do + state $ \st -> (doc, st{ inlineStyles = Set.insert stlStr $ inlineStyles st } ) diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs index a16784889..424e1b7c5 100644 --- a/tests/Tests/Old.hs +++ b/tests/Tests/Old.hs @@ -135,7 +135,7 @@ tests = [ testGroup "markdown" "haddock-reader.haddock" "haddock-reader.native" ] , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) - [ "opendocument" , "context" , "texinfo" + [ "opendocument" , "context" , "texinfo", "icml" , "man" , "plain" , "rtf", "org", "asciidoc" ] ] diff --git a/tests/tables.icml b/tests/tables.icml new file mode 100644 index 000000000..eb73af670 --- /dev/null +++ b/tests/tables.icml @@ -0,0 +1,748 @@ +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Simple table with caption:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<Table AppliedTableStyle="TableStyle/Table" HeaderRowCount="1" BodyRowCount="3" ColumnCount="4"> + <Column Name="0" /> + <Column Name="1" /> + <Column Name="2" /> + <Column Name="3" /> + <Cell Name="0:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Center</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Default</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> +</Table> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TableCaption"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Demonstration of simple table syntax.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Simple table without caption:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<Table AppliedTableStyle="TableStyle/Table" HeaderRowCount="1" BodyRowCount="3" ColumnCount="4"> + <Column Name="0" /> + <Column Name="1" /> + <Column Name="2" /> + <Column Name="3" /> + <Cell Name="0:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Center</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Default</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> +</Table> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TableCaption"> + <Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Simple table indented two spaces:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<Table AppliedTableStyle="TableStyle/Table" HeaderRowCount="1" BodyRowCount="3" ColumnCount="4"> + <Column Name="0" /> + <Column Name="1" /> + <Column Name="2" /> + <Column Name="3" /> + <Cell Name="0:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Center</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Default</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:3" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> +</Table> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TableCaption"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Demonstration of simple table syntax.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiline table with caption:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<Table AppliedTableStyle="TableStyle/Table" HeaderRowCount="1" BodyRowCount="2" ColumnCount="4"> + <Column Name="0" SingleColumnWidth="75.0" /> + <Column Name="1" SingleColumnWidth="68.75" /> + <Column Name="2" SingleColumnWidth="81.25" /> + <Column Name="3" SingleColumnWidth="168.75" /> + <Cell Name="0:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Centered Header</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left Aligned</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right Aligned</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Default aligned</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>First</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>row</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12.0</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Example of a row that spans multiple lines.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Second</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>row</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>5.0</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here's another one. Note the blank line between rows.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> +</Table> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TableCaption"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here's the caption. It may span multiple lines.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiline table without caption:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<Table AppliedTableStyle="TableStyle/Table" HeaderRowCount="1" BodyRowCount="2" ColumnCount="4"> + <Column Name="0" SingleColumnWidth="75.0" /> + <Column Name="1" SingleColumnWidth="68.75" /> + <Column Name="2" SingleColumnWidth="81.25" /> + <Column Name="3" SingleColumnWidth="168.75" /> + <Cell Name="0:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Centered Header</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left Aligned</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right Aligned</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > TableHeader > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Default aligned</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>First</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>row</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12.0</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Example of a row that spans multiple lines.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Second</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>row</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>5.0</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here's another one. Note the blank line between rows.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> +</Table> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TableCaption"> + <Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Table without column headers:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<Table AppliedTableStyle="TableStyle/Table" HeaderRowCount="0" BodyRowCount="3" ColumnCount="4"> + <Column Name="0" /> + <Column Name="1" /> + <Column Name="2" /> + <Column Name="3" /> + <Cell Name="0:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>123</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:2" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>1</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> +</Table> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TableCaption"> + <Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiline table without column headers:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<Table AppliedTableStyle="TableStyle/Table" HeaderRowCount="0" BodyRowCount="2" ColumnCount="4"> + <Column Name="0" SingleColumnWidth="75.0" /> + <Column Name="1" SingleColumnWidth="68.75" /> + <Column Name="2" SingleColumnWidth="81.25" /> + <Column Name="3" SingleColumnWidth="168.75" /> + <Cell Name="0:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>First</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>row</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>12.0</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:0" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Example of a row that spans multiple lines.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="0:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > CenterAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Second</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="1:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > LeftAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>row</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="2:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar > RightAlign"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>5.0</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> + <Cell Name="3:1" AppliedCellStyle="CellStyle/Cell"> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TablePar"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here's another one. Note the blank line between rows.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Cell> +</Table> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/TableCaption"> + <Br /> +</ParagraphStyleRange> diff --git a/tests/writer.icml b/tests/writer.icml new file mode 100644 index 000000000..ef6ddcf64 --- /dev/null +++ b/tests/writer.icml @@ -0,0 +1,3023 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<?aid style="50" type="snippet" readerVersion="6.0" featureSet="513" product="8.0(370)" ?> +<?aid SnippetType="InCopyInterchange"?> +<Document DOMVersion="8.0" Self="pandoc_doc"> + <RootCharacterStyleGroup Self="pandoc_character_styles"> + <CharacterStyle Self="$ID/NormalCharacterStyle" Name="Default" /> + <CharacterStyle Self="CharacterStyle/" Name=""> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Bold" Name="Bold" FontStyle="Bold"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Bold Italic" Name="Bold Italic" FontStyle="Bold Italic"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Code" Name="Code"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + <AppliedFont type="string">Courier New</AppliedFont> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Italic" Name="Italic" FontStyle="Italic"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Italic Link" Name="Italic Link" FontStyle="Italic"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Italic Strikeout" Name="Italic Strikeout" FontStyle="Italic" StrikeThru="true"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Italic Superscript" Name="Italic Superscript" FontStyle="Italic" Position="Superscript"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Link" Name="Link"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Strikeout" Name="Strikeout" StrikeThru="true"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Subscript" Name="Subscript" Position="Subscript"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + <CharacterStyle Self="CharacterStyle/Superscript" Name="Superscript" Position="Superscript"> + <Properties> + <BasedOn type="object">$ID/NormalCharacterStyle</BasedOn> + </Properties> + </CharacterStyle> + </RootCharacterStyleGroup> + <RootParagraphStyleGroup Self="pandoc_paragraph_styles"> + <ParagraphStyle Self="$ID/NormalParagraphStyle" Name="$ID/NormalParagraphStyle" + SpaceBefore="6" SpaceAfter="6"> <!-- paragraph spacing --> + <Properties> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string"></Leader> + <Position type="unit">10</Position> <!-- first tab stop --> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/" Name="" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Blockquote > Blockquote > Paragraph" Name="Blockquote > Blockquote > Paragraph" LeftIndent="30"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Blockquote > CodeBlock" Name="Blockquote > CodeBlock" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <AppliedFont type="string">Courier New</AppliedFont> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Blockquote > NumList" Name="Blockquote > NumList" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Blockquote > NumList > first" Name="Blockquote > NumList > first" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Blockquote > Paragraph" Name="Blockquote > Paragraph" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList" Name="BulList" BulletsAndNumberingListType="BulletList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">10</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList > BulList > BulList > first" Name="BulList > BulList > BulList > first" BulletsAndNumberingListType="BulletList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">30</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList > BulList > Paragraph" Name="BulList > BulList > Paragraph" BulletsAndNumberingListType="BulletList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">20</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList > BulList > first" Name="BulList > BulList > first" BulletsAndNumberingListType="BulletList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">20</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList > BulList > first > Paragraph" Name="BulList > BulList > first > Paragraph" BulletsAndNumberingListType="BulletList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">20</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList > Paragraph" Name="BulList > Paragraph" BulletsAndNumberingListType="BulletList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">10</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList > first" Name="BulList > first" BulletsAndNumberingListType="BulletList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">10</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/BulList > first > Paragraph" Name="BulList > first > Paragraph" BulletsAndNumberingListType="BulletList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">10</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/CodeBlock" Name="CodeBlock" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <AppliedFont type="string">Courier New</AppliedFont> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/DefListDef" Name="DefListDef" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/DefListDef > Blockquote > Paragraph" Name="DefListDef > Blockquote > Paragraph" LeftIndent="30"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/DefListDef > CodeBlock" Name="DefListDef > CodeBlock" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <AppliedFont type="string">Courier New</AppliedFont> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/DefListDef > NumList" Name="DefListDef > NumList" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/DefListDef > NumList > first" Name="DefListDef > NumList > first" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/DefListDef > Paragraph" Name="DefListDef > Paragraph" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/DefListTerm" Name="DefListTerm" LeftIndent="0" BulletsAndNumberingListType="BulletList" FontStyle="Bold"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Footnote > CodeBlock" Name="Footnote > CodeBlock" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <AppliedFont type="string">Courier New</AppliedFont> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Footnote > Paragraph" Name="Footnote > Paragraph" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Header1" Name="Header1" LeftIndent="0" PointSize="36"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Header2" Name="Header2" LeftIndent="0" PointSize="30"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Header3" Name="Header3" LeftIndent="0" PointSize="24"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Header4" Name="Header4" LeftIndent="0" PointSize="18"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Header5" Name="Header5" LeftIndent="0" PointSize="14"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList" Name="NumList" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > BulList" Name="NumList > BulList" BulletsAndNumberingListType="BulletList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">20</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > BulList > first" Name="NumList > BulList > first" BulletsAndNumberingListType="BulletList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <TabList type="list"> + <ListItem type="record"> + <Alignment type="enumeration">LeftAlign</Alignment> + <AlignmentCharacter type="string">.</AlignmentCharacter> + <Leader type="string" /> + <Position type="unit">20</Position> + </ListItem> + </TabList> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > NumList > NumList > first > beginsWith-3 > lowerAlpha" Name="NumList > NumList > NumList > NumList > first > beginsWith-3 > lowerAlpha" NumberingExpression="^#.^t" NumberingLevel="4" BulletsAndNumberingListType="NumberedList" LeftIndent="30"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <NumberingFormat type="string">a, b, c, d...</NumberingFormat> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > NumList > first > beginsWith-6" Name="NumList > NumList > NumList > first > beginsWith-6" NumberingExpression="^#.^t" NumberingLevel="3" BulletsAndNumberingListType="NumberedList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > NumList > first > upperAlpha" Name="NumList > NumList > NumList > first > upperAlpha" NumberingExpression="^#.^t" NumberingLevel="3" BulletsAndNumberingListType="NumberedList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <NumberingFormat type="string">A, B, C, D...</NumberingFormat> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > NumList > upperAlpha" Name="NumList > NumList > NumList > upperAlpha" NumberingExpression="^#.^t" NumberingLevel="3" BulletsAndNumberingListType="NumberedList" LeftIndent="20"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <NumberingFormat type="string">A, B, C, D...</NumberingFormat> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > beginsWith-4 > lowerRoman" Name="NumList > NumList > beginsWith-4 > lowerRoman" NumberingExpression="^#.^t" NumberingLevel="2" BulletsAndNumberingListType="NumberedList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <NumberingFormat type="string">i, ii, iii, iv...</NumberingFormat> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > first" Name="NumList > NumList > first" NumberingExpression="^#.^t" NumberingLevel="2" BulletsAndNumberingListType="NumberedList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > first > beginsWith-4 > lowerRoman" Name="NumList > NumList > first > beginsWith-4 > lowerRoman" NumberingExpression="^#.^t" NumberingLevel="2" BulletsAndNumberingListType="NumberedList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <NumberingFormat type="string">i, ii, iii, iv...</NumberingFormat> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > NumList > first > upperRoman" Name="NumList > NumList > first > upperRoman" NumberingExpression="^#.^t" NumberingLevel="2" BulletsAndNumberingListType="NumberedList" LeftIndent="10"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <NumberingFormat type="string">I, II, III, IV...</NumberingFormat> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > Paragraph" Name="NumList > Paragraph" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > beginsWith-2 > Paragraph" Name="NumList > beginsWith-2 > Paragraph" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > first" Name="NumList > first" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > first > Paragraph" Name="NumList > first > Paragraph" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > first > beginsWith-2" Name="NumList > first > beginsWith-2" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > first > upperAlpha" Name="NumList > first > upperAlpha" NumberingExpression="^#.^t" NumberingLevel="1" BulletsAndNumberingListType="NumberedList" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + <NumberingFormat type="string">A, B, C, D...</NumberingFormat> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/NumList > subParagraph > Paragraph" Name="NumList > subParagraph > Paragraph" NumberingExpression="^#.^t" NumberingLevel="1" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Paragraph" Name="Paragraph" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + <ParagraphStyle Self="ParagraphStyle/Rawblock" Name="Rawblock" LeftIndent="0"> + <Properties> + <BasedOn type="object">$ID/NormalParagraphStyle</BasedOn> + </Properties> + </ParagraphStyle> + </RootParagraphStyleGroup> + <RootTableStyleGroup Self="pandoc_table_styles"> + <TableStyle Self="TableStyle/Table" Name="Table" /> + </RootTableStyleGroup> + <RootCellStyleGroup Self="pandoc_cell_styles"> + <CellStyle Self="CellStyle/Cell" AppliedParagraphStyle="ParagraphStyle/$ID/[No paragraph style]" Name="Cell" /> + </RootCellStyleGroup> + <Story Self="pandoc_story" + TrackChanges="false" + StoryTitle="" + AppliedTOCStyle="n" + AppliedNamedGrid="n" > + <StoryPreference OpticalMarginAlignment="true" OpticalMarginSize="12" /> + +<!-- body needs to be non-indented, otherwise code blocks are indented too far --> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Headers</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 2 with an </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-1" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>embedded link</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header3"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 3 with </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>emphasis</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header4"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 4</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header5"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 5</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 1</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 2 with </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>emphasis</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header3"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>with no blank line</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Level 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>with no blank line</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Paragraphs</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s a regular paragraph.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like a list item.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s one with a bullet. * criminey.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>There should be a hard line break</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>
</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>here.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Block Quotes</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>E-mail style:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This is a block quote. It is pretty short.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Code in a block quote:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>sub status { + print "working"; +}</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>A list:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>item one</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>item two</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Nested block quotes:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>nested</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>nested</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This should not be a block quote: 2 > 1.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>And a following paragraph.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Code Blocks</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Code:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>And:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Lists</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Unordered</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Asterisks tight:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>asterisk 1</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>asterisk 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>asterisk 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Asterisks loose:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>asterisk 1</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>asterisk 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>asterisk 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Pluses tight:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Plus 1</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Plus 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Plus 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Pluses loose:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Plus 1</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Plus 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Plus 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minuses tight:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minus 1</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minus 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minus 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minuses loose:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minus 1</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minus 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minus 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Ordered</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Tight:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>First</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Second</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Third</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>and:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>One</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Two</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Three</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Loose using tabs:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>First</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Second</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Third</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>and using spaces:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>One</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Two</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Three</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiple paragraphs:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Item 1, graf one.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > subParagraph > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Item 1. graf two. The quick brown fox jumped over the lazy dog’s back.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Item 2.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Item 3.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Nested</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Tab</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Tab</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > BulList > BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Tab</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s another:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>First</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Second:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Fee</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Fie</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Foe</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Third</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Same thing but with paragraphs:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>First</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Second:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Fee</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Fie</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Foe</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Third</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Tabs and spaces</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>this is a list item indented with tabs</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>this is a list item indented with spaces</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > BulList > first > Paragraph" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>this is an example list item indented with tabs</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > BulList > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>this is an example list item indented with spaces</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Fancy list markers</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange NumberingStartAt="2" AppliedParagraphStyle="ParagraphStyle/NumList > first > beginsWith-2" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>begins with 2</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > beginsWith-2 > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>and now 3</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > subParagraph > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>with a continuation</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange NumberingStartAt="4" AppliedParagraphStyle="ParagraphStyle/NumList > NumList > first > beginsWith-4 > lowerRoman" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>sublist with roman numerals, starting with 4</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > NumList > beginsWith-4 > lowerRoman"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>more items</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > NumList > NumList > first > upperAlpha" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>a subsublist</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > NumList > NumList > upperAlpha"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>a subsublist</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Nesting:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first > upperAlpha" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Upper Alpha</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > NumList > first > upperRoman" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Upper Roman.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange NumberingStartAt="6" AppliedParagraphStyle="ParagraphStyle/NumList > NumList > NumList > first > beginsWith-6" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Decimal start with 6</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange NumberingStartAt="3" AppliedParagraphStyle="ParagraphStyle/NumList > NumList > NumList > NumList > first > beginsWith-3 > lowerAlpha" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Lower alpha with paren</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Autonumbering:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Autonumber.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>More.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Nested.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Should not be a list item:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>M.A. 2007</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>B. Williams</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Definition Lists</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Tight using spaces:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>apple</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>red fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>banana</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>yellow fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Tight using tabs:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>apple</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>red fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>banana</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>yellow fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Loose:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>apple</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>red fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>banana</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>yellow fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiple blocks with italics:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>apple</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>red fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>contains seeds, crisp, pleasant to taste</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>orange</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>{ orange code block }</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange block quote</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiple definitions, tight:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>apple</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>red fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>computer</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>bank</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiple definitions, loose:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>apple</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>red fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>computer</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>bank</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Blank line after term, indented marker, alternate markers:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>apple</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>red fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>computer</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListTerm"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>orange fruit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>sublist</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/DefListDef > NumList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>sublist</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>HTML Blocks</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Simple block on one line:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle=""> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>foo</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>And nested without indentation:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle=""> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>foo</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle=""> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>bar</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Interpreted markdown in a table:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><table> +<tr> +<td></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle=""> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>emphasized</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content></td> +<td></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle=""> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>And this is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Bold"> + <Content>strong</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content></td> +</tr> +</table> + +<script type="text/javascript">document.write('This *should not* be interpreted as markdown');</script> + </Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s a simple block:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle=""> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>foo</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This should be a code block, though:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><div> + foo +</div></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>As should this:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><div>foo</div></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Now, nested:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle=""> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>foo</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This should just be an HTML comment:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><!-- Comment --> + </Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Multiline:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><!-- +Blah +Blah +--> + +<!-- + This is another comment. +--> + </Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Code block:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><!-- Comment --></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Just plain comment, with trailing spaces on the line:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><!-- foo --> + </Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Code:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><hr /></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Hr’s:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content><hr> + +<hr /> + +<hr /> + +<hr> + +<hr /> + +<hr /> + +<hr class="foo" id="bar" /> + +<hr class="foo" id="bar" /> + +<hr class="foo" id="bar"> + </Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Inline Markup</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>emphasized</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, and so </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>is this</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Bold"> + <Content>strong</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, and so </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Bold"> + <Content>is this</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>An </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-2" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic Link"> + <Content>emphasized link</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Bold Italic"> + <Content>This is strong and em.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>So is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Bold Italic"> + <Content>this</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> word.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Bold Italic"> + <Content>This is strong and em.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>So is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Bold Italic"> + <Content>this</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> word.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This is code: </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>></Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>$</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>\</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>\$</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content><html></Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Strikeout"> + <Content>This is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic Strikeout"> + <Content>strikeout</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Strikeout"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Superscripts: a</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Superscript"> + <Content>bc</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>d a</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic Superscript"> + <Content>hello</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> a</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Superscript"> + <Content>hello there</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Subscripts: H</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Subscript"> + <Content>2</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>O, H</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Subscript"> + <Content>23</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>O, H</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Subscript"> + <Content>many of them</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>O.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Smart quotes, ellipses, dashes</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>“</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Hello,</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>”</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> said the spider. </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>“</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Shelob</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> is my name.</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>”</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>A</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>B</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>, and </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>C</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> are letters.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Oak,</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>elm,</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> and </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>beech</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> are names of trees. So is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>pine.</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>He said, </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>“</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>I want to go.</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>”</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> Were you alive in the 70’s?</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here is some quoted </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>‘</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>code</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>’</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> and a </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>“</Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-3" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>quoted link</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>”</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Some dashes: one—two — three—four — five.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Dashes between numbers: 5–7, 255–66, 1987–1999.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Ellipses…and…and….</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>LaTeX</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>\cite[22-23]{smith.1899}</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>2+2=4</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>x \in y</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>\alpha \wedge \omega</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>223</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>p</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>-Tree</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s some display math: </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s one that has a line break in it: </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>\alpha + \omega \times x^2</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>These shouldn’t be math:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>To get the famous equation, write </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>$e = mc^2$</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>$22,000 is a </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>lot</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> of money. So is $34,000. (It worked if </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>“</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>lot</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>”</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> is emphasized.)</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Shoes ($20) and socks ($5).</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Escaped </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>$</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>: $73 </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>this should be emphasized</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> 23$.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s a LaTeX table:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Rawblock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>\begin{tabular}{|l|l|}\hline +Animal & Number \\ \hline +Dog & 2 \\ +Cat & 1 \\ \hline +\end{tabular}</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Special Characters</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here is some unicode:</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>I hat: Î</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>o umlaut: ö</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>section: §</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>set membership: ∈</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>copyright: ©</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>AT&T has an ampersand in their name.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>AT&T is another way to write it.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This & that.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>4 < 5.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>6 > 5.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Backslash: \</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Backtick: `</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Asterisk: *</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Underscore: _</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left brace: {</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right brace: }</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left bracket: [</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right bracket: ]</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Left paren: (</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Right paren: )</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Greater-than: ></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Hash: #</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Period: .</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Bang: !</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Plus: +</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Minus: -</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Links</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Explicit</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Just a </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-4" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>URL</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-5" Name="title" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>URL and title</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-6" Name="title preceded by two spaces" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>URL and title</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-7" Name="title preceded by a tab" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>URL and title</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-8" Name="title with "quotes" in it" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>URL and title</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-9" Name="title with single quotes" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>URL and title</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-10" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>with_underscore</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-11" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>Email link</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-12" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>Empty</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Reference</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Foo </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-13" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>bar</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Foo </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-14" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>bar</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Foo </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-15" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>bar</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>With </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-16" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>embedded [brackets]</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <HyperlinkTextSource Self="htss-17" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>b</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> by itself should be a link.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Indented </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-18" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>once</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Indented </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-19" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>twice</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Indented </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-20" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>thrice</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This should [not][] be a link.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>[not]: /url</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Foo </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-21" Name="Title with "quotes" inside" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>bar</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Foo </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-22" Name="Title with "quote" inside" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>biz</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>With ampersands</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s a </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-23" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>link with an ampersand in the URL</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s a link with an amersand in the link text: </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-24" Name="AT&T" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>AT&T</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s an </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-25" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>inline link</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s an </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-26" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>inline link in pointy braces</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header2"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Autolinks</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>With an ampersand: </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-27" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>http://example.com/?foo=1&bar=2</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>In a list?</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <HyperlinkTextSource Self="htss-28" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>http://example.com/</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/BulList"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>It should.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>An e-mail address: </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-29" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>nobody@nowhere.net</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Blockquoted: </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-30" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>http://example.com/</Content> + </CharacterStyleRange> + </HyperlinkTextSource><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Auto-links should not occur here: </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content><http://example.com/></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>or here: <http://example.com/></Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Images</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>From </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>“</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Voyage dans la Lune</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>”</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> by Georges Melies (1902):</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Rectangle Self="uec" ItemTransform="1 0 0 1 75 -50"> + <Properties> + <PathGeometry> + <GeometryPathType PathOpen="false"> + <PathPointArray> + <PathPointType Anchor="-75 -50" LeftDirection="-75 -50" RightDirection="-75 -50" /> + <PathPointType Anchor="-75 50" LeftDirection="-75 50" RightDirection="-75 50" /> + <PathPointType Anchor="75 50" LeftDirection="75 50" RightDirection="75 50" /> + <PathPointType Anchor="75 -50" LeftDirection="75 -50" RightDirection="75 -50" /> + </PathPointArray> + </GeometryPathType> + </PathGeometry> + </Properties> + <Image Self="ue6" ItemTransform="1.0 0 0 1.0 -75 -50"> + <Properties> + <Profile type="string"> + $ID/Embedded + <GraphicBounds Left="0" Top="0" Right="150" Bottom="100" /> + </Profile> + </Properties> + <Link Self="ueb" LinkResourceURI="lalune.jpg" /> + </Image> + </Rectangle> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here is a movie </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Rectangle Self="uec" ItemTransform="1 0 0 1 75 -50"> + <Properties> + <PathGeometry> + <GeometryPathType PathOpen="false"> + <PathPointArray> + <PathPointType Anchor="-75 -50" LeftDirection="-75 -50" RightDirection="-75 -50" /> + <PathPointType Anchor="-75 50" LeftDirection="-75 50" RightDirection="-75 50" /> + <PathPointType Anchor="75 50" LeftDirection="75 50" RightDirection="75 50" /> + <PathPointType Anchor="75 -50" LeftDirection="75 -50" RightDirection="75 -50" /> + </PathPointArray> + </GeometryPathType> + </PathGeometry> + </Properties> + <Image Self="ue6" ItemTransform="1.0 0 0 1.0 -75 -50"> + <Properties> + <Profile type="string"> + $ID/Embedded + <GraphicBounds Left="0" Top="0" Right="150" Bottom="100" /> + </Profile> + </Properties> + <Link Self="ueb" LinkResourceURI="movie.jpg" /> + </Image> + </Rectangle> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> icon.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Header1"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Footnotes</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here is a footnote reference,</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle" Position="Superscript"> + <Footnote> + <ParagraphStyleRange> + <CharacterStyleRange> + <Content><?ACE 4?></Content> + </CharacterStyleRange> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Footnote> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> and another.</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle" Position="Superscript"> + <Footnote> + <ParagraphStyleRange> + <CharacterStyleRange> + <Content><?ACE 4?></Content> + </CharacterStyleRange> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Here’s the long note. This one contains multiple blocks.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > CodeBlock"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> { <code> }</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Footnote> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> This should </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>not</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> be a footnote reference, because it contains a space.[^my note] Here is an inline note.</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle" Position="Superscript"> + <Footnote> + <ParagraphStyleRange> + <CharacterStyleRange> + <Content><?ACE 4?></Content> + </CharacterStyleRange> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This is </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Italic"> + <Content>easier</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> to type. Inline notes may contain </Content> + </CharacterStyleRange> + <HyperlinkTextSource Self="htss-31" Name="" Hidden="false"> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Link"> + <Content>links</Content> + </CharacterStyleRange> + </HyperlinkTextSource> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> and </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="CharacterStyle/Code"> + <Content>]</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> verbatim characters, as well as [bracketed text].</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Footnote> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Blockquote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>Notes can go in quotes.</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle" Position="Superscript"> + <Footnote> + <ParagraphStyleRange> + <CharacterStyleRange> + <Content><?ACE 4?></Content> + </CharacterStyleRange> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>In quote.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Footnote> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/NumList > first" NumberingContinue="false"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>And in list items.</Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle" Position="Superscript"> + <Footnote> + <ParagraphStyleRange> + <CharacterStyleRange> + <Content><?ACE 4?></Content> + </CharacterStyleRange> + </ParagraphStyleRange> + <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Footnote > Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content> </Content> + </CharacterStyleRange> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>In list.</Content> + </CharacterStyleRange><Br /> + </ParagraphStyleRange> + </Footnote> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> +<ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> + <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> + <Content>This paragraph should not be part of the note, as it is not indented.</Content> + </CharacterStyleRange><Br /> +</ParagraphStyleRange> + + </Story> + <HyperlinkURLDestination Self="HyperlinkURLDestination/http://google.com" Name="link" DestinationURL="http://google.com" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-31" Name="http://google.com" Source="htss-31" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/http://google.com</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/http://example.com/" Name="link" DestinationURL="http://example.com/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-30" Name="http://example.com/" Source="htss-30" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/http://example.com/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/mailto:nobody@nowhere.net" Name="link" DestinationURL="mailto:nobody@nowhere.net" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-29" Name="mailto:nobody@nowhere.net" Source="htss-29" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/mailto:nobody@nowhere.net</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/http://example.com/" Name="link" DestinationURL="http://example.com/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-28" Name="http://example.com/" Source="htss-28" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/http://example.com/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/http://example.com/?foo=1&bar=2" Name="link" DestinationURL="http://example.com/?foo=1&bar=2" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-27" Name="http://example.com/?foo=1&bar=2" Source="htss-27" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/http://example.com/?foo=1&bar=2</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//script?foo=1&bar=2" Name="link" DestinationURL="/script?foo=1&bar=2" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-26" Name="/script?foo=1&bar=2" Source="htss-26" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//script?foo=1&bar=2</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//script?foo=1&bar=2" Name="link" DestinationURL="/script?foo=1&bar=2" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-25" Name="/script?foo=1&bar=2" Source="htss-25" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//script?foo=1&bar=2</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/http://att.com/" Name="link" DestinationURL="http://att.com/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-24" Name="http://att.com/" Source="htss-24" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/http://att.com/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/http://example.com/?foo=1&bar=2" Name="link" DestinationURL="http://example.com/?foo=1&bar=2" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-23" Name="http://example.com/?foo=1&bar=2" Source="htss-23" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/http://example.com/?foo=1&bar=2</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-22" Name="/url/" Source="htss-22" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-21" Name="/url/" Source="htss-21" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url" Name="link" DestinationURL="/url" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-20" Name="/url" Source="htss-20" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url" Name="link" DestinationURL="/url" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-19" Name="/url" Source="htss-19" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url" Name="link" DestinationURL="/url" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-18" Name="/url" Source="htss-18" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-17" Name="/url/" Source="htss-17" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-16" Name="/url/" Source="htss-16" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-15" Name="/url/" Source="htss-15" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-14" Name="/url/" Source="htss-14" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-13" Name="/url/" Source="htss-13" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/" Name="link" DestinationURL="" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-12" Name="" Source="htss-12" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/mailto:nobody@nowhere.net" Name="link" DestinationURL="mailto:nobody@nowhere.net" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-11" Name="mailto:nobody@nowhere.net" Source="htss-11" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/mailto:nobody@nowhere.net</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/with_underscore" Name="link" DestinationURL="/url/with_underscore" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-10" Name="/url/with_underscore" Source="htss-10" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/with_underscore</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-9" Name="/url/" Source="htss-9" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-8" Name="/url/" Source="htss-8" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-7" Name="/url/" Source="htss-7" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-6" Name="/url/" Source="htss-6" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-5" Name="/url/" Source="htss-5" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url/" Name="link" DestinationURL="/url/" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-4" Name="/url/" Source="htss-4" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url/</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination/http://example.com/?foo=1&bar=2" Name="link" DestinationURL="http://example.com/?foo=1&bar=2" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-3" Name="http://example.com/?foo=1&bar=2" Source="htss-3" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination/http://example.com/?foo=1&bar=2</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url" Name="link" DestinationURL="/url" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-2" Name="/url" Source="htss-2" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url</Destination> + </Properties> + </Hyperlink> + <HyperlinkURLDestination Self="HyperlinkURLDestination//url" Name="link" DestinationURL="/url" DestinationUniqueKey="1" /> + <Hyperlink Self="uf-1" Name="/url" Source="htss-1" Visible="true" DestinationUniqueKey="1"> + <Properties> + <BorderColor type="enumeration">Black</BorderColor> + <Destination type="object">HyperlinkURLDestination//url</Destination> + </Properties> + </Hyperlink> +</Document> -- cgit v1.2.3 From 24b2ac43b0a8596f7baea10579c95ee75b6e584f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Tue, 4 Mar 2014 00:33:25 +0100 Subject: Add a simple Emacs Org-mode reader The basic structure of org-mode documents is recognized; however, org-mode features like todo markers, tags etc. are not supported yet. --- pandoc.cabal | 2 + pandoc.hs | 1 + src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Readers/Org.hs | 552 +++++++++++++++++++++++++++++++++++++++++ tests/Tests/Readers/Org.hs | 533 +++++++++++++++++++++++++++++++++++++++ tests/test-pandoc.hs | 2 + 6 files changed, 1093 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Org.hs create mode 100644 tests/Tests/Readers/Org.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index bbf963672..ccd23e551 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -263,6 +263,7 @@ Library Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.MediaWiki, Text.Pandoc.Readers.RST, + Text.Pandoc.Readers.Org, Text.Pandoc.Readers.DocBook, Text.Pandoc.Readers.OPML, Text.Pandoc.Readers.TeXMath, @@ -381,6 +382,7 @@ Test-Suite test-pandoc Tests.Walk Tests.Readers.LaTeX Tests.Readers.Markdown + Tests.Readers.Org Tests.Readers.RST Tests.Writers.Native Tests.Writers.ConTeXt diff --git a/pandoc.hs b/pandoc.hs index 677101746..709b5a777 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -834,6 +834,7 @@ defaultReaderName fallback (x:xs) = ".latex" -> "latex" ".ltx" -> "latex" ".rst" -> "rst" + ".org" -> "org" ".lhs" -> "markdown+lhs" ".db" -> "docbook" ".opml" -> "opml" diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 3ae81db00..e511ed861 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -65,6 +65,7 @@ module Text.Pandoc , readMarkdown , readMediaWiki , readRST + , readOrg , readLaTeX , readHtml , readTextile @@ -115,6 +116,7 @@ import Text.Pandoc.JSON import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.Org import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.OPML import Text.Pandoc.Readers.LaTeX @@ -201,6 +203,7 @@ readers = [ ("native" , \_ s -> return $ readNative s) ,("mediawiki" , \o s -> return $ readMediaWiki o s) ,("docbook" , \o s -> return $ readDocBook o s) ,("opml" , \o s -> return $ readOPML o s) + ,("org" , \o s -> return $ readOrg o s) ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs ,("html" , \o s -> return $ readHtml o s) ,("latex" , \o s -> return $ readLaTeX o s) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs new file mode 100644 index 000000000..5dc250f04 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org.hs @@ -0,0 +1,552 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org + Copyright : Copyright (C) 2014 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de> + +Conversion of Org-Mode to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Org ( readOrg ) where + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (orderedListMarker) +import Text.Pandoc.Shared (compactify') + +import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Monad (guard, mzero) +import Data.Char (toLower) +import Data.List (foldl') +import Data.Maybe (listToMaybe, fromMaybe) +import Data.Monoid (mconcat, mempty, mappend) + +-- | Parse org-mode string and return a Pandoc document. +readOrg :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n") + +type OrgParser = Parser [Char] ParserState + +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = stateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- parsing blocks +-- + +parseBlocks :: OrgParser Blocks +parseBlocks = mconcat <$> manyTill block eof + +block :: OrgParser Blocks +block = choice [ mempty <$ blanklines + , orgBlock + , example + , drawer + , specialLine + , header + , hline + , list + , table + , paraOrPlain + ] <?> "block" + +-- +-- Org Blocks (#+BEGIN_... / #+END_...) +-- + +orgBlock :: OrgParser Blocks +orgBlock = try $ do + (indent, blockType, args) <- blockHeader + blockStr <- rawBlockContent indent blockType + let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] + case blockType of + "comment" -> return mempty + "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr + _ -> B.divWith ("", [blockType], []) + <$> (parseFromString parseBlocks blockStr) + +blockHeader :: OrgParser (Int, String, [String]) +blockHeader = (,,) <$> blockIndent + <*> blockType + <*> (skipSpaces *> blockArgs) + where blockIndent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) + blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline + +rawBlockContent :: Int -> String -> OrgParser String +rawBlockContent indent blockType = + unlines . map commaEscaped <$> manyTill indentedLine blockEnder + where + indentedLine = try $ choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +-- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> OrgParser String +indentWith num = do + tabStop <- getOption readerTabStop + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> count (num - tabStop) (char ' ')) ] + +translateLang :: String -> String +translateLang "sh" = "bash" +translateLang cs = cs + +commaEscaped :: String -> String +commaEscaped (',':cs@('*':_)) = cs +commaEscaped (',':cs@('#':'+':_)) = cs +commaEscaped cs = cs + +example :: OrgParser Blocks +example = try $ + B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine + +exampleLine :: OrgParser String +exampleLine = try $ string ": " *> anyLine + +-- Drawers for properties or a logbook +drawer :: OrgParser Blocks +drawer = try $ do + drawerStart + manyTill drawerLine (try drawerEnd) + return mempty + +drawerStart :: OrgParser String +drawerStart = try $ + skipSpaces *> drawerName <* skipSpaces <* newline + where drawerName = try $ char ':' *> validDrawerName <* char ':' + validDrawerName = stringAnyCase "PROPERTIES" + <|> stringAnyCase "LOGBOOK" + +drawerLine :: OrgParser String +drawerLine = try $ anyLine + +drawerEnd :: OrgParser String +drawerEnd = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + + +-- Comments, Options and Metadata +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine + +metaLine :: OrgParser Blocks +metaLine = try $ metaLineStart *> declarationLine + +commentLine :: OrgParser Blocks +commentLine = try $ commentLineStart *> anyLine *> pure mempty + +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLineStart :: OrgParser String +metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" + +commentLineStart :: OrgParser String +commentLineStart = try $ mappend <$> many spaceChar <*> string "# " + +declarationLine :: OrgParser Blocks +declarationLine = try $ do + meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta + updateState $ \st -> st { stateMeta = stateMeta st <> meta' } + return mempty + +metaValue :: OrgParser MetaValue +metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine + +metaKey :: OrgParser [Char] +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +-- | Headers +header :: OrgParser Blocks +header = try $ + B.header <$> headerStart + <*> (trimInlines <$> restOfLine) + +headerStart :: OrgParser Int +headerStart = try $ + (length <$> many1 (char '*')) <* many1 (char ' ') + +-- Horizontal Line (five dashes or more) +hline :: OrgParser Blocks +hline = try $ do + skipSpaces + string "-----" + many (char '-') + skipSpaces + newline + return B.horizontalRule + +-- +-- Tables +-- + +data OrgTableRow = OrgContentRow [Blocks] + | OrgAlignRow [Alignment] + | OrgHlineRow + deriving (Eq, Show) + +type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]]) + +table :: OrgParser Blocks +table = try $ do + lookAhead tableStart + (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows + return $ B.table "" (zip aligns widths) heads lns + +tableStart :: OrgParser Char +tableStart = try $ skipSpaces *> char '|' + +tableRows :: OrgParser [OrgTableRow] +tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) + +tableContentRow :: OrgParser OrgTableRow +tableContentRow = try $ + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + +tableContentCell :: OrgParser Blocks +tableContentCell = try $ + B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell) + +endOfCell :: OrgParser Char +-- endOfCell = char '|' <|> newline +endOfCell = try $ char '|' <|> lookAhead newline + +tableAlignRow :: OrgParser OrgTableRow +tableAlignRow = try $ + OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) + +tableAlignCell :: OrgParser Alignment +tableAlignCell = + choice [ try $ emptyCell *> return (AlignDefault) + , try $ skipSpaces + *> char '<' + *> tableAlignFromChar + <* many digit + <* char '>' + <* emptyCell + ] <?> "alignment info" + where emptyCell = try $ skipSpaces *> endOfCell + +tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] + +tableHline :: OrgParser OrgTableRow +tableHline = try $ + OrgHlineRow <$ (tableStart *> char '-' *> anyLine) + +tableContent :: [OrgTableRow] + -> OrgTableContent +tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty) + +normalizeTable :: OrgTableContent + -> OrgTableContent +normalizeTable (cols, aligns, widths, heads, lns) = + let aligns' = fillColumns aligns AlignDefault + widths' = fillColumns widths 0.0 + heads' = if heads == mempty + then heads + else fillColumns heads (B.plain mempty) + lns' = map (flip fillColumns (B.plain mempty)) lns + fillColumns base padding = take cols $ base ++ repeat padding + in (cols, aligns', widths', heads', lns') + + +-- One or more horizontal rules after the first content line mark the previous +-- line as a header. All other horizontal lines are discarded. +rowToContent :: OrgTableRow + -> OrgTableContent + -> OrgTableContent +rowToContent OrgHlineRow = maybeBodyToHeader +rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs +rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as + +setLongestRow :: [a] + -> OrgTableContent + -> OrgTableContent +setLongestRow r (cols, aligns, widths, heads, lns) = + (max cols (length r), aligns, widths, heads, lns) + +maybeBodyToHeader :: OrgTableContent + -> OrgTableContent +maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, []) +maybeBodyToHeader content = content + +appendToBody :: [Blocks] + -> OrgTableContent + -> OrgTableContent +appendToBody r (cols, aligns, widths, heads, lns) = + (cols, aligns, widths, heads, lns ++ [r]) + +setAligns :: [Alignment] + -> OrgTableContent + -> OrgTableContent +setAligns aligns (cols, _, widths, heads, lns) = + (cols, aligns, widths, heads, lns) + +-- Paragraphs or Plain text +paraOrPlain :: OrgParser Blocks +paraOrPlain = try $ + trimInlines . mconcat + <$> many1 inline + <**> option B.plain + (try $ newline *> pure B.para) + +restOfLine :: OrgParser Inlines +restOfLine = mconcat <$> manyTill inline newline + + +-- +-- list blocks +-- + +list :: OrgParser Blocks +list = choice [ bulletList, orderedList ] <?> "list" + +bulletList :: OrgParser Blocks +bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) + +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) + +genericListStart :: OrgParser String + -> OrgParser Int +genericListStart listMarker = try $ + (+) <$> (length <$> many spaceChar) + <*> (length <$> listMarker <* many1 spaceChar) + +-- parses bullet list start and returns its length (excl. following whitespace) +bulletListStart :: OrgParser Int +bulletListStart = genericListStart bulletListMarker + where bulletListMarker = pure <$> oneOf "*-+" + +orderedListStart :: OrgParser Int +orderedListStart = genericListStart orderedListMarker + -- Ordered list markers allowed in org-mode + where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + +listItem :: OrgParser Int + -> OrgParser Blocks +listItem start = try $ do + (markerLength, first) <- try (start >>= rawListItem) + rest <- many (listContinuation markerLength) + parseFromString parseBlocks $ concat (first:rest) + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: Int + -> OrgParser (Int, String) +rawListItem markerLength = try $ do + firstLine <- anyLine + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + +-- continuation of a list item - indented and separated by blankline or endline. +-- Note: nested lists are parsed as continuations. +listContinuation :: Int + -> OrgParser String +listContinuation markerLength = try $ + mappend <$> many blankline + <*> (concat <$> many1 (listLine markerLength)) + +-- parse a line of a list item +listLine :: Int + -> OrgParser String +listLine markerLength = try $ + indentWith markerLength *> anyLine + <**> pure (++ "\n") + + +-- +-- inline +-- + +inline :: OrgParser Inlines +inline = choice inlineParsers <?> "inline" + where inlineParsers = [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , verbatim + , subscript + , superscript + , symbol + ] + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" + + +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" + +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + <* updateLastStrPos + +-- an endline character that can be treated as a space, not a structural break +endline :: OrgParser Inlines +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy' exampleLine + notFollowedBy' hline + notFollowedBy' tableStart + notFollowedBy' drawerStart + notFollowedBy' headerStart + notFollowedBy' metaLineStart + notFollowedBy' commentLineStart + notFollowedBy' bulletListStart + notFollowedBy' orderedListStart + return B.space + +link :: OrgParser Inlines +link = explicitLink <|> selfLink <?> "link" + +explicitLink :: OrgParser Inlines +explicitLink = try $ do + char '[' + src <- enclosedRaw (char '[') (char ']') + title <- enclosedInlines (char '[') (char ']') + char ']' + return $ B.link src "" title + +selfLink :: OrgParser Inlines +selfLink = try $ do + src <- enclosedRaw (string "[[") (string "]]") + return $ B.link src "" (B.str src) + +emph :: OrgParser Inlines +emph = B.emph <$> inlinesEnclosedBy '/' + +strong :: OrgParser Inlines +strong = B.strong <$> inlinesEnclosedBy '*' + +strikeout :: OrgParser Inlines +strikeout = B.strikeout <$> inlinesEnclosedBy '+' + +-- There is no underline, so we use strong instead. +underline :: OrgParser Inlines +underline = B.strong <$> inlinesEnclosedBy '_' + +code :: OrgParser Inlines +code = B.code <$> rawEnclosedBy '=' + +verbatim :: OrgParser Inlines +verbatim = B.rawInline "" <$> rawEnclosedBy '~' + +subscript :: OrgParser Inlines +subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) + +superscript :: OrgParser Inlines +superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces) + +maybeGroupedByBraces :: OrgParser Inlines +maybeGroupedByBraces = try $ + choice [ try $ enclosedInlines (char '{') (char '}') + , B.str . (:"") <$> anyChar + ] + +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> oneOf specialChars + +enclosedInlines :: OrgParser a + -> OrgParser b + -> OrgParser Inlines +enclosedInlines start end = try $ + trimInlines . mconcat <$> enclosed start end inline + +-- FIXME: This is a hack +inlinesEnclosedBy :: Char + -> OrgParser Inlines +inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + +enclosedRaw :: OrgParser a + -> OrgParser b + -> OrgParser String +enclosedRaw start end = try $ + start *> (onSingleLine <|> spanningTwoLines) + where onSingleLine = try $ many1Till (noneOf "\n\r") end + spanningTwoLines = try $ + anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine + +rawEnclosedBy :: Char + -> OrgParser String +rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) + +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: OrgParser a -> OrgParser a +atStart p = do + pos <- getPosition + st <- getState + guard $ stateLastStrPos st /= Just pos + p + +-- | succeeds only if we're at the end of a word +atEnd :: OrgParser a -> OrgParser a +atEnd p = try $ p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars + +postWordChars :: [Char] +postWordChars = "\t\n\r !\"'),-.:?}" + +-- FIXME: These functions are hacks and should be replaced +endsOnThisOrNextLine :: Char + -> OrgParser () +endsOnThisOrNextLine c = do + inp <- getInput + let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) + endsOnThisLine inp c doOtherwise + +endsOnThisLine :: [Char] + -> Char + -> ([Char] -> OrgParser ()) + -> OrgParser () +endsOnThisLine input c doOnOtherLines = do + case break (`elem` c:"\n") input of + (_,'\n':rest) -> doOnOtherLines rest + (_,_:rest@(n:_)) -> if n `elem` postWordChars + then return () + else endsOnThisLine rest c doOnOtherLines + _ -> mzero + diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs new file mode 100644 index 000000000..8c5982302 --- /dev/null +++ b/tests/Tests/Readers/Org.hs @@ -0,0 +1,533 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.Org (tests) where + +import Text.Pandoc.Definition +import Test.Framework +import Tests.Helpers +import Tests.Arbitrary() +import Text.Pandoc.Builder +import Text.Pandoc +import Data.List (intersperse) +import Data.Monoid (mempty, mconcat) + +org :: String -> Pandoc +org = readOrg def + +infix 4 =: +(=:) :: ToString c + => String -> (String, c) -> Test +(=:) = test org + +spcSep :: [Inlines] -> Inlines +spcSep = mconcat . intersperse space + +simpleTable' :: Int + -> [Blocks] + -> [[Blocks]] + -> Blocks +simpleTable' n = table "" (take n $ repeat (AlignDefault, 0.0)) + +tests :: [Test] +tests = + [ testGroup "Inlines" $ + [ "Plain String" =: + "Hello, World" =?> + para (spcSep [ "Hello,", "World" ]) + + , "Emphasis" =: + "/Planet Punk/" =?> + para (emph . spcSep $ ["Planet", "Punk"]) + + , "Strong" =: + "*Cider*" =?> + para (strong "Cider") + + , "Strikeout" =: + "+Kill Bill+" =?> + para (strikeout . spcSep $ [ "Kill", "Bill" ]) + + , "Code" =: + "=Robot.rock()=" =?> + para (code "Robot.rock()") + + , "Verbatim" =: + "~word for word~" =?> + para (rawInline "" "word for word") + + , "Symbol" =: + "A * symbol" =?> + para (str "A" <> space <> str "*" <> space <> "symbol") + + , "Superscript single char" =: + "2^n" =?> + para (str "2" <> superscript "n") + + , "Superscript multi char" =: + "2^{n-1}" =?> + para (str "2" <> superscript "n-1") + + , "Subscript single char" =: + "a_n" =?> + para (str "a" <> subscript "n") + + , "Subscript multi char" =: + "a_{n+1}" =?> + para (str "a" <> subscript "n+1") + + , "Markup-chars not occuring on word break are symbols" =: + unlines [ "this+that+ +so+on" + , "seven*eight* nine*" + , "+not+funny+" + ] =?> + para (spcSep [ "this+that+", "+so+on" + , "seven*eight*", "nine*" + , strikeout "not+funny" + ]) + + , "Markup may not span more than two lines" =: + unlines [ "/this *is", "not*", "emph/" ] =?> + para (spcSep [ "/this" + , (strong ("is" <> space <> "not")) + , "emph/" ]) + + , "Explicit link" =: + "[[http://zeitlens.com/][pseudo-random nonsense]]" =?> + (para $ link "http://zeitlens.com/" "" + ("pseudo-random" <> space <> "nonsense")) + + , "Self-link" =: + "[[http://zeitlens.com/]]" =?> + (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/") + ] + + , testGroup "Meta Information" $ + [ "Comment" =: + "# Nothing to see here" =?> + (mempty::Blocks) + + , "Not a comment" =: + "#-tag" =?> + para "#-tag" + + , "Comment surrounded by Text" =: + unlines [ "Before" + , "# Comment" + , "After" + ] =?> + mconcat [ para "Before" + , para "After" + ] + + , "Title" =: + "#+TITLE: Hello, World" =?> + let titleInline = toList $ "Hello," <> space <> "World" + meta = setMeta "title" (MetaInlines titleInline) $ nullMeta + in Pandoc meta mempty + + , "Author" =: + "#+author: Albert /Emacs-Fanboy/ Krewinkel" =?> + let author = toList . spcSep $ [ "Albert", emph "Emacs-Fanboy", "Krewinkel" ] + meta = setMeta "author" (MetaInlines author) $ nullMeta + in Pandoc meta mempty + + , "Date" =: + "#+Date: Feb. *28*, 2014" =?> + let date = toList . spcSep $ [ "Feb.", (strong "28") <> ",", "2014" ] + meta = setMeta "date" (MetaInlines date) $ nullMeta + in Pandoc meta mempty + + , "Description" =: + "#+DESCRIPTION: Explanatory text" =?> + let description = toList . spcSep $ [ "Explanatory", "text" ] + meta = setMeta "description" (MetaInlines description) $ nullMeta + in Pandoc meta mempty + + , "Properties drawer" =: + unlines [ " :PROPERTIES:" + , " :setting: foo" + , " :END:" + ] =?> + (mempty::Blocks) + + , "Logbook drawer" =: + unlines [ " :LogBook:" + , " - State \"DONE\" from \"TODO\" [2014-03-03 Mon 11:00]" + , " :END:" + ] =?> + (mempty::Blocks) + + , "Drawer surrounded by text" =: + unlines [ "Before" + , ":PROPERTIES:" + , ":END:" + , "After" + ] =?> + para "Before" <> para "After" + + , "Drawer start is the only text in first line of a drawer" =: + unlines [ " :LOGBOOK: foo" + , " :END:" + ] =?> + para (spcSep [ ":LOGBOOK:", "foo", ":END:" ]) + + , "Drawers with unknown names are just text" =: + unlines [ ":FOO:" + , ":END:" + ] =?> + para (":FOO:" <> space <> ":END:") + ] + + , testGroup "Basic Blocks" $ + [ "Paragraph" =: + "Paragraph\n" =?> + para "Paragraph" + + , "First Level Header" =: + "* Headline\n" =?> + header 1 "Headline" + + , "Third Level Header" =: + "*** Third Level Headline\n" =?> + header 3 ("Third" <> space <> + "Level" <> space <> + "Headline") + + , "Compact Headers with Paragraph" =: + unlines [ "* First Level" + , "** Second Level" + , " Text" + ] =?> + mconcat [ header 1 ("First" <> space <> "Level") + , header 2 ("Second" <> space <> "Level") + , para "Text" + ] + + , "Separated Headers with Paragraph" =: + unlines [ "* First Level" + , "" + , "** Second Level" + , "" + , " Text" + ] =?> + mconcat [ header 1 ("First" <> space <> "Level") + , header 2 ("Second" <> space <> "Level") + , para "Text" + ] + + , "Headers not preceded by a blank line" =: + unlines [ "** eat dinner" + , "Spaghetti and meatballs tonight." + , "** walk dog" + ] =?> + mconcat [ header 2 ("eat" <> space <> "dinner") + , para $ spcSep [ "Spaghetti", "and", "meatballs", "tonight." ] + , header 2 ("walk" <> space <> "dog") + ] + + , "Paragraph starting with an asterisk" =: + "*five" =?> + para "*five" + + , "Paragraph containing asterisk at beginning of line" =: + unlines [ "lucky" + , "*star" + ] =?> + para ("lucky" <> space <> "*star") + + , "Example block" =: + unlines [ ": echo hello" + , ": echo dear tester" + ] =?> + codeBlockWith ("", ["example"], []) "echo hello\necho dear tester\n" + + , "Example block surrounded by text" =: + unlines [ "Greetings" + , ": echo hello" + , ": echo dear tester" + , "Bye" + ] =?> + mconcat [ para "Greetings" + , codeBlockWith ("", ["example"], []) + "echo hello\necho dear tester\n" + , para "Bye" + ] + + , "Horizontal Rule" =: + unlines [ "before" + , "-----" + , "after" + ] =?> + mconcat [ para "before" + , horizontalRule + , para "after" + ] + + , "Not a Horizontal Rule" =: + "----- five dashes" =?> + (para $ spcSep [ "-----", "five", "dashes" ]) + + , "Comment Block" =: + unlines [ "#+BEGIN_COMMENT" + , "stuff" + , "bla" + , "#+END_COMMENT"] =?> + (mempty::Blocks) + + , "Source Block in Text" =: + unlines [ "Low German greeting" + , " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] + , codeBlockWith attr' code' + ] + + , "Source Block" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + ] + + , testGroup "Lists" $ + [ "Simple Bullet Lists" =: + ("- Item1\n" ++ + "- Item2\n") =?> + bulletList [ plain "Item1" + , plain "Item2" + ] + + , "Indented Bullet Lists" =: + (" - Item1\n" ++ + " - Item2\n") =?> + bulletList [ plain "Item1" + , plain "Item2" + ] + + , "Multi-line Bullet Lists" =: + ("- *Fat\n" ++ + " Tony*\n" ++ + "- /Sideshow\n" ++ + " Bob/") =?> + bulletList [ plain $ strong ("Fat" <> space <> "Tony") + , plain $ emph ("Sideshow" <> space <> "Bob") + ] + + , "Nested Bullet Lists" =: + ("- Discovery\n" ++ + " + One More Time\n" ++ + " + Harder, Better, Faster, Stronger\n" ++ + "- Homework\n" ++ + " + Around the World\n"++ + "- Human After All\n" ++ + " + Technologic\n" ++ + " + Robot Rock\n") =?> + bulletList [ mconcat + [ para "Discovery" + , bulletList [ plain ("One" <> space <> + "More" <> space <> + "Time") + , plain ("Harder," <> space <> + "Better," <> space <> + "Faster," <> space <> + "Stronger") + ] + ] + , mconcat + [ para "Homework" + , bulletList [ plain ("Around" <> space <> + "the" <> space <> + "World") + ] + ] + , mconcat + [ para ("Human" <> space <> "After" <> space <> "All") + , bulletList [ plain "Technologic" + , plain ("Robot" <> space <> "Rock") + ] + ] + ] + + , "Simple Ordered List" =: + ("1. Item1\n" ++ + "2. Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Simple Ordered List with Parens" =: + ("1) Item1\n" ++ + "2) Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Indented Ordered List" =: + (" 1. Item1\n" ++ + " 2. Item2\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ plain "Item1" + , plain "Item2" + ] + in orderedListWith listStyle listStructure + + , "Nested Ordered Lists" =: + ("1. One\n" ++ + " 1. One-One\n" ++ + " 2. One-Two\n" ++ + "2. Two\n" ++ + " 1. Two-One\n"++ + " 2. Two-Two\n") =?> + let listStyle = (1, DefaultStyle, DefaultDelim) + listStructure = [ mconcat + [ para "One" + , orderedList [ plain "One-One" + , plain "One-Two" + ] + ] + , mconcat + [ para "Two" + , orderedList [ plain "Two-One" + , plain "Two-Two" + ] + ] + ] + in orderedListWith listStyle listStructure + + , "Ordered List in Bullet List" =: + ("- Emacs\n" ++ + " 1. Org\n") =?> + bulletList [ (para "Emacs") <> + (orderedList [ plain "Org"]) + ] + + , "Bullet List in Ordered List" =: + ("1. GNU\n" ++ + " - Freedom\n") =?> + orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + ] + + , testGroup "Tables" + [ "Single cell table" =: + "|Test|" =?> + simpleTable' 1 mempty [[plain "Test"]] + + , "Multi cell table" =: + "| One | Two |" =?> + simpleTable' 2 mempty [ [ plain "One", plain "Two" ] ] + + , "Multi line table" =: + unlines [ "| One |" + , "| Two |" + , "| Three |" + ] =?> + simpleTable' 1 mempty + [ [ plain "One" ] + , [ plain "Two" ] + , [ plain "Three" ] + ] + + , "Empty table" =: + "||" =?> + simpleTable' 1 mempty mempty + + , "Glider Table" =: + unlines [ "| 1 | 0 | 0 |" + , "| 0 | 1 | 1 |" + , "| 1 | 1 | 0 |" + ] =?> + simpleTable' 3 mempty + [ [ plain "1", plain "0", plain "0" ] + , [ plain "0", plain "1", plain "1" ] + , [ plain "1", plain "1", plain "0" ] + ] + + , "Table between Paragraphs" =: + unlines [ "Before" + , "| One | Two |" + , "After" + ] =?> + mconcat [ para "Before" + , simpleTable' 2 mempty [ [ plain "One", plain "Two" ] ] + , para "After" + ] + + , "Table with Header" =: + unlines [ "| Species | Status |" + , "|--------------+--------------|" + , "| cervisiae | domesticated |" + , "| paradoxus | wild |" + ] =?> + simpleTable [ plain "Species", plain "Status" ] + [ [ plain "cervisiae", plain "domesticated" ] + , [ plain "paradoxus", plain "wild" ] + ] + + , "Table with final hline" =: + unlines [ "| cervisiae | domesticated |" + , "| paradoxus | wild |" + , "|--------------+--------------|" + ] =?> + simpleTable' 2 mempty + [ [ plain "cervisiae", plain "domesticated" ] + , [ plain "paradoxus", plain "wild" ] + ] + + , "Table in a box" =: + unlines [ "|---------|---------|" + , "| static | Haskell |" + , "| dynamic | Lisp |" + , "|---------+---------|" + ] =?> + simpleTable' 2 mempty + [ [ plain "static", plain "Haskell" ] + , [ plain "dynamic", plain "Lisp" ] + ] + + , "Table with alignment row" =: + unlines [ "| Numbers | Text | More |" + , "| <c> | <r> | |" + , "| 1 | One | foo |" + , "| 2 | Two | bar |" + ] =?> + table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0]) + [] + [ [ plain "Numbers", plain "Text", plain "More" ] + , [ plain "1" , plain "One" , plain "foo" ] + , [ plain "2" , plain "Two" , plain "bar" ] + ] + + , "Pipe within text doesn't start a table" =: + "Ceci n'est pas une | pipe " =?> + para (spcSep [ "Ceci", "n'est", "pas", "une", "|", "pipe" ]) + + , "Missing pipe at end of row" =: + "|incomplete-but-valid" =?> + simpleTable' 1 mempty [ [ plain "incomplete-but-valid" ] ] + + , "Table with differing row lengths" =: + unlines [ "| Numbers | Text " + , "|-" + , "| <c> | <r> |" + , "| 1 | One | foo |" + , "| 2" + ] =?> + table "" (zip [AlignCenter, AlignRight, AlignDefault] [0, 0, 0]) + [ plain "Numbers", plain "Text" , plain mempty ] + [ [ plain "1" , plain "One" , plain "foo" ] + , [ plain "2" , plain mempty , plain mempty ] + ] + ] + ] diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index ae521541a..74f8e5044 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -7,6 +7,7 @@ import GHC.IO.Encoding import qualified Tests.Old import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown +import qualified Tests.Readers.Org import qualified Tests.Readers.RST import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX @@ -31,6 +32,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Readers" [ testGroup "LaTeX" Tests.Readers.LaTeX.tests , testGroup "Markdown" Tests.Readers.Markdown.tests + , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests ] ] -- cgit v1.2.3 From 3126b00f11a684a20633cdc3810722e2bf210d36 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 5 Mar 2014 08:47:20 -0800 Subject: Templates: YAML objects resolve to "true" in conditionals. Closes #1133. Note: If address is a YAML object and you just have $address$ in your template, the word "true" will appear, which may be unexpected. (Previously nothing would appear.) --- src/Text/Pandoc/Templates.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index ad8838f72..52625abf6 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -190,6 +190,7 @@ resolveVar var' val = Just (String t) -> T.stripEnd t Just (Number n) -> T.pack $ show n Just (Bool True) -> "true" + Just (Object _) -> "true" Just _ -> mempty Nothing -> mempty -- cgit v1.2.3 From 6fda3619770f7c07ccbfc8a51614c55e01b740d8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 5 Mar 2014 09:10:09 -0800 Subject: SelfContained: Handle "poster" attribute in "video" tags. Closes #1188. --- src/Text/Pandoc/SelfContained.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 6112e764f..7fc9c2966 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -50,14 +50,16 @@ isOk c = isAscii c && isAlphaNum c convertTag :: Maybe FilePath -> Tag String -> IO (Tag String) convertTag userdata t@(TagOpen tagname as) - | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = - case fromAttrib "src" t of - [] -> return t - src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) - return $ TagOpen tagname - (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) + | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do + as' <- mapM processAttribute as + return $ TagOpen tagname as' + where processAttribute (x,y) = + if x == "src" || x == "href" || x == "poster" + then do + (raw, mime) <- getRaw userdata (fromAttrib "type" t) y + let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) + return (x, enc) + else return (x,y) convertTag userdata t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t -- cgit v1.2.3 From f3c9d3788530e450d1bb23a4fd829bc5a6eed266 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 5 Mar 2014 13:01:23 -0800 Subject: HTML writer: Add colgroup around col tags. Also affects EPUB writer. Closes #877. --- src/Text/Pandoc/Writers/HTML.hs | 15 ++++++++++----- tests/tables.html | 6 ++++++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 3ac2a836f..e0385af25 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -533,11 +533,16 @@ blockToHtml opts (Table capt aligns widths headers rows') = do let percent w = show (truncate (100*w) :: Integer) ++ "%" let coltags = if all (== 0.0) widths then mempty - else mconcat $ map (\w -> - if writerHtml5 opts - then H.col ! A.style (toValue $ "width: " ++ percent w) - else H.col ! A.width (toValue $ percent w) >> nl opts) - widths + else do + H.colgroup $ do + nl opts + mapM_ (\w -> do + if writerHtml5 opts + then H.col ! A.style (toValue $ "width: " ++ + percent w) + else H.col ! A.width (toValue $ percent w) + nl opts) widths + nl opts head' <- if all null headers then return mempty else do diff --git a/tests/tables.html b/tests/tables.html index b72aa784e..a9b2b247d 100644 --- a/tests/tables.html +++ b/tests/tables.html @@ -96,10 +96,12 @@ <p>Multiline table with caption:</p> <table> <caption>Here's the caption. It may span multiple lines.</caption> +<colgroup> <col width="15%" /> <col width="13%" /> <col width="16%" /> <col width="33%" /> +</colgroup> <thead> <tr class="header"> <th align="center">Centered Header</th> @@ -125,10 +127,12 @@ </table> <p>Multiline table without caption:</p> <table> +<colgroup> <col width="15%" /> <col width="13%" /> <col width="16%" /> <col width="33%" /> +</colgroup> <thead> <tr class="header"> <th align="center">Centered Header</th> @@ -177,10 +181,12 @@ </table> <p>Multiline table without column headers:</p> <table> +<colgroup> <col width="15%" /> <col width="13%" /> <col width="16%" /> <col width="33%" /> +</colgroup> <tbody> <tr class="odd"> <td align="center">First</td> -- cgit v1.2.3 From c026c16fa6d9313dc2aa30e8348176e292dc78e0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 9 Mar 2014 21:26:25 -0700 Subject: PDF: Use / as path separators even on Windows. This seems to be necessary for texlive. Closes #1151 (again!). --- src/Text/Pandoc/PDF.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 39442854d..06e4d2aae 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -91,7 +91,8 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime let basename = UTF8.toString $ B64.encode $ UTF8.fromString src - let fname = tmpdir </> basename <.> ext + -- note: we want / even on Windows, for TexLive: + let fname = tmpdir ++ "/" ++ basename <.> ext BS.writeFile fname contents return $ Image ils (fname,tit) _ -> do @@ -143,7 +144,7 @@ extractMsg log' = do runTeXProgram :: String -> Int -> FilePath -> String -> IO (ExitCode, ByteString, Maybe ByteString) runTeXProgram program runsLeft tmpDir source = do - let file = tmpDir </> "input.tex" + let file = tmpDir ++ "/input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", -- cgit v1.2.3 From 5040f3ede08188610054ff2afab1c3cd9c16c58a Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Mon, 10 Mar 2014 11:16:09 -0700 Subject: PDF: Use / as path separators in tempdir on Windows. This is needed for texlive. Note that the / is used only in the body of withTempDir, so when the directory is deleted, the original separators will be used. See #1151. --- src/Text/Pandoc/PDF.hs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 06e4d2aae..4dfa1d827 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -51,13 +51,21 @@ import Text.Pandoc.Options (WriterOptions(..)) import Text.Pandoc.MIME (extensionFromMimeType) import Text.Pandoc.Process (pipeProcess) import qualified Data.ByteString.Lazy as BL +#ifdef _WINDOWS +import Data.List (intercalate) +#endif withTempDir :: String -> (FilePath -> IO a) -> IO a -withTempDir = +withTempDir f = #ifdef _WINDOWS - withTempDirectory "." + withTempDirectory "." (f . changePathSeparators) #else - withSystemTempDirectory + withSystemTempDirectory f +#endif + +#ifdef _WINDOWS +changePathSeparators :: FilePath -> FilePath +changePathSeparators = intercalate "/" . splitDirectories #endif makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex) -- cgit v1.2.3 From 1aed9208f883c3bf67f81582b149912de832273a Mon Sep 17 00:00:00 2001 From: Tim Lin <timtylin@gmail.com> Date: Mon, 10 Mar 2014 16:23:57 -0700 Subject: PDF: Use / as path separators in latex input only Fixes compile error on Windows for 5040f3e Reverted back to canonical file separators </> in all places except for arguments to the LaTeX builder and in TEXINPUTS See #1151. Note: Temporary directories still fail to be removed in Windows due to call of ByteString.Lazy.readFile creating process ownership of the compiled pdf file. --- src/Text/Pandoc/PDF.hs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 4dfa1d827..a5f1597bd 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -56,11 +56,11 @@ import Data.List (intercalate) #endif withTempDir :: String -> (FilePath -> IO a) -> IO a -withTempDir f = +withTempDir = #ifdef _WINDOWS - withTempDirectory "." (f . changePathSeparators) + withTempDirectory "." #else - withSystemTempDirectory f + withSystemTempDirectory #endif #ifdef _WINDOWS @@ -99,8 +99,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime let basename = UTF8.toString $ B64.encode $ UTF8.fromString src - -- note: we want / even on Windows, for TexLive: - let fname = tmpdir ++ "/" ++ basename <.> ext + let fname = tmpdir </> basename <.> ext BS.writeFile fname contents return $ Image ils (fname,tit) _ -> do @@ -152,18 +151,22 @@ extractMsg log' = do runTeXProgram :: String -> Int -> FilePath -> String -> IO (ExitCode, ByteString, Maybe ByteString) runTeXProgram program runsLeft tmpDir source = do - let file = tmpDir ++ "/input.tex" + let file = tmpDir </> "input.tex" exists <- doesFileExist file unless exists $ UTF8.writeFile file source - let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir, file] - env' <- getEnvironment #ifdef _WINDOWS - let sep = ";" + -- note: we want / even on Windows, for TexLive + let tmpDir' = changePathSeparators tmpDir + let file' = changePathSeparators file #else - let sep = ":" + let tmpDir' = tmpDir + let file' = file #endif - let texinputs = maybe (tmpDir ++ sep) ((tmpDir ++ sep) ++) + let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", + "-output-directory", tmpDir', file'] + env' <- getEnvironment + let sep = searchPathSeparator:[] + let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) $ lookup "TEXINPUTS" env' let env'' = ("TEXINPUTS", texinputs) : [(k,v) | (k,v) <- env', k /= "TEXINPUTS"] -- cgit v1.2.3 From 76ef65f0b36d3a613e004350609d3696d0bf5658 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 12 Mar 2014 10:23:45 -0700 Subject: Man writer: Ensure that terms in definition lists aren't line wrapped. Closes #1195. --- src/Text/Pandoc/Writers/Man.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index b31cc2b70..680bfef44 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -283,7 +283,7 @@ definitionListItemToMan opts (label, defs) = do mapM (\item -> blockToMan opts item) rest first' <- blockToMan opts first return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ text ".TP" $$ text ".B " <> labelText $$ contents + return $ text ".TP" $$ nowrap (text ".B " <> labelText) $$ contents -- | Convert list of Pandoc block elements to man. blockListToMan :: WriterOptions -- ^ Options -- cgit v1.2.3 From 814af2002e4837c160526123ef753bb34547d811 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 14 Mar 2014 14:03:15 -0700 Subject: RST writer: Avoid stack overflow with certain tables. Closes #1197. Note that there are still problems with the formatting of the tables inside tables with output produced from the input file in the original bug report. But this fixes the stack overflow problem. --- src/Text/Pandoc/Writers/RST.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 37bb66632..1e7596b21 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -219,11 +219,15 @@ blockToRST (Table caption _ widths headers rows) = do else blankline <> text "Table: " <> caption' headers' <- mapM blockListToRST headers rawRows <- mapM (mapM blockListToRST) rows - let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows + -- let isSimpleCell [Plain _] = True + -- isSimpleCell [Para _] = True + -- isSimpleCell [] = True + -- isSimpleCell _ = False + -- let isSimple = all (==0) widths && all (all isSimpleCell) rows let numChars = maximum . map offset opts <- get >>= return . stOptions let widthsInChars = - if isSimple + if all (== 0) widths then map ((+2) . numChars) $ transpose (headers' : rawRows) else map (floor . (fromIntegral (writerColumns opts) *)) widths let hpipeBlocks blocks = hcat [beg, middle, end] -- cgit v1.2.3 From f6141aa241eb2e636cda369c12d26c8f4b4a3308 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 14 Mar 2014 15:18:43 -0700 Subject: EPUB writer: Incorporate files linked in <video> tags. src and poster will both be incorporated into content.opf and the epub container. This partially address #1170. Still need to do something similar for <audio>. --- src/Text/Pandoc/MIME.hs | 1 + src/Text/Pandoc/Writers/EPUB.hs | 56 +++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 44989ee94..977cb576b 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -246,6 +246,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("lzx","application/x-lzx") ,("m3u","audio/mpegurl") ,("m4a","audio/mpeg") + ,("m4v","video/x-m4v") ,("maker","application/x-maker") ,("man","application/x-troff-man") ,("mcif","chemical/x-mmcif") diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index a48300939..d4044d475 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -65,6 +65,7 @@ import Prelude hiding (catch) #endif import Control.Exception (catch, SomeException) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) +import Text.HTML.TagSoup -- A Chapter includes a list of blocks and maybe a section -- number offset. Note, some chapters are unnumbered. The section @@ -342,8 +343,8 @@ writeEPUB opts doc@(Pandoc meta _) = do -- handle pictures picsRef <- newIORef [] - Pandoc _ blocks <- walkM - (transformInline opts' picsRef) doc + Pandoc _ blocks <- walkM (transformInline opts' picsRef) doc >>= + walkM (transformBlock opts' picsRef) pics <- readIORef picsRef let readPicEntry entries (oldsrc, newsrc) = do res <- fetchItem (writerSourceURL opts') oldsrc @@ -715,21 +716,55 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" +transformTag :: WriterOptions + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> Tag String + -> IO (Tag String) +transformTag opts picsRef tag@(TagOpen "video" attr) = do + let src = fromAttrib "src" tag + let poster = fromAttrib "poster" tag + let oldsrc = maybe src (</> src) $ writerSourceURL opts + let oldposter = maybe poster (</> poster) $ writerSourceURL opts + newsrc <- modifyPicsRef picsRef oldsrc + newposter <- modifyPicsRef picsRef oldposter + let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ + [("src", newsrc) | not (null newsrc)] ++ + [("poster", newposter) | not (null newposter)] + return $ TagOpen "video" attr' +transformTag _ _ tag = return tag + +modifyPicsRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath +modifyPicsRef _ "" = return "" +modifyPicsRef picsRef oldsrc = do + pics <- readIORef picsRef + case lookup oldsrc pics of + Just n -> return n + Nothing -> do + let new = "images/img" ++ show (length pics) ++ + takeExtension oldsrc + modifyIORef picsRef ( (oldsrc, new): ) + return new + +transformBlock :: WriterOptions + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> Block + -> IO Block +transformBlock opts picsRef (RawBlock fmt raw) + | fmt == Format "html" = do + let tags = parseTags raw + -- look for video tags and add poster and src to images + tags' <- mapM (transformTag opts picsRef) tags + return $ RawBlock fmt (renderTags tags') +transformBlock _ _ b = return b + transformInline :: WriterOptions -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images -> Inline -> IO Inline transformInline opts picsRef (Image lab (src,tit)) = do let src' = unEscapeString src - pics <- readIORef picsRef let oldsrc = maybe src' (</> src) $ writerSourceURL opts - let ext = takeExtension src' - newsrc <- case lookup oldsrc pics of - Just n -> return n - Nothing -> do - let new = "images/img" ++ show (length pics) ++ ext - modifyIORef picsRef ( (oldsrc, new): ) - return new + newsrc <- modifyPicsRef picsRef oldsrc return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do @@ -762,6 +797,7 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . imageTypeOf :: FilePath -> Maybe String imageTypeOf x = case getMimeType x of Just y@('i':'m':'a':'g':'e':_) -> Just y + Just y@('v':'i':'d':'e':'o':_) -> Just y _ -> Nothing data IdentState = IdentState{ -- cgit v1.2.3 From 91696c62c431b85de7704b5444431230464d77b3 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 14 Mar 2014 15:30:11 -0700 Subject: EPUB writer: Handle media in audio source tags. This should resolve the rest of #1170, but it needs extensive testing. Note that we now use a 'media' directory rather than 'images'. --- src/Text/Pandoc/Writers/EPUB.hs | 55 +++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index d4044d475..75aae55c1 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -342,15 +342,15 @@ writeEPUB opts doc@(Pandoc meta _) = do let tpEntry = mkEntry "title_page.xhtml" tpContent -- handle pictures - picsRef <- newIORef [] - Pandoc _ blocks <- walkM (transformInline opts' picsRef) doc >>= - walkM (transformBlock opts' picsRef) - pics <- readIORef picsRef + mediaRef <- newIORef [] + Pandoc _ blocks <- walkM (transformInline opts' mediaRef) doc >>= + walkM (transformBlock opts' mediaRef) + pics <- readIORef mediaRef let readPicEntry entries (oldsrc, newsrc) = do res <- fetchItem (writerSourceURL opts') oldsrc case res of Left _ -> do - warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." + warn $ "Could not find media `" ++ oldsrc ++ "', skipping..." return entries Right (img,_) -> return $ (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries @@ -440,7 +440,7 @@ writeEPUB opts doc@(Pandoc meta _) = do [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", fromMaybe "application/octet-stream" - $ imageTypeOf $ eRelativePath ent)] $ () + $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), @@ -717,54 +717,54 @@ showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" transformTag :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Tag String -> IO (Tag String) -transformTag opts picsRef tag@(TagOpen "video" attr) = do +transformTag opts mediaRef tag@(TagOpen name attr) + | name == "video" || name == "source" = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag let oldsrc = maybe src (</> src) $ writerSourceURL opts let oldposter = maybe poster (</> poster) $ writerSourceURL opts - newsrc <- modifyPicsRef picsRef oldsrc - newposter <- modifyPicsRef picsRef oldposter + newsrc <- modifyMediaRef mediaRef oldsrc + newposter <- modifyMediaRef mediaRef oldposter let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] - return $ TagOpen "video" attr' + return $ TagOpen name attr' transformTag _ _ tag = return tag -modifyPicsRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath -modifyPicsRef _ "" = return "" -modifyPicsRef picsRef oldsrc = do - pics <- readIORef picsRef - case lookup oldsrc pics of +modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath +modifyMediaRef _ "" = return "" +modifyMediaRef mediaRef oldsrc = do + media <- readIORef mediaRef + case lookup oldsrc media of Just n -> return n Nothing -> do - let new = "images/img" ++ show (length pics) ++ + let new = "media/file" ++ show (length media) ++ takeExtension oldsrc - modifyIORef picsRef ( (oldsrc, new): ) + modifyIORef mediaRef ( (oldsrc, new): ) return new transformBlock :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Block -> IO Block -transformBlock opts picsRef (RawBlock fmt raw) +transformBlock opts mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - -- look for video tags and add poster and src to images - tags' <- mapM (transformTag opts picsRef) tags + tags' <- mapM (transformTag opts mediaRef) tags return $ RawBlock fmt (renderTags tags') transformBlock _ _ b = return b transformInline :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline opts picsRef (Image lab (src,tit)) = do +transformInline opts mediaRef (Image lab (src,tit)) = do let src' = unEscapeString src let oldsrc = maybe src' (</> src) $ writerSourceURL opts - newsrc <- modifyPicsRef picsRef oldsrc + newsrc <- modifyMediaRef mediaRef oldsrc return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do @@ -794,10 +794,11 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs -imageTypeOf :: FilePath -> Maybe String -imageTypeOf x = case getMimeType x of +mediaTypeOf :: FilePath -> Maybe String +mediaTypeOf x = case getMimeType x of Just y@('i':'m':'a':'g':'e':_) -> Just y Just y@('v':'i':'d':'e':'o':_) -> Just y + Just y@('a':'u':'d':'i':'o':_) -> Just y _ -> Nothing data IdentState = IdentState{ -- cgit v1.2.3 From 44f58e7e380b2ec38ba65e1cebb295661d3bad62 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 14 Mar 2014 15:41:28 -0700 Subject: EPUB writer: Handle files linked in raw img tags. See #1170. --- src/Text/Pandoc/Writers/EPUB.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 75aae55c1..dae45b90f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -721,7 +721,7 @@ transformTag :: WriterOptions -> Tag String -> IO (Tag String) transformTag opts mediaRef tag@(TagOpen name attr) - | name == "video" || name == "source" = do + | name == "video" || name == "source" || name == "img" = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag let oldsrc = maybe src (</> src) $ writerSourceURL opts -- cgit v1.2.3 From 3df75bc160827fbd0322ecf48e543977cda0156f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 19 Mar 2014 11:09:36 -0700 Subject: PDF: Changes to error reporting, to handle non-UTF8 error output. --- pandoc.hs | 5 ++++- src/Text/Pandoc/PDF.hs | 5 ++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index 709b5a777..4a4e53bdd 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1172,7 +1172,10 @@ main = do res <- makePDF latexEngine f writerOptions doc2 case res of Right pdf -> writeBinary pdf - Left err' -> err 43 $ UTF8.toStringLazy err' + Left err' -> do + B.hPutStr stderr $ err' + B.hPut stderr $ B.pack [10] + err 43 "Error producing PDF from TeX source" | otherwise -> selfcontain (f writerOptions doc2 ++ ['\n' | not standalone']) >>= writerFn outputFile . handleEntities diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index a5f1597bd..608cad2e9 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -116,7 +116,6 @@ tex2pdf' tmpDir program source = do then 3 -- to get page numbers else 2 -- 1 run won't give you PDF bookmarks (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source - let msg = "Error producing PDF from TeX source.\n" case (exit, mbPdf) of (ExitFailure _, _) -> do let logmsg = extractMsg log' @@ -125,8 +124,8 @@ tex2pdf' tmpDir program source = do x | "! Package inputenc Error" `BC.isPrefixOf` x -> "\nTry running pandoc with --latex-engine=xelatex." _ -> "" - return $ Left $ msg <> logmsg <> extramsg - (ExitSuccess, Nothing) -> return $ Left msg + return $ Left $ logmsg <> extramsg + (ExitSuccess, Nothing) -> return $ Left "" (ExitSuccess, Just pdf) -> return $ Right pdf (<>) :: ByteString -> ByteString -> ByteString -- cgit v1.2.3 From dd058b38b0e7ed511353338c4830aab156c39a0e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 24 Mar 2014 09:56:16 -0700 Subject: Markdown reader: Fixed regression on line breaks in strict mode. Closes #1203. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0ea7f9ac5..aa0252266 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1558,7 +1558,7 @@ endline = try $ do guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header - guardEnabled Ext_backtick_code_blocks >> + guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) -- cgit v1.2.3 From 3fa38db80b422af627ba260024756faf2a1d9147 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 24 Mar 2014 10:29:24 -0700 Subject: Parsing: Make F an instance of Applicative. Closes #1138. --- src/Text/Pandoc/Parsing.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 2bc351db3..883a560d0 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -174,13 +174,13 @@ import Text.Pandoc.Asciify (toAsciiChar) import Data.Default import qualified Data.Set as Set import Control.Monad.Reader -import Control.Applicative ((*>), (<*), (<$), liftA2) +import Control.Applicative ((*>), (<*), (<$), liftA2, Applicative) import Data.Monoid import Data.Maybe (catMaybes) type Parser t s = Parsec t s -newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor) +newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Applicative, Functor) runF :: F a -> ParserState -> a runF = runReader . unF -- cgit v1.2.3 From d7fbc40dff9771181f26d7d9cb3129c9884a5f01 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 24 Mar 2014 15:07:19 -0700 Subject: RTF writer: Fixed tables cells containing paragraphs. This moves \intbl after \pard. --- src/Text/Pandoc/Writers/RTF.hs | 2 +- tests/tables.rtf | 184 ++++++++++++++++++++--------------------- 2 files changed, 93 insertions(+), 93 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index fb935fa6a..3e0bd9976 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -259,7 +259,7 @@ tableRowToRTF header indent aligns sizes' cols = tableItemToRTF :: Int -> Alignment -> [Block] -> String tableItemToRTF indent alignment item = let contents = concatMap (blockToRTF indent alignment) item - in "{\\intbl " ++ contents ++ "\\cell}\n" + in "{" ++ substitute "\\pard" "\\pard\\intbl" contents ++ "\\cell}\n" -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. diff --git a/tests/tables.rtf b/tests/tables.rtf index 011724967..e1fe4aab1 100644 --- a/tests/tables.rtf +++ b/tests/tables.rtf @@ -4,13 +4,13 @@ \clbrdrb\brdrs\cellx2160\clbrdrb\brdrs\cellx4320\clbrdrb\brdrs\cellx6480\clbrdrb\brdrs\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Center\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Center\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default\par} \cell} } \intbl\row} @@ -19,13 +19,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ -34,13 +34,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -49,13 +49,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -66,13 +66,13 @@ \clbrdrb\brdrs\cellx2160\clbrdrb\brdrs\cellx4320\clbrdrb\brdrs\cellx6480\clbrdrb\brdrs\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Center\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Center\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default\par} \cell} } \intbl\row} @@ -81,13 +81,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ -96,13 +96,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -111,13 +111,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -128,13 +128,13 @@ \clbrdrb\brdrs\cellx2160\clbrdrb\brdrs\cellx4320\clbrdrb\brdrs\cellx6480\clbrdrb\brdrs\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Center\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Center\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default\par} \cell} } \intbl\row} @@ -143,13 +143,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ -158,13 +158,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -173,13 +173,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -190,13 +190,13 @@ \clbrdrb\brdrs\cellx1296\clbrdrb\brdrs\cellx2484\clbrdrb\brdrs\cellx3888\clbrdrb\brdrs\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Centered Header\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Centered Header\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default aligned\par} \cell} } \intbl\row} @@ -205,13 +205,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 First\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 First\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} \cell} } \intbl\row} @@ -220,13 +220,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Second\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Second\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 5.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 5.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} \cell} } \intbl\row} @@ -237,13 +237,13 @@ \clbrdrb\brdrs\cellx1296\clbrdrb\brdrs\cellx2484\clbrdrb\brdrs\cellx3888\clbrdrb\brdrs\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Centered Header\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Centered Header\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Left Aligned\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 Right Aligned\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Default aligned\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Default aligned\par} \cell} } \intbl\row} @@ -252,13 +252,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 First\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 First\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} \cell} } \intbl\row} @@ -267,13 +267,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Second\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Second\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 5.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 5.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} \cell} } \intbl\row} @@ -284,13 +284,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 12\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12\par} \cell} } \intbl\row} @@ -299,13 +299,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 123\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 123\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 123\par} \cell} } \intbl\row} @@ -314,13 +314,13 @@ \cellx2160\cellx4320\cellx6480\cellx8640 \trkeep\intbl { -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 1\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 1\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 1\par} \cell} } \intbl\row} @@ -331,13 +331,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 First\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 First\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 12.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 12.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Example of a row that spans multiple lines.\par} \cell} } \intbl\row} @@ -346,13 +346,13 @@ \cellx1296\cellx2484\cellx3888\cellx6804 \trkeep\intbl { -{\intbl {\pard \qc \f0 \sa0 \li0 \fi0 Second\par} +{{\pard\intbl \qc \f0 \sa0 \li0 \fi0 Second\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 row\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 row\par} \cell} -{\intbl {\pard \qr \f0 \sa0 \li0 \fi0 5.0\par} +{{\pard\intbl \qr \f0 \sa0 \li0 \fi0 5.0\par} \cell} -{\intbl {\pard \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} +{{\pard\intbl \ql \f0 \sa0 \li0 \fi0 Here's another one. Note the blank line between rows.\par} \cell} } \intbl\row} -- cgit v1.2.3 From 5e69f845d534964bd3d0d1fe275b5cecc0bf3098 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 12:04:25 -0700 Subject: LaTeX reader: Better handling of "table" environment. Positioning options no longer rendered verbatim. Partially addresses #1204. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 51271edc5..8c3dac263 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -894,6 +894,7 @@ environments = M.fromList , ("letter", env "letter" letter_contents) , ("figure", env "figure" $ skipopts *> blocks) , ("center", env "center" blocks) + , ("table", env "table" $ skipopts *> blocks) , ("tabular", env "tabular" simpTable) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) -- cgit v1.2.3 From 08d1404b31a0fd6edd94b1e5674d3b07d8e4de6a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 13:43:34 -0700 Subject: API changes to HasReaderOptions, HasHeaderMap, HasIdentifierList. Previously these were typeclasses of monads. They've been changed to be typeclasses of states. This ismplifies the instance definitions and provides more flexibility. This is an API change! However, it should be backwards compatible unless you're defining instances of HasReaderOptions, HasHeaderMap, or HasIdentifierList. The old getOption function should work as before (albeit with a more general type). The function askReaderOption has been removed. extractReaderOptions has been added. getOption has been given a default definition. In HasHeaderMap, extractHeaderMap and updateHeaderMap have been added. Default definitions have been given for getHeaderMap, putHeaderMap, and modifyHeaderMap. In HasIdentifierList, extractIdentifierList and updateIdentifierList have been added. Default definitions have been given for getIdentifierList, putIdentifierList, and modifyIdentifierList. The ultimate goal here is to allow different parsers to use their own, tailored parser states (instead of ParserState) while still using shared functions. --- src/Text/Pandoc/Parsing.hs | 70 ++++++++++++++++++++---------------- src/Text/Pandoc/Readers/MediaWiki.hs | 16 ++++----- 2 files changed, 47 insertions(+), 39 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 883a560d0..c12e967dc 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -64,7 +64,6 @@ module Text.Pandoc.Parsing ( (>>~), gridTableWith, readWith, testStringWith, - getOption, guardEnabled, guardDisabled, ParserState (..), @@ -870,33 +869,45 @@ instance HasMeta ParserState where deleteMeta field st = st{ stateMeta = deleteMeta field $ stateMeta st } -class Monad m => HasReaderOptions m where - askReaderOption :: (ReaderOptions -> b) -> m b - -class Monad m => HasHeaderMap m where - getHeaderMap :: m (M.Map Inlines String) - putHeaderMap :: M.Map Inlines String -> m () - modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> m () +class HasReaderOptions st where + extractReaderOptions :: st -> ReaderOptions + getOption :: (ReaderOptions -> b) -> Parser s st b + -- default + getOption f = (f . extractReaderOptions) `fmap` getState + +instance HasReaderOptions ParserState where + extractReaderOptions = stateOptions + +class HasHeaderMap st where + extractHeaderMap :: st -> M.Map Inlines String + updateHeaderMap :: M.Map Inlines String -> st -> st + getHeaderMap :: Parser s st (M.Map Inlines String) + putHeaderMap :: M.Map Inlines String -> Parser s st () + modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) + -> Parser s st () -- default + getHeaderMap = extractHeaderMap `fmap` getState + putHeaderMap x = updateState (updateHeaderMap x) modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f -class Monad m => HasIdentifierList m where - getIdentifierList :: m [String] - putIdentifierList :: [String] -> m () - modifyIdentifierList :: ([String] -> [String]) -> m () +instance HasHeaderMap ParserState where + extractHeaderMap = stateHeaders + updateHeaderMap x st = st{ stateHeaders = x } + +class HasIdentifierList st where + extractIdentifierList :: st -> [String] + updateIdentifierList :: [String] -> st -> st + getIdentifierList :: Parser s st ([String]) + putIdentifierList :: [String] -> Parser s st () + modifyIdentifierList :: ([String] -> [String]) -> Parser s st () -- default + getIdentifierList = extractIdentifierList `fmap` getState + putIdentifierList x = updateState (updateIdentifierList x) modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f -instance HasReaderOptions (Parser s ParserState) where - askReaderOption = getOption - -instance HasHeaderMap (Parser s ParserState) where - getHeaderMap = fmap stateHeaders getState - putHeaderMap hm = updateState $ \st -> st{ stateHeaders = hm } - -instance HasIdentifierList (Parser s ParserState) where - getIdentifierList = fmap stateIdentifiers getState - putIdentifierList l = updateState $ \st -> st{ stateIdentifiers = l } +instance HasIdentifierList ParserState where + extractIdentifierList = stateIdentifiers + updateIdentifierList x st = st{ stateIdentifiers = x } defaultParserState :: ParserState defaultParserState = @@ -923,15 +934,12 @@ defaultParserState = stateRstCustomRoles = M.empty, stateWarnings = []} -getOption :: (ReaderOptions -> a) -> Parser s ParserState a -getOption f = (f . stateOptions) `fmap` getState - -- | Succeed only if the extension is enabled. -guardEnabled :: Extension -> Parser s ParserState () +guardEnabled :: HasReaderOptions st => Extension -> Parser s st () guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext -- | Succeed only if the extension is disabled. -guardDisabled :: Extension -> Parser s ParserState () +guardDisabled :: HasReaderOptions st => Extension -> Parser s st () guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext data HeaderType @@ -968,11 +976,11 @@ type SubstTable = M.Map Key Inlines -- and the auto_identifers extension is set, generate a new -- unique identifier, and update the list of identifiers -- in state. -registerHeader :: (HasReaderOptions m, HasHeaderMap m, HasIdentifierList m) - => Attr -> Inlines -> m Attr +registerHeader :: (HasReaderOptions st, HasHeaderMap st, HasIdentifierList st) + => Attr -> Inlines -> Parser s st Attr registerHeader (ident,classes,kvs) header' = do ids <- getIdentifierList - exts <- askReaderOption readerExtensions + exts <- getOption readerExtensions let insert' = M.insertWith (\_new old -> old) if null ident && Ext_auto_identifiers `Set.member` exts then do @@ -990,7 +998,7 @@ registerHeader (ident,classes,kvs) header' = do return (ident,classes,kvs) -- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: Parser [tok] ParserState () +failUnlessSmart :: HasReaderOptions st => Parser s st () failUnlessSmart = getOption readerSmart >>= guard smartPunctuation :: Parser [Char] ParserState Inline diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 7ac2f33ba..7bad4d346 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -82,16 +82,16 @@ data MWState = MWState { mwOptions :: ReaderOptions type MWParser = Parser [Char] MWState -instance HasReaderOptions MWParser where - askReaderOption f = (f . mwOptions) `fmap` getState +instance HasReaderOptions MWState where + extractReaderOptions = mwOptions -instance HasHeaderMap MWParser where - getHeaderMap = fmap mwHeaderMap getState - putHeaderMap hm = updateState $ \st -> st{ mwHeaderMap = hm } +instance HasHeaderMap MWState where + extractHeaderMap = mwHeaderMap + updateHeaderMap x st = st{ mwHeaderMap = x } -instance HasIdentifierList MWParser where - getIdentifierList = fmap mwIdentifierList getState - putIdentifierList l = updateState $ \st -> st{ mwIdentifierList = l } +instance HasIdentifierList MWState where + extractIdentifierList = mwIdentifierList + updateIdentifierList x st = st{ mwIdentifierList = x } -- -- auxiliary functions -- cgit v1.2.3 From 6ec3ee3a67c30d2e83f638035521896cbbd70f1e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 13:51:55 -0700 Subject: Whitespace change, and note: Contrary to the previous commit message, there was no API change, since Text.Pandoc.Parsing is not an exposed module. --- src/Text/Pandoc/Parsing.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index c12e967dc..bc0c5bdf8 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -909,6 +909,7 @@ instance HasIdentifierList ParserState where extractIdentifierList = stateIdentifiers updateIdentifierList x st = st{ stateIdentifiers = x } + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, -- cgit v1.2.3 From 6992050161f1bbe8d18d7d78beb3b38a4b69a23e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 14:55:18 -0700 Subject: Parsing: Added HasMacros, simplified other typeclasses. Removed updateHeaderMap, setHeaderMap, getHeaderMap, updateIdentifierList, setIdentifierList, getIdentifierList. --- src/Text/Pandoc/Parsing.hs | 50 ++++++++++++++++-------------------- src/Text/Pandoc/Readers/MediaWiki.hs | 4 +-- 2 files changed, 24 insertions(+), 30 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index bc0c5bdf8..d8c7e71d5 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -70,6 +70,7 @@ module Text.Pandoc.Parsing ( (>>~), HasReaderOptions (..), HasHeaderMap (..), HasIdentifierList (..), + HasMacros (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -880,35 +881,28 @@ instance HasReaderOptions ParserState where class HasHeaderMap st where extractHeaderMap :: st -> M.Map Inlines String - updateHeaderMap :: M.Map Inlines String -> st -> st - getHeaderMap :: Parser s st (M.Map Inlines String) - putHeaderMap :: M.Map Inlines String -> Parser s st () - modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) - -> Parser s st () - -- default - getHeaderMap = extractHeaderMap `fmap` getState - putHeaderMap x = updateState (updateHeaderMap x) - modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f + updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> + st -> st instance HasHeaderMap ParserState where extractHeaderMap = stateHeaders - updateHeaderMap x st = st{ stateHeaders = x } + updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st } class HasIdentifierList st where extractIdentifierList :: st -> [String] - updateIdentifierList :: [String] -> st -> st - getIdentifierList :: Parser s st ([String]) - putIdentifierList :: [String] -> Parser s st () - modifyIdentifierList :: ([String] -> [String]) -> Parser s st () - -- default - getIdentifierList = extractIdentifierList `fmap` getState - putIdentifierList x = updateState (updateIdentifierList x) - modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f + updateIdentifierList :: ([String] -> [String]) -> st -> st instance HasIdentifierList ParserState where extractIdentifierList = stateIdentifiers - updateIdentifierList x st = st{ stateIdentifiers = x } + updateIdentifierList f st = st{ stateIdentifiers = f $ stateIdentifiers st } + +class HasMacros st where + extractMacros :: st -> [Macro] + updateMacros :: ([Macro] -> [Macro]) -> st -> st +instance HasMacros ParserState where + extractMacros = stateMacros + updateMacros f st = st{ stateMacros = f $ stateMacros st } defaultParserState :: ParserState defaultParserState = @@ -980,7 +974,7 @@ type SubstTable = M.Map Key Inlines registerHeader :: (HasReaderOptions st, HasHeaderMap st, HasIdentifierList st) => Attr -> Inlines -> Parser s st Attr registerHeader (ident,classes,kvs) header' = do - ids <- getIdentifierList + ids <- extractIdentifierList `fmap` getState exts <- getOption readerExtensions let insert' = M.insertWith (\_new old -> old) if null ident && Ext_auto_identifiers `Set.member` exts @@ -989,13 +983,13 @@ registerHeader (ident,classes,kvs) header' = do let id'' = if Ext_ascii_identifiers `Set.member` exts then catMaybes $ map toAsciiChar id' else id' - putIdentifierList $ if id' == id'' - then id' : ids - else id' : id'' : ids - modifyHeaderMap $ insert' header' id' + updateState $ updateIdentifierList $ + if id' == id'' then (id' :) else ([id', id''] ++) + updateState $ updateHeaderMap $ insert' header' id' return (id'',classes,kvs) else do - unless (null ident) $ modifyHeaderMap $ insert' header' ident + unless (null ident) $ + updateState $ updateHeaderMap $ insert' header' ident return (ident,classes,kvs) -- | Fail unless we're in "smart typography" mode. @@ -1140,7 +1134,7 @@ nested p = do -- -- | Parse a \newcommand or \renewcommand macro definition. -macro :: Parser [Char] ParserState Blocks +macro :: (HasMacros st, HasReaderOptions st) => Parser [Char] st Blocks macro = do apply <- getOption readerApplyMacros inp <- getInput @@ -1150,7 +1144,7 @@ macro = do if apply then do updateState $ \st -> - st { stateMacros = ms ++ stateMacros st } + updateMacros (ms ++) st return mempty else return $ rawBlock "latex" def' @@ -1159,7 +1153,7 @@ applyMacros' :: String -> Parser [Char] ParserState String applyMacros' target = do apply <- getOption readerApplyMacros if apply - then do macros <- liftM stateMacros getState + then do macros <- extractMacros `fmap` getState return $ applyMacros macros target else return target diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 7bad4d346..f70b44aad 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -87,11 +87,11 @@ instance HasReaderOptions MWState where instance HasHeaderMap MWState where extractHeaderMap = mwHeaderMap - updateHeaderMap x st = st{ mwHeaderMap = x } + updateHeaderMap f st = st{ mwHeaderMap = f $ mwHeaderMap st } instance HasIdentifierList MWState where extractIdentifierList = mwIdentifierList - updateIdentifierList x st = st{ mwIdentifierList = x } + updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st } -- -- auxiliary functions -- cgit v1.2.3 From 82ddec698e782fef83dcd1b1fba79cd3b698c717 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 15:38:30 -0700 Subject: LaTeX reader: Added LPState. Plan is to use this instead of ParserState in LP. --- src/Text/Pandoc/Readers/LaTeX.hs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 8c3dac263..dd94c0ac1 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad +import Data.Default import Text.Pandoc.Builder import Data.Char (isLetter, isAlphaNum) import Control.Applicative @@ -71,6 +72,23 @@ parseLaTeX = do let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' +data LPState = LPState{ + lpMeta :: Meta + , lpOptions :: ReaderOptions + , lpHasChapters :: Bool + , lpParserContext :: ParserContext + , lpQuoteContext :: QuoteContext + } deriving Show + +instance Default LPState where + def = LPState{ + lpMeta = nullMeta + , lpOptions = def + , lpHasChapters = False + , lpParserContext = NullState + , lpQuoteContext = NoQuote + } + type LP = Parser [Char] ParserState anyControlSeq :: LP String -- cgit v1.2.3 From 994597f071cddbf9631e0d27bc8441afd2b1ebeb Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 22:40:18 -0700 Subject: Revert "LaTeX reader: Added LPState." This reverts commit 82ddec698e782fef83dcd1b1fba79cd3b698c717. --- src/Text/Pandoc/Readers/LaTeX.hs | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index dd94c0ac1..8c3dac263 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -43,7 +43,6 @@ import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad -import Data.Default import Text.Pandoc.Builder import Data.Char (isLetter, isAlphaNum) import Control.Applicative @@ -72,23 +71,6 @@ parseLaTeX = do let (Pandoc _ bs') = doc bs return $ Pandoc meta bs' -data LPState = LPState{ - lpMeta :: Meta - , lpOptions :: ReaderOptions - , lpHasChapters :: Bool - , lpParserContext :: ParserContext - , lpQuoteContext :: QuoteContext - } deriving Show - -instance Default LPState where - def = LPState{ - lpMeta = nullMeta - , lpOptions = def - , lpHasChapters = False - , lpParserContext = NullState - , lpQuoteContext = NoQuote - } - type LP = Parser [Char] ParserState anyControlSeq :: LP String -- cgit v1.2.3 From 0934c4430a91551f8d293fa7eb6a2c3e745d3a48 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 22:44:16 -0700 Subject: Parsing: Added stateCaption. This is primarily for use in the LaTeX reader, so far. --- src/Text/Pandoc/Parsing.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d8c7e71d5..68d4605ee 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -857,7 +857,7 @@ data ParserState = ParserState -- Triple represents: 1) Base role, 2) Optional format (only for :raw: -- roles), 3) Source language annotation for code (could be used to -- annotate role classes too). - + stateCaption :: Maybe Inlines, -- ^ Caption in current environment stateWarnings :: [String] -- ^ Warnings generated by the parser } @@ -927,6 +927,7 @@ defaultParserState = stateMacros = [], stateRstDefaultRole = "title-reference", stateRstCustomRoles = M.empty, + stateCaption = Nothing, stateWarnings = []} -- | Succeed only if the extension is enabled. -- cgit v1.2.3 From 69a7c9f634b1e9133f7c6e4e135404bdf8caa3ab Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 Mar 2014 23:10:43 -0700 Subject: LaTeX reader: Better handling of figure and table with caption. We now look for a \caption inside the environment; if one is found, it is attached to the graphic or tabular found there. Closes #1204. --- src/Text/Pandoc/Readers/LaTeX.hs | 45 ++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 8c3dac263..f23a5b35e 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -302,6 +302,7 @@ blockCommands = M.fromList $ , ("item", skipopts *> loose_item) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) + , ("caption", tok >>= setCaption) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks @@ -323,6 +324,14 @@ blockCommands = M.fromList $ addMeta :: ToMetaValue a => String -> a -> LP () addMeta field val = updateState $ setMeta field val +setCaption :: Inlines -> LP Blocks +setCaption ils = do + updateState $ \st -> st{ stateCaption = Just ils } + return mempty + +resetCaption :: LP () +resetCaption = updateState $ \st -> st{ stateCaption = Nothing } + authors :: LP () authors = try $ do char '{' @@ -523,18 +532,12 @@ inlineCommands = M.fromList $ mkImage :: String -> LP Inlines mkImage src = do - -- try for a caption - (alt, tit) <- option (str "image", "") $ try $ do - spaces - controlSeq "caption" - optional (char '*') - ils <- grouped inline - return (ils, "fig:") + let alt = str "image" case takeExtension src of "" -> do defaultExt <- getOption readerDefaultImageExtension - return $ image (addExtension src defaultExt) tit alt - _ -> return $ image src tit alt + return $ image (addExtension src defaultExt) "" alt + _ -> return $ image src "" alt inNote :: Inlines -> Inlines inNote ils = @@ -888,13 +891,33 @@ rawLaTeXInline = do raw <- (snd <$> withRaw inlineCommand) <|> (snd <$> withRaw blockCommand) RawInline "latex" <$> applyMacros' raw +addImageCaption :: Blocks -> LP Blocks +addImageCaption = walkM go + where go (Image alt (src,tit)) = do + mbcapt <- stateCaption <$> getState + case mbcapt of + Just ils -> return (Image (toList ils) (src, "fig:")) + Nothing -> return (Image alt (src,tit)) + go x = return x + +addTableCaption :: Blocks -> LP Blocks +addTableCaption = walkM go + where go (Table c als ws hs rs) = do + mbcapt <- stateCaption <$> getState + case mbcapt of + Just ils -> return (Table (toList ils) als ws hs rs) + Nothing -> return (Table c als ws hs rs) + go x = return x + environments :: M.Map String (LP Blocks) environments = M.fromList [ ("document", env "document" blocks <* skipMany anyChar) , ("letter", env "letter" letter_contents) - , ("figure", env "figure" $ skipopts *> blocks) + , ("figure", env "figure" $ + resetCaption *> skipopts *> blocks >>= addImageCaption) , ("center", env "center" blocks) - , ("table", env "table" $ skipopts *> blocks) + , ("table", env "table" $ + resetCaption *> skipopts *> blocks >>= addTableCaption) , ("tabular", env "tabular" simpTable) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) -- cgit v1.2.3 From 361167deff57e0e2d3508c598785a7de7b887fb7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 30 Mar 2014 15:48:40 -0700 Subject: Markdown writer: Use longer backtick fences if needed. If the content contains a backtick fence and there are attributes, make sure longer fences are used to delimit the code. Note: This works well in pandoc, but github markdown is more limited, and will interpret the first string of three or more backticks as ending the code block. Closes #1206. --- src/Text/Pandoc/Writers/Markdown.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 278e5cc9d..e8f976da1 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -392,7 +392,11 @@ blockToMarkdown opts (CodeBlock attribs str) = return $ xs -> case maximum $ map length xs of n | n < 3 -> "~~~~" | otherwise -> replicate (n+1) '~' - backticks = text "```" + backticks = text $ case [ln | ln <- lines str, all (=='`') ln] of + [] -> "```" + xs -> case maximum $ map length xs of + n | n < 3 -> "```" + | otherwise -> replicate (n+1) '`' attrs = if isEnabled Ext_fenced_code_attributes opts then nowrap $ " " <> attrsToMarkdown attribs else case attribs of -- cgit v1.2.3 From 99f4f636df36d84a4bc58f67337af3584595f5c1 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Mon, 31 Mar 2014 11:08:10 -0700 Subject: Make --toc-depth work well with books in latex/pdf output. Closes #1210. --- src/Text/Pandoc/Writers/LaTeX.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index b2eff4490..07be6e9af 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -133,7 +133,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - - if writerChapters options + if stBook st then 1 else 0)) $ defField "body" main $ @@ -141,7 +141,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "author-meta" (intercalate "; " authorsMeta) $ defField "documentclass" (if writerBeamer options then ("beamer" :: String) - else if writerChapters options + else if stBook st then "book" else "article") $ defField "verbatim-in-note" (stVerbInNote st) $ -- cgit v1.2.3 From 0ccca94b4cb22e846289c004deaa023245e14981 Mon Sep 17 00:00:00 2001 From: Matthew Pickering <matthewtpickering@gmail.com> Date: Tue, 25 Mar 2014 18:22:23 +0000 Subject: Bugfix for #1175 and convert textile reader to use builder. The reader did not correctly parse inline markup. The behavoir is now as follows. (a) The markup must start at the start of a line, be inside previous inline markup or be preceeded by whitespace. (b) The markup can not span across paragraphs (delimited by \n\n) (c) The markup can not be followed by a alphanumeric character. (d) Square brackets can be placed around the markup to avoid having to have white space before it. In order to make these changes it was either necessary to convert the parser to return a list of inlines or to convert the whole reader to use the builder. The latter approach whilst more work makes a bit more sense as it becomes easy to arbitarily append and prepend elements without changing the type. Tests are accordingly updated in a later commit to reflect the different normalisation behavoir specified by the builder monoid. --- src/Text/Pandoc/Readers/Textile.hs | 301 ++++++++++++++++++++----------------- 1 file changed, 167 insertions(+), 134 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 93658cdea..ede50c6de 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -50,20 +50,20 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where - import Text.Pandoc.Definition +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing +import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate ) -import Data.Char ( digitToInt, isUpper ) +import Data.Char ( digitToInt, isUpper) import Control.Monad ( guard, liftM ) import Control.Applicative ((<$>), (*>), (<*)) +import Data.Monoid -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options @@ -95,7 +95,7 @@ parseTextile = do updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... blocks <- parseBlocks - return $ Pandoc nullMeta blocks -- FIXME + return $ Pandoc nullMeta (B.toList blocks) -- FIXME noteMarker :: Parser [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') @@ -115,11 +115,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: Parser [Char] ParserState [Block] -parseBlocks = manyTill block eof +parseBlocks :: Parser [Char] ParserState Blocks +parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [Parser [Char] ParserState Block] +blockParsers :: [Parser [Char] ParserState Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -130,29 +130,32 @@ blockParsers = [ codeBlock , rawLaTeXBlock' , maybeExplicitBlock "table" table , maybeExplicitBlock "p" para + , endBlock ] +endBlock :: Parser [Char] ParserState Blocks +endBlock = string "\n\n" >> return mempty -- | Any block in the order of definition of blockParsers -block :: Parser [Char] ParserState Block +block :: Parser [Char] ParserState Blocks block = choice blockParsers <?> "block" -commentBlock :: Parser [Char] ParserState Block +commentBlock :: Parser [Char] ParserState Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines - return Null + return mempty -codeBlock :: Parser [Char] ParserState Block +codeBlock :: Parser [Char] ParserState Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: Parser [Char] ParserState Block +codeBlockBc :: Parser [Char] ParserState Blocks codeBlockBc = try $ do string "bc. " contents <- manyTill anyLine blanklines - return $ CodeBlock ("",[],[]) $ unlines contents + return $ B.codeBlock (unlines contents) -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: Parser [Char] ParserState Block +codeBlockPre :: Parser [Char] ParserState Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) result' <- (innerText . parseTags) `fmap` -- remove internal tags @@ -169,29 +172,29 @@ codeBlockPre = try $ do let classes = words $ fromAttrib "class" t let ident = fromAttrib "id" t let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ CodeBlock (ident,classes,kvs) result''' + return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: Parser [Char] ParserState Block +header :: Parser [Char] ParserState Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" attr <- attributes char '.' - whitespace - name <- normalizeSpaces <$> manyTill inline blockBreak - attr' <- registerHeader attr (B.fromList name) - return $ Header level attr' name + lookAhead whitespace + name <- trimInlines . mconcat <$> manyTill inline blockBreak + attr' <- registerHeader attr name + return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: Parser [Char] ParserState Block +blockQuote :: Parser [Char] ParserState Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace - BlockQuote . singleton <$> para + B.blockQuote <$> para -- Horizontal rule -hrule :: Parser [Char] st Block +hrule :: Parser [Char] st Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -199,62 +202,62 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return HorizontalRule + return B.horizontalRule -- Lists handling -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: Parser [Char] ParserState Block +anyList :: Parser [Char] ParserState Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> Parser [Char] ParserState Block +anyListAtDepth :: Int -> Parser [Char] ParserState Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> Parser [Char] ParserState Block -bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth) +bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks +bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block] +bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> Parser [Char] ParserState Block +orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) - return (OrderedList (1, DefaultStyle, DefaultDelim) items) + return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block] +orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block] +genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace - p <- many listInline + p <- mconcat <$> many listInline newline - sublist <- option [] (singleton <$> anyListAtDepth (depth + 1)) - return (Plain p : sublist) + sublist <- option mempty (anyListAtDepth (depth + 1)) + return $ (B.plain p) <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: Parser [Char] ParserState Block -definitionList = try $ DefinitionList <$> many1 definitionListItem +definitionList :: Parser [Char] ParserState Blocks +definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. listStart :: Parser [Char] st Char listStart = oneOf "*#-" -listInline :: Parser [Char] ParserState Inline +listInline :: Parser [Char] ParserState Inlines listInline = try (notFollowedBy newline >> inline) <|> try (endline <* notFollowedBy listStart) @@ -262,16 +265,16 @@ listInline = try (notFollowedBy newline >> inline) -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks]) definitionListItem = try $ do string "- " - term <- many1Till inline (try (whitespace >> string ":=")) + term <- mconcat <$> many1Till inline (try (whitespace >> string ":=")) def' <- multilineDef <|> inlineDef return (term, def') - where inlineDef :: Parser [Char] ParserState [[Block]] - inlineDef = liftM (\d -> [[Plain d]]) - $ optional whitespace >> many listInline <* newline - multilineDef :: Parser [Char] ParserState [[Block]] + where inlineDef :: Parser [Char] ParserState [Blocks] + inlineDef = liftM (\d -> [B.plain d]) + $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline + multilineDef :: Parser [Char] ParserState [Blocks] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -288,59 +291,59 @@ blockBreak = try (newline >> blanklines >> return ()) <|> -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: Parser [Char] ParserState Block +rawHtmlBlock :: Parser [Char] ParserState Blocks rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines - return $ RawBlock (Format "html") b + return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: Parser [Char] ParserState Block +rawLaTeXBlock' :: Parser [Char] ParserState Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex - RawBlock (Format "latex") <$> (rawLaTeXBlock <* spaces) + B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: Parser [Char] ParserState Block -para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak - +para :: Parser [Char] ParserState Blocks +para = do + a <- manyTill inline blockBreak + return $ (B.para . trimInlines . mconcat) a -- Tables -- | A table cell spans until a pipe | -tableCell :: Parser [Char] ParserState TableCell +tableCell :: Parser [Char] ParserState Blocks tableCell = do c <- many1 (noneOf "|\n") - content <- parseFromString (many1 inline) c - return $ [ Plain $ normalizeSpaces content ] + content <- trimInlines . mconcat <$> parseFromString (many1 inline) c + return $ B.plain content -- | A table row is made of many table cells -tableRow :: Parser [Char] ParserState [TableCell] +tableRow :: Parser [Char] ParserState [Blocks] tableRow = try $ ( char '|' *> (endBy1 tableCell (optional blankline *> char '|')) <* newline) -- | Many table rows -tableRows :: Parser [Char] ParserState [[TableCell]] +tableRows :: Parser [Char] ParserState [[Blocks]] tableRows = many1 tableRow -- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: Parser [Char] ParserState [TableCell] +tableHeaders :: Parser [Char] ParserState [Blocks] tableHeaders = let separator = (try $ string "|_.") in try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. -table :: Parser [Char] ParserState Block +table :: Parser [Char] ParserState Blocks table = try $ do - headers <- option [] tableHeaders + headers <- option mempty tableHeaders rows <- tableRows blanklines let nbOfCols = max (length headers) (length $ head rows) - return $ Table [] - (replicate nbOfCols AlignDefault) - (replicate nbOfCols 0.0) + return $ B.table mempty + (zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0)) headers rows @@ -348,8 +351,8 @@ table = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: String -- ^ block tag name - -> Parser [Char] ParserState Block -- ^ implicit block - -> Parser [Char] ParserState Block + -> Parser [Char] ParserState Blocks -- ^ implicit block + -> Parser [Char] ParserState Blocks maybeExplicitBlock name blk = try $ do optional $ try $ string name >> attributes >> char '.' >> optional whitespace >> optional endline @@ -363,12 +366,14 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: Parser [Char] ParserState Inline -inline = choice inlineParsers <?> "inline" +inline :: Parser [Char] ParserState Inlines +inline = do + choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: [Parser [Char] ParserState Inline] -inlineParsers = [ str +inlineParsers :: [Parser [Char] ParserState Inlines] +inlineParsers = [ inlineMarkup + , str , whitespace , endline , code @@ -378,58 +383,57 @@ inlineParsers = [ str , rawLaTeXInline' , note , try $ (char '[' *> inlineMarkup <* char ']') - , inlineMarkup , link , image , mark - , (Str . (:[])) <$> characterReference + , (B.str . (:[])) <$> characterReference , smartPunctuation inline , symbol ] -- | Inline markups -inlineMarkup :: Parser [Char] ParserState Inline -inlineMarkup = choice [ simpleInline (string "??") (Cite []) - , simpleInline (string "**") Strong - , simpleInline (string "__") Emph - , simpleInline (char '*') Strong - , simpleInline (char '_') Emph - , simpleInline (char '+') Emph -- approximates underline - , simpleInline (char '-' <* notFollowedBy (char '-')) Strikeout - , simpleInline (char '^') Superscript - , simpleInline (char '~') Subscript +inlineMarkup :: Parser [Char] ParserState Inlines +inlineMarkup = choice [ simpleInline (string "??") (B.cite []) + , simpleInline (string "**") B.strong + , simpleInline (string "__") B.emph + , simpleInline (char '*') B.strong + , simpleInline (char '_') B.emph + , simpleInline (char '+') B.emph -- approximates underline + , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout + , simpleInline (char '^') B.superscript + , simpleInline (char '~') B.subscript ] -- | Trademark, registered, copyright -mark :: Parser [Char] st Inline +mark :: Parser [Char] st Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: Parser [Char] st Inline +reg :: Parser [Char] st Inlines reg = do oneOf "Rr" char ')' - return $ Str "\174" + return $ B.str "\174" -tm :: Parser [Char] st Inline +tm :: Parser [Char] st Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' - return $ Str "\8482" + return $ B.str "\8482" -copy :: Parser [Char] st Inline +copy :: Parser [Char] st Inlines copy = do oneOf "Cc" char ')' - return $ Str "\169" + return $ B.str "\169" -note :: Parser [Char] ParserState Inline +note :: Parser [Char] ParserState Inlines note = try $ do ref <- (char '[' *> many1 digit <* char ']') notes <- stateNotes <$> getState case lookup ref notes of Nothing -> fail "note not found" - Just raw -> liftM Note $ parseFromString parseBlocks raw + Just raw -> B.note <$> parseFromString parseBlocks raw -- | Special chars markupChars :: [Char] @@ -450,7 +454,7 @@ wordBoundaries = markupChars ++ stringBreakers hyphenedWords :: Parser [Char] ParserState String hyphenedWords = do x <- wordChunk - xs <- many (try $ char '-' >> wordChunk) + xs <- many (try $ char '-' >> wordChunk) return $ intercalate "-" (x:xs) wordChunk :: Parser [Char] ParserState String @@ -462,7 +466,7 @@ wordChunk = try $ do return $ hd:tl -- | Any string -str :: Parser [Char] ParserState Inline +str :: Parser [Char] ParserState Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -472,89 +476,89 @@ str = do acro <- enclosed (char '(') (char ')') anyChar return $ concat [baseStr, " (", acro, ")"] updateLastStrPos - return $ Str fullStr + return $ B.str fullStr -- | Textile allows HTML span infos, we discard them -htmlSpan :: Parser [Char] ParserState Inline -htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) +htmlSpan :: Parser [Char] ParserState Inlines +htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) -- | Some number of space chars -whitespace :: Parser [Char] ParserState Inline -whitespace = many1 spaceChar >> return Space <?> "whitespace" +whitespace :: Parser [Char] ParserState Inlines +whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: Parser [Char] ParserState Inline +endline :: Parser [Char] ParserState Inlines endline = try $ do newline >> notFollowedBy blankline - return LineBreak + return B.linebreak -rawHtmlInline :: Parser [Char] ParserState Inline -rawHtmlInline = RawInline (Format "html") . snd <$> htmlTag isInlineTag +rawHtmlInline :: Parser [Char] ParserState Inlines +rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: Parser [Char] ParserState Inline +rawLaTeXInline' :: Parser [Char] ParserState Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex - rawLaTeXInline + B.singleton <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: Parser [Char] ParserState Inline +link :: Parser [Char] ParserState Inlines link = linkB <|> linkNoB -linkNoB :: Parser [Char] ParserState Inline +linkNoB :: Parser [Char] ParserState Inlines linkNoB = try $ do - name <- surrounded (char '"') inline + name <- mconcat <$> surrounded (char '"') (withQuoteContext InDoubleQuote inline) char ':' let stopChars = "!.,;:" url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline))) - let name' = if name == [Str "$"] then [Str url] else name - return $ Link name' (url, "") + let name' = if B.toList name == [Str "$"] then B.str url else name + return $ B.link url "" name' -linkB :: Parser [Char] ParserState Inline +linkB :: Parser [Char] ParserState Inlines linkB = try $ do char '[' - name <- surrounded (char '"') inline + name <- mconcat <$> surrounded (char '"') inline char ':' url <- manyTill nonspaceChar (char ']') - let name' = if name == [Str "$"] then [Str url] else name - return $ Link name' (url, "") + let name' = if B.toList name == [Str "$"] then B.str url else name + return $ B.link url "" name' -- | image embedding -image :: Parser [Char] ParserState Inline +image :: Parser [Char] ParserState Inlines image = try $ do char '!' >> notFollowedBy space src <- manyTill anyChar (lookAhead $ oneOf "!(") alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')'))) char '!' - return $ Image [Str alt] (src, alt) + return $ B.image src alt (B.str alt) -escapedInline :: Parser [Char] ParserState Inline +escapedInline :: Parser [Char] ParserState Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: Parser [Char] ParserState Inline -escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) +escapedEqs :: Parser [Char] ParserState Inlines +escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: Parser [Char] ParserState Inline -escapedTag = Str <$> +escapedTag :: Parser [Char] ParserState Inlines +escapedTag = B.str <$> (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: Parser [Char] ParserState Inline -symbol = Str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) +symbol :: Parser [Char] ParserState Inlines +symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) -- | Inline code -code :: Parser [Char] ParserState Inline +code :: Parser [Char] ParserState Inlines code = code1 <|> code2 -code1 :: Parser [Char] ParserState Inline -code1 = Code nullAttr <$> surrounded (char '@') anyChar +code1 :: Parser [Char] ParserState Inlines +code1 = B.code <$> surrounded (char '@') anyChar -code2 :: Parser [Char] ParserState Inline +code2 :: Parser [Char] ParserState Inlines code2 = do htmlTag (tagOpen (=="tt") null) - Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) + B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes attributes :: Parser [Char] ParserState Attr @@ -581,7 +585,7 @@ styleAttr = do langAttr :: Parser [Char] ParserState (Attr -> Attr) langAttr = do - lang <- try $ enclosed (char '[') (char ']') anyChar + lang <- try $ enclosed (char '[') (char ']') alphaNum return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) -- | Parses material surrounded by a parser. @@ -590,14 +594,43 @@ surrounded :: Parser [Char] st t -- ^ surrounding parser -> Parser [Char] st [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) --- | Inlines are most of the time of the same form + simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> ([Inline] -> Inline) -- ^ Inline constructor - -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) -simpleInline border construct = surrounded border inlineWithAttribute >>= - return . construct . normalizeSpaces - where inlineWithAttribute = (try $ optional attributes) >> inline + -> (Inlines -> Inlines) -- ^ Inline constructor + -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct + +ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser + -> (Inlines -> Inlines) -- ^ Inline constructor + -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +ungroupedSimpleInline border construct = try $ do + st <- getState + pos <- getPosition + isWhitespace <- option False (whitespace >> return True) + guard $ (stateQuoteContext st /= NoQuote) + || (sourceColumn pos == 1) + || isWhitespace + body <- surrounded border inlineWithAttribute + lookAhead (notFollowedBy alphaNum) + let result = construct $ mconcat body + return $ if isWhitespace then B.space <> result + else result + where + inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") + >> (withQuoteContext InSingleQuote inline) + + +groupedSimpleInline :: Parser [Char] ParserState t + -> (Inlines -> Inlines) + -> Parser [Char] ParserState Inlines +groupedSimpleInline border construct = try $ do + char '[' + withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']' + + + -- | Create a singleton list singleton :: a -> [a] singleton x = [x] + -- cgit v1.2.3 From 9b5d474e79c0b508ac0da9943b9bb385671aad85 Mon Sep 17 00:00:00 2001 From: Matthew Pickering <matthewtpickering@gmail.com> Date: Thu, 27 Mar 2014 19:53:32 +0000 Subject: Converted HTML reader to use builder. Fixes #1162. --- src/Text/Pandoc/Readers/HTML.hs | 235 +++++++++++++++++++++------------------- 1 file changed, 126 insertions(+), 109 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d1e4d0024..4fab251bb 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -40,6 +40,7 @@ import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -48,6 +49,8 @@ import Data.List ( intercalate ) import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) +import Data.Monoid +import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl) isSpace :: Char -> Bool isSpace ' ' = True @@ -66,30 +69,30 @@ readHtml opts inp = where tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do - blocks <- (fixPlains False . concat) <$> manyTill block eof + blocks <- (fixPlains False) . mconcat <$> manyTill block eof meta <- stateMeta <$> getState - return $ Pandoc meta blocks + return $ Pandoc meta (B.toList blocks) type TagParser = Parser [Tag String] ParserState -pBody :: TagParser [Block] +pBody :: TagParser Blocks pBody = pInTags "body" block -pHead :: TagParser [Block] -pHead = pInTags "head" $ pTitle <|> pMetaTag <|> ([] <$ pAnyTag) - where pTitle = pInTags "title" inline >>= setTitle . normalizeSpaces - setTitle t = [] <$ (updateState $ B.setMeta "title" (B.fromList t)) +pHead :: TagParser Blocks +pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) + where pTitle = pInTags "title" inline >>= setTitle . trimInlines + setTitle t = mempty <$ (updateState $ B.setMeta "title" t) pMetaTag = do mt <- pSatisfy (~== TagOpen "meta" []) let name = fromAttrib "name" mt if null name - then return [] + then return mempty else do let content = fromAttrib "content" mt updateState $ B.setMeta name (B.text content) - return [] + return mempty -block :: TagParser [Block] +block :: TagParser Blocks block = choice [ pPara , pHeader @@ -105,10 +108,10 @@ block = choice , pRawHtmlBlock ] -pList :: TagParser [Block] +pList :: TagParser Blocks pList = pBulletList <|> pOrderedList <|> pDefinitionList -pBulletList :: TagParser [Block] +pBulletList :: TagParser Blocks pBulletList = try $ do pSatisfy (~== TagOpen "ul" []) let nonItem = pSatisfy (\t -> @@ -118,9 +121,9 @@ pBulletList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul") - return [BulletList $ map (fixPlains True) items] + return $ B.bulletList $ map (fixPlains True) items -pOrderedList :: TagParser [Block] +pOrderedList :: TagParser Blocks pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) let (start, style) = (sta', sty') @@ -146,27 +149,27 @@ pOrderedList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol") - return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items] + return $ B.orderedListWith (start, style, DefaultDelim) $ map (fixPlains True) items -pDefinitionList :: TagParser [Block] +pDefinitionList :: TagParser Blocks pDefinitionList = try $ do pSatisfy (~== TagOpen "dl" []) items <- manyTill pDefListItem (pCloses "dl") - return [DefinitionList items] + return $ B.definitionList items -pDefListItem :: TagParser ([Inline],[[Block]]) +pDefListItem :: TagParser (Inlines, [Blocks]) pDefListItem = try $ do let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem - let term = intercalate [LineBreak] terms + let term = foldl1 (\x y -> x <> B.linebreak <> y) terms return (term, map (fixPlains True) defs) -fixPlains :: Bool -> [Block] -> [Block] -fixPlains inList bs = if any isParaish bs - then map plainToPara bs +fixPlains :: Bool -> Blocks -> Blocks +fixPlains inList bs = if any isParaish bs' + then B.fromList $ map plainToPara bs' else bs where isParaish (Para _) = True isParaish (CodeBlock _ _) = True @@ -178,6 +181,7 @@ fixPlains inList bs = if any isParaish bs isParaish _ = False plainToPara (Plain xs) = Para xs plainToPara x = x + bs' = B.toList bs pRawTag :: TagParser String pRawTag = do @@ -187,20 +191,20 @@ pRawTag = do then return [] else return $ renderTags' [tag] -pDiv :: TagParser [Block] +pDiv :: TagParser Blocks pDiv = try $ do getOption readerParseRaw >>= guard TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True) contents <- pInTags "div" block - return [Div (mkAttr attr) contents] + return $ B.divWith (mkAttr attr) contents -pRawHtmlBlock :: TagParser [Block] +pRawHtmlBlock :: TagParser Blocks pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag parseRaw <- getOption readerParseRaw if parseRaw && not (null raw) - then return [RawBlock (Format "html") raw] - else return [] + then return $ B.rawBlock "html" raw + else return mempty pHtmlBlock :: String -> TagParser String pHtmlBlock t = try $ do @@ -208,35 +212,34 @@ pHtmlBlock t = try $ do contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) return $ renderTags' $ [open] ++ contents ++ [TagClose t] -pHeader :: TagParser [Block] +pHeader :: TagParser Blocks pHeader = try $ do TagOpen tagtype attr <- pSatisfy $ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] let level = read (drop 1 tagtype) - contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof) + contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] return $ if bodyTitle - then [] -- skip a representation of the title in the body - else [Header level (ident, classes, keyvals) $ - normalizeSpaces contents] + then mempty -- skip a representation of the title in the body + else B.headerWith (ident, classes, keyvals) level contents -pHrule :: TagParser [Block] +pHrule :: TagParser Blocks pHrule = do pSelfClosing (=="hr") (const True) - return [HorizontalRule] + return B.horizontalRule -pTable :: TagParser [Block] +pTable :: TagParser Blocks pTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank - caption <- option [] $ pInTags "caption" inline >>~ skipMany pBlank + caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol - head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") + head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th") skipMany pBlank rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") @@ -245,19 +248,21 @@ pTable = try $ do let isSinglePlain [] = True isSinglePlain [Plain _] = True isSinglePlain _ = False - let isSimple = all isSinglePlain $ concat (head':rows) - let cols = length $ if null head' - then head rows - else head' + let lHead = B.toList head' + let lRows = map B.toList rows + let isSimple = all isSinglePlain (lHead:lRows) + let cols = length $ if null lHead + then head lRows + else lHead -- fail if there are colspans or rowspans - guard $ all (\r -> length r == cols) rows + guard $ all (\r -> length r == cols) lRows let aligns = replicate cols AlignLeft let widths = if null widths' then if isSimple then replicate cols 0 else replicate cols (1.0 / fromIntegral cols) else widths' - return [Table caption aligns widths head' rows] + return $ B.table caption (zip aligns widths) [head'] [rows] pCol :: TagParser Double pCol = try $ do @@ -275,31 +280,31 @@ pColgroup = try $ do skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -pCell :: String -> TagParser [TableCell] +pCell :: String -> TagParser Blocks pCell celltype = try $ do skipMany pBlank res <- pInTags celltype block skipMany pBlank - return [res] + return res -pBlockQuote :: TagParser [Block] +pBlockQuote :: TagParser Blocks pBlockQuote = do contents <- pInTags "blockquote" block - return [BlockQuote $ fixPlains False contents] + return $ B.blockQuote $ fixPlains False contents -pPlain :: TagParser [Block] +pPlain :: TagParser Blocks pPlain = do - contents <- liftM (normalizeSpaces . concat) $ many1 inline - if null contents - then return [] - else return [Plain contents] + contents <- trimInlines . mconcat <$> many1 inline + if B.isNull contents + then return mempty + else return $ B.plain contents -pPara :: TagParser [Block] +pPara :: TagParser Blocks pPara = do - contents <- pInTags "p" inline - return [Para $ normalizeSpaces contents] + contents <- trimInlines <$> pInTags "p" inline + return $ B.para contents -pCodeBlock :: TagParser [Block] +pCodeBlock :: TagParser Blocks pCodeBlock = try $ do TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) contents <- manyTill pAnyTag (pCloses "pre" <|> eof) @@ -312,9 +317,9 @@ pCodeBlock = try $ do let result = case reverse result' of '\n':_ -> init result' _ -> result' - return [CodeBlock (mkAttr attr) result] + return $ B.codeBlockWith (mkAttr attr) result -inline :: TagParser [Inline] +inline :: TagParser Inlines inline = choice [ pTagText , pQ @@ -354,7 +359,7 @@ pSelfClosing f g = do optional $ pSatisfy (tagClose f) return open -pQ :: TagParser [Inline] +pQ :: TagParser Inlines pQ = do quoteContext <- stateQuoteContext `fmap` getState let quoteType = case quoteContext of @@ -363,82 +368,93 @@ pQ = do let innerQuoteContext = if quoteType == SingleQuote then InSingleQuote else InDoubleQuote - withQuoteContext innerQuoteContext $ pInlinesInTags "q" (Quoted quoteType) + let constructor = case quoteType of + SingleQuote -> B.singleQuoted + DoubleQuote -> B.doubleQuoted + withQuoteContext innerQuoteContext $ + pInlinesInTags "q" constructor -pEmph :: TagParser [Inline] -pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph +pEmph :: TagParser Inlines +pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph -pStrong :: TagParser [Inline] -pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong +pStrong :: TagParser Inlines +pStrong = pInlinesInTags "strong" B.strong <|> pInlinesInTags "b" B.strong -pSuperscript :: TagParser [Inline] -pSuperscript = pInlinesInTags "sup" Superscript +pSuperscript :: TagParser Inlines +pSuperscript = pInlinesInTags "sup" B.superscript -pSubscript :: TagParser [Inline] -pSubscript = pInlinesInTags "sub" Subscript +pSubscript :: TagParser Inlines +pSubscript = pInlinesInTags "sub" B.subscript -pStrikeout :: TagParser [Inline] +pStrikeout :: TagParser Inlines pStrikeout = do - pInlinesInTags "s" Strikeout <|> - pInlinesInTags "strike" Strikeout <|> - pInlinesInTags "del" Strikeout <|> + pInlinesInTags "s" B.strikeout <|> + pInlinesInTags "strike" B.strikeout <|> + pInlinesInTags "del" B.strikeout <|> try (do pSatisfy (~== TagOpen "span" [("class","strikeout")]) - contents <- liftM concat $ manyTill inline (pCloses "span") - return [Strikeout contents]) + contents <- mconcat <$> manyTill inline (pCloses "span") + return $ B.strikeout contents) -pLineBreak :: TagParser [Inline] +pLineBreak :: TagParser Inlines pLineBreak = do pSelfClosing (=="br") (const True) - return [LineBreak] + return B.linebreak -pLink :: TagParser [Inline] +pLink :: TagParser Inlines pLink = try $ do tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) let url = fromAttrib "href" tag let title = fromAttrib "title" tag - lab <- liftM concat $ manyTill inline (pCloses "a") - return [Link (normalizeSpaces lab) (escapeURI url, title)] + lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") + return $ B.link (escapeURI url) title lab -pImage :: TagParser [Inline] +pImage :: TagParser Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") let url = fromAttrib "src" tag let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - return [Image (B.toList $ B.text alt) (escapeURI url, title)] + return $ B.image (escapeURI url) title (B.text alt) -pCode :: TagParser [Inline] +pCode :: TagParser Inlines pCode = try $ do (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) result <- manyTill pAnyTag (pCloses open) - return [Code (mkAttr attr) $ intercalate " " $ lines $ innerText result] + return $ B.codeWith (mkAttr attr) $ intercalate " " $ lines $ innerText result -pSpan :: TagParser [Inline] +pSpan :: TagParser Inlines pSpan = try $ do getOption readerParseRaw >>= guard TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline - return [Span (mkAttr attr) contents] + return $ B.spanWith (mkAttr attr) contents -pRawHtmlInline :: TagParser [Inline] +pRawHtmlInline :: TagParser Inlines pRawHtmlInline = do result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag parseRaw <- getOption readerParseRaw if parseRaw - then return [RawInline (Format "html") $ renderTags' [result]] - else return [] + then return $ B.rawInline "html" $ renderTags' [result] + else return mempty -pInlinesInTags :: String -> ([Inline] -> Inline) - -> TagParser [Inline] +pInlinesInTags :: String -> (Inlines -> Inlines) + -> TagParser Inlines pInlinesInTags tagtype f = do - contents <- pInTags tagtype inline - return [f $ normalizeSpaces contents] - -pInTags :: String -> TagParser [a] - -> TagParser [a] + contents <- B.unMany <$> pInTags tagtype inline + let left = case viewl contents of + EmptyL -> mempty + (a :< _) -> padSpace a + let right = case viewr contents of + EmptyR -> mempty + (_ :> a) -> padSpace a + return (left <> f (trimInlines . B.Many $ contents) <> right) + where padSpace a = if a == Space then B.space else mempty + +pInTags :: (Monoid a) => String -> TagParser a + -> TagParser a pInTags tagtype parser = try $ do pSatisfy (~== TagOpen tagtype []) - liftM concat $ manyTill parser (pCloses tagtype <|> eof) + mconcat <$> manyTill parser (pCloses tagtype <|> eof) pOptInTag :: String -> TagParser a -> TagParser a @@ -461,36 +477,36 @@ pCloses tagtype = try $ do (TagClose "dl") | tagtype == "li" -> return () _ -> mzero -pTagText :: TagParser [Inline] +pTagText :: TagParser Inlines pTagText = try $ do (TagText str) <- pSatisfy isTagText st <- getState case runParser (many pTagContents) st "text" str of Left _ -> fail $ "Could not parse `" ++ str ++ "'" - Right result -> return result + Right result -> return $ mconcat result pBlank :: TagParser () pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -pTagContents :: Parser [Char] ParserState Inline +pTagContents :: Parser [Char] ParserState Inlines pTagContents = - Math DisplayMath `fmap` mathDisplay - <|> Math InlineMath `fmap` mathInline + B.displayMath <$> mathDisplay + <|> B.math <$> mathInline <|> pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad -pStr :: Parser [Char] ParserState Inline +pStr :: Parser [Char] ParserState Inlines pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) pos <- getPosition updateState $ \s -> s{ stateLastStrPos = Just pos } - return $ Str result + return $ B.str result isSpecial :: Char -> Bool isSpecial '"' = True @@ -504,13 +520,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: Parser [Char] ParserState Inline -pSymbol = satisfy isSpecial >>= return . Str . (:[]) +pSymbol :: Parser [Char] ParserState Inlines +pSymbol = satisfy isSpecial >>= return . B.str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: Parser [Char] ParserState Inline +pBad :: Parser [Char] ParserState Inlines pBad = do c <- satisfy isBad let c' = case c of @@ -542,10 +558,10 @@ pBad = do '\158' -> '\382' '\159' -> '\376' _ -> '?' - return $ Str [c'] + return $ B.str [c'] -pSpace :: Parser [Char] ParserState Inline -pSpace = many1 (satisfy isSpace) >> return Space +pSpace :: Parser [Char] ParserState Inlines +pSpace = many1 (satisfy isSpace) >> return B.space -- -- Constants @@ -679,3 +695,4 @@ mkAttr attr = (attribsId, attribsClasses, attribsKV) attribsClasses = words $ fromMaybe "" $ lookup "class" attr attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + -- cgit v1.2.3 From 5a51a67abda59c177f3a6d0f6cba59d41e866287 Mon Sep 17 00:00:00 2001 From: Matthew Pickering <matthewtpickering@gmail.com> Date: Thu, 27 Mar 2014 19:56:47 +0000 Subject: Changed the smart punctuation parser to return Inlines rather than an Inline element and updated files accordingly --- src/Text/Pandoc/Parsing.hs | 43 ++++++++++++++++++------------------- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Readers/RST.hs | 2 +- src/Text/Pandoc/Readers/Textile.hs | 4 +--- 4 files changed, 24 insertions(+), 27 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 68d4605ee..a9009eaa2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -997,17 +997,17 @@ registerHeader (ident,classes,kvs) header' = do failUnlessSmart :: HasReaderOptions st => Parser s st () failUnlessSmart = getOption readerSmart >>= guard -smartPunctuation :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +smartPunctuation :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] -apostrophe :: Parser [Char] ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019") +apostrophe :: Parser [Char] ParserState Inlines +apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019") -quoted :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +quoted :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser withQuoteContext :: QuoteContext @@ -1022,20 +1022,19 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result -singleQuoted :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +singleQuoted :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces + return . B.singleQuoted . mconcat -doubleQuoted :: Parser [Char] ParserState Inline - -> Parser [Char] ParserState Inline +doubleQuoted :: Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines doubleQuoted inlineParser = try $ do doubleQuoteStart - withQuoteContext InDoubleQuote $ do - contents <- manyTill inlineParser doubleQuoteEnd - return . Quoted DoubleQuote . normalizeSpaces $ contents + withQuoteContext InDoubleQuote $ manyTill inlineParser doubleQuoteEnd >>= + return . B.doubleQuoted . mconcat failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState () failIfInQuoteContext context = do @@ -1079,17 +1078,17 @@ doubleQuoteEnd = do charOrRef "\"\8221\148" return () -ellipses :: Parser [Char] st Inline +ellipses :: Parser [Char] st Inlines ellipses = do try (charOrRef "\8230\133") <|> try (string "..." >> return '…') - return (Str "\8230") + return (B.str "\8230") -dash :: Parser [Char] ParserState Inline +dash :: Parser [Char] ParserState Inlines dash = do oldDashes <- getOption readerOldDashes if oldDashes then emDashOld <|> enDashOld - else Str `fmap` (hyphenDash <|> emDash <|> enDash) + else B.str `fmap` (hyphenDash <|> emDash <|> enDash) -- Two hyphens = en-dash, three = em-dash hyphenDash :: Parser [Char] st String @@ -1107,16 +1106,16 @@ enDash = do try (charOrRef "\8212\151") return "\8211" -enDashOld :: Parser [Char] st Inline +enDashOld :: Parser [Char] st Inlines enDashOld = do try (charOrRef "\8211\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') - return (Str "\8211") + return (B.str "\8211") -emDashOld :: Parser [Char] st Inline +emDashOld :: Parser [Char] st Inlines emDashOld = do try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') - return (Str "\8212") + return (B.str "\8212") -- This is used to prevent exponential blowups for things like: -- a**a*a**a*a**a*a**a*a**a*a**a*a** diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index aa0252266..57e1ca560 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1873,7 +1873,7 @@ smart :: MarkdownParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses]) + choice (map (return <$>) [apostrophe, dash, ellipses]) singleQuoted :: MarkdownParser (F Inlines) singleQuoted = try $ do diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 127eae167..a574f343a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1140,7 +1140,7 @@ smart :: RSTParser Inlines smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> - choice (map (B.singleton <$>) [apostrophe, dash, ellipses]) + choice [apostrophe, dash, ellipses] singleQuoted :: RSTParser Inlines singleQuoted = try $ do diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ede50c6de..c6f992275 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -306,9 +306,7 @@ rawLaTeXBlock' = do -- | In textile, paragraphs are separated by blank lines. para :: Parser [Char] ParserState Blocks -para = do - a <- manyTill inline blockBreak - return $ (B.para . trimInlines . mconcat) a +para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak -- Tables -- cgit v1.2.3 From 1cadba16eb91a1f9dae03d87201fe1e8a7e0d1b6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Tue, 1 Apr 2014 10:10:46 -0700 Subject: HTML reader: idiomatic rewriting for clarity. --- src/Text/Pandoc/Readers/HTML.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4fab251bb..38619d040 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -442,13 +442,12 @@ pInlinesInTags :: String -> (Inlines -> Inlines) pInlinesInTags tagtype f = do contents <- B.unMany <$> pInTags tagtype inline let left = case viewl contents of - EmptyL -> mempty - (a :< _) -> padSpace a + (Space :< _) -> B.space + _ -> mempty let right = case viewr contents of - EmptyR -> mempty - (_ :> a) -> padSpace a + (_ :> Space) -> B.space + _ -> mempty return (left <> f (trimInlines . B.Many $ contents) <> right) - where padSpace a = if a == Space then B.space else mempty pInTags :: (Monoid a) => String -> TagParser a -> TagParser a -- cgit v1.2.3 From 4ee92dce0ce624db2d02c60ae2856a70cfeb6c42 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Tue, 1 Apr 2014 10:36:23 -0700 Subject: MediaWiki reader: Fixed bug in certain nested lists. The bug: If a level 2 list was followed by a level 1 list, the first item of the level 1 list would be lost. Closes #1213. --- src/Text/Pandoc/Readers/MediaWiki.hs | 3 ++- tests/mediawiki-reader.native | 4 ++++ tests/mediawiki-reader.wiki | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index f70b44aad..9bbabd44b 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -438,7 +438,8 @@ listItem c = try $ do skipMany spaceChar first <- concat <$> manyTill listChunk newline rest <- many - (try $ string extras *> (concat <$> manyTill listChunk newline)) + (try $ string extras *> lookAhead listStartChar *> + (concat <$> manyTill listChunk newline)) contents <- parseFromString (many1 $ listItem' c) (unlines (first : rest)) case c of diff --git a/tests/mediawiki-reader.native b/tests/mediawiki-reader.native index 87e4043f7..2e97e9484 100644 --- a/tests/mediawiki-reader.native +++ b/tests/mediawiki-reader.native @@ -102,6 +102,10 @@ Pandoc (Meta {unMeta = fromList []}) [[BulletList [[Plain [Str "But",Space,Str "jumping",Space,Str "levels",Space,Str "creates",Space,Str "empty",Space,Str "space."]]]]]]] ,Para [Str "Any",Space,Str "other",Space,Str "start",Space,Str "ends",Space,Str "the",Space,Str "list."] +,BulletList + [[BulletList + [[Plain [Str "two"]]]] + ,[Plain [Str "one"]]] ,OrderedList (1,DefaultStyle,DefaultDelim) [[Plain [Str "Start",Space,Str "each",Space,Str "line"]] ,[Plain [Str "with",Space,Str "a",Space,Str "number",Space,Str "sign",Space,Str "(#)."] diff --git a/tests/mediawiki-reader.wiki b/tests/mediawiki-reader.wiki index 641f98eb9..6a6bc226d 100644 --- a/tests/mediawiki-reader.wiki +++ b/tests/mediawiki-reader.wiki @@ -185,6 +185,9 @@ http://johnmacfarlane.net/pandoc/ *** But jumping levels creates empty space. Any other start ends the list. +** two +* one + # Start each line # with a number sign (#). ## More number signs gives deeper -- cgit v1.2.3 From fa0f73aef9d9ff3ca89c1479e3644ac7e7c15991 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 4 Apr 2014 10:07:56 -0700 Subject: Custom writer: read lua script as UTF-8. This should fix #1189. --- src/Text/Pandoc/Writers/Custom.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 0234e1e35..0d58afb61 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -37,6 +37,7 @@ import Data.Char ( toLower ) import Scripting.Lua (LuaState, StackValue, callfunc) import qualified Scripting.Lua as Lua import Text.Pandoc.UTF8 (fromString, toString) +import qualified Text.Pandoc.UTF8 as UTF8 import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as C8 import Data.Monoid @@ -131,7 +132,7 @@ instance StackValue MetaValue where -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String writeCustom luaFile opts doc = do - luaScript <- readFile luaFile + luaScript <- UTF8.readFile luaFile lua <- Lua.newstate Lua.openlibs lua Lua.loadstring lua luaScript "custom" -- cgit v1.2.3 From 036576c5a2660897e1c977407c621c8c436b7600 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 4 Apr 2014 15:43:47 -0700 Subject: Correctly handle UTF-8 in custom lua scripts. Closes #1189. --- src/Text/Pandoc/Writers/Custom.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 0d58afb61..0b30287f5 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -37,7 +37,6 @@ import Data.Char ( toLower ) import Scripting.Lua (LuaState, StackValue, callfunc) import qualified Scripting.Lua as Lua import Text.Pandoc.UTF8 (fromString, toString) -import qualified Text.Pandoc.UTF8 as UTF8 import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as C8 import Data.Monoid @@ -132,7 +131,7 @@ instance StackValue MetaValue where -- | Convert Pandoc to custom markup. writeCustom :: FilePath -> WriterOptions -> Pandoc -> IO String writeCustom luaFile opts doc = do - luaScript <- UTF8.readFile luaFile + luaScript <- C8.unpack `fmap` C8.readFile luaFile lua <- Lua.newstate Lua.openlibs lua Lua.loadstring lua luaScript "custom" -- cgit v1.2.3 From 98658b55fc922d9ad3d883bdc3bdaddd2318dff2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 4 Apr 2014 17:39:56 -0700 Subject: LaTeX writer: handle line breaks in simple table cells. Closes #1217. --- src/Text/Pandoc/Writers/LaTeX.hs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 07be6e9af..6cf7ed730 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -500,9 +500,28 @@ tableRowToLaTeX header aligns widths cols = do cells <- mapM (tableCellToLaTeX header) $ zip3 widths' aligns cols return $ hsep (intersperse "&" cells) $$ "\\\\\\addlinespace" +-- For simple latex tables (without minipages or parboxes), +-- we need to go to some lengths to get line breaks working: +-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. +fixLineBreaks :: Block -> Block +fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils +fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils +fixLineBreaks x = x + +fixLineBreaks' :: [Inline] -> [Inline] +fixLineBreaks' ils = case splitBy (== LineBreak) ils of + [] -> [] + [xs] -> xs + chunks -> RawInline "tex" "\\vtop{" : + concatMap tohbox chunks ++ + [RawInline "tex" "}"] + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys ++ + [RawInline "tex" "}"] + tableCellToLaTeX :: Bool -> (Double, Alignment, [Block]) -> State WriterState Doc -tableCellToLaTeX _ (0, _, blocks) = blockListToLaTeX blocks +tableCellToLaTeX _ (0, _, blocks) = + blockListToLaTeX $ walk fixLineBreaks blocks tableCellToLaTeX header (width, align, blocks) = do modify $ \st -> st{ stInMinipage = True, stNotes = [] } cellContents <- blockListToLaTeX blocks -- cgit v1.2.3 From b1c865ccc6c9a66fbebe59575bd1c3a4a8081cd3 Mon Sep 17 00:00:00 2001 From: Matthew Pickering <matthewtpickering@gmail.com> Date: Fri, 4 Apr 2014 15:18:30 +0100 Subject: Converted current meta information parsing in DocBook to a more extensible version which is aware of the more recent meta representation. --- src/Text/Pandoc/Readers/DocBook.hs | 82 ++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 34 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 56cb16b20..2c7ec61d3 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -499,24 +499,19 @@ type DB = State DBState data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType - , dbDocTitle :: Inlines - , dbDocAuthors :: [Inlines] - , dbDocDate :: Inlines + , dbMeta :: Meta + , dbAcceptsMeta :: Bool , dbBook :: Bool , dbFigureTitle :: Inlines } deriving Show readDocBook :: ReaderOptions -> String -> Pandoc -readDocBook _ inp = setTitle (dbDocTitle st') - $ setAuthors (dbDocAuthors st') - $ setDate (dbDocDate st') - $ doc $ mconcat bs +readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) DBState{ dbSectionLevel = 0 , dbQuoteType = DoubleQuote - , dbDocTitle = mempty - , dbDocAuthors = [] - , dbDocDate = mempty + , dbMeta = mempty + , dbAcceptsMeta = False , dbBook = False , dbFigureTitle = mempty } @@ -560,6 +555,29 @@ attrValue attr elt = named :: String -> Element -> Bool named s e = qName (elName e) == s +-- + +acceptingMetadata :: DB a -> DB a +acceptingMetadata p = do + modify (\s -> s { dbAcceptsMeta = True } ) + res <- p + modify (\s -> s { dbAcceptsMeta = False }) + return res + +checkInMeta :: Monoid a => DB a -> DB a +checkInMeta p = do + accepts <- dbAcceptsMeta <$> get + if accepts then p else return mempty + + + +addMeta :: ToMetaValue a => String -> a -> DB () +addMeta field val = modify (setMeta field val) + +instance HasMeta DBState where + setMeta str v s = s {dbMeta = setMeta str v (dbMeta s)} + + isBlockElement :: Content -> Bool isBlockElement (Elem e) = qName (elName e) `elem` blocktags where blocktags = ["toc","index","para","formalpara","simpara", @@ -606,6 +624,7 @@ getImage e = do getBlocks :: Element -> DB Blocks getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) + parseBlock :: Content -> DB Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s @@ -630,7 +649,9 @@ parseBlock (Elem e) = "attribution" -> return mempty "titleabbrev" -> return mempty "authorinitials" -> return mempty - "title" -> return mempty -- handled by getTitle or sect or figure + "title" -> checkInMeta getTitle >> return mempty -- handled by getTitle or sect or figure + "author" -> checkInMeta getAuthor >> return mempty + "date" -> checkInMeta getDate >> return mempty "bibliography" -> sect 0 "bibliodiv" -> sect 1 "biblioentry" -> parseMixed para (elContent e) @@ -693,8 +714,8 @@ parseBlock (Elem e) = "figure" -> getFigure e "mediaobject" -> para <$> getImage e "caption" -> return mempty - "info" -> getTitle >> getAuthors >> getDate >> return mempty - "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty + "info" -> metaBlock + "articleinfo" -> metaBlock "sectioninfo" -> return mempty -- keywords & other metadata "refsectioninfo" -> return mempty -- keywords & other metadata "refsect1info" -> return mempty -- keywords & other metadata @@ -708,10 +729,10 @@ parseBlock (Elem e) = "chapterinfo" -> return mempty -- keywords & other metadata "glossaryinfo" -> return mempty -- keywords & other metadata "appendixinfo" -> return mempty -- keywords & other metadata - "bookinfo" -> getTitle >> getAuthors >> getDate >> return mempty + "bookinfo" -> metaBlock "article" -> modify (\st -> st{ dbBook = False }) >> - getTitle >> getBlocks e - "book" -> modify (\st -> st{ dbBook = True }) >> getTitle >> getBlocks e + getBlocks e + "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e "table" -> parseTable "informaltable" -> parseTable "literallayout" -> codeBlockWithLang @@ -757,24 +778,16 @@ parseBlock (Elem e) = terms' <- mapM getInlines terms items' <- mapM getBlocks items return (mconcat $ intersperse (str "; ") terms', items') - getTitle = case filterChild (named "title") e of - Just t -> do - tit <- getInlines t - subtit <- case filterChild (named "subtitle") e of - Just s -> (text ": " <>) <$> - getInlines s - Nothing -> return mempty - modify $ \st -> st{dbDocTitle = tit <> subtit} - Nothing -> return () - getAuthors = do - auths <- mapM getInlines - $ filterChildren (named "author") e - modify $ \st -> st{dbDocAuthors = auths} - getDate = case filterChild (named "date") e of - Just t -> do - dat <- getInlines t - modify $ \st -> st{dbDocDate = dat} - Nothing -> return () + getTitle = do + tit <- getInlines e + subtit <- case filterChild (named "subtitle") e of + Just s -> (text ": " <>) <$> + getInlines s + Nothing -> return mempty + addMeta "title" (tit <> subtit) + + getAuthor = getInlines e >>= addMeta "author" + getDate = getInlines e >>= addMeta "date" parseTable = do let isCaption x = named "title" x || named "caption" x caption <- case filterChild isCaption e of @@ -836,6 +849,7 @@ parseBlock (Elem e) = b <- getBlocks e modify $ \st -> st{ dbSectionLevel = n - 1 } return $ header n' headerText <> b + metaBlock = acceptingMetadata (getBlocks e) getInlines :: Element -> DB Inlines getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') -- cgit v1.2.3 From 1b930a9670403f9c6ceadbc067fb3ac7400bd042 Mon Sep 17 00:00:00 2001 From: Matthew Pickering <matthewtpickering@gmail.com> Date: Fri, 4 Apr 2014 16:21:24 +0100 Subject: Added recognition of authorgroup element and releaseinfo element to DocBook reader. Closes #1214 --- src/Text/Pandoc/Readers/DocBook.hs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 2c7ec61d3..76bc7374f 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -45,7 +45,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] audioobject - A wrapper for audio data and its associated meta-information [x] author - The name of an individual author [ ] authorblurb - A short description or note about an author -[ ] authorgroup - Wrapper for author information when a document has +[x] authorgroup - Wrapper for author information when a document has multiple authors or collabarators [x] authorinitials - The initials or other short identifier for an author [o] beginpage - The location of a page break in a print version of the document @@ -341,7 +341,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] refsectioninfo - Meta-information for a refsection [ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page [ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv -[ ] releaseinfo - Information about a particular release of a document +[x] releaseinfo - Information about a particular release of a document [ ] remark - A remark (or comment) intended for presentation in a draft manuscript [ ] replaceable - Content that may or must be replaced by the user @@ -564,10 +564,11 @@ acceptingMetadata p = do modify (\s -> s { dbAcceptsMeta = False }) return res -checkInMeta :: Monoid a => DB a -> DB a +checkInMeta :: Monoid a => DB () -> DB a checkInMeta p = do accepts <- dbAcceptsMeta <$> get - if accepts then p else return mempty + when accepts p + return mempty @@ -649,9 +650,11 @@ parseBlock (Elem e) = "attribution" -> return mempty "titleabbrev" -> return mempty "authorinitials" -> return mempty - "title" -> checkInMeta getTitle >> return mempty -- handled by getTitle or sect or figure - "author" -> checkInMeta getAuthor >> return mempty - "date" -> checkInMeta getDate >> return mempty + "title" -> checkInMeta getTitle + "author" -> checkInMeta getAuthor + "authorgroup" -> checkInMeta getAuthorGroup + "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release") + "date" -> checkInMeta getDate "bibliography" -> sect 0 "bibliodiv" -> sect 1 "biblioentry" -> parseMixed para (elContent e) @@ -786,7 +789,10 @@ parseBlock (Elem e) = Nothing -> return mempty addMeta "title" (tit <> subtit) - getAuthor = getInlines e >>= addMeta "author" + getAuthor = (:[]) <$> getInlines e >>= addMeta "authors" + getAuthorGroup = do + let terms = filterChildren (named "author") e + mapM getInlines terms >>= addMeta "authors" getDate = getInlines e >>= addMeta "date" parseTable = do let isCaption x = named "title" x || named "caption" x @@ -849,7 +855,7 @@ parseBlock (Elem e) = b <- getBlocks e modify $ \st -> st{ dbSectionLevel = n - 1 } return $ header n' headerText <> b - metaBlock = acceptingMetadata (getBlocks e) + metaBlock = acceptingMetadata (getBlocks e) >> return mempty getInlines :: Element -> DB Inlines getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') @@ -913,6 +919,7 @@ parseInline (Elem e) = _ -> emph <$> innerInlines "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) "title" -> return mempty + "affiliation" -> return mempty _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e) -- cgit v1.2.3 From c99ecf6cc566a90eef1816eb5a1a1167b81ad9ac Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 4 Apr 2014 21:33:16 -0700 Subject: DocBook reader: set "author" not "authors". --- src/Text/Pandoc/Readers/DocBook.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 76bc7374f..279f1e7f8 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -576,8 +576,8 @@ addMeta :: ToMetaValue a => String -> a -> DB () addMeta field val = modify (setMeta field val) instance HasMeta DBState where - setMeta str v s = s {dbMeta = setMeta str v (dbMeta s)} - + setMeta field v s = s {dbMeta = setMeta field v (dbMeta s)} + deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)} isBlockElement :: Content -> Bool isBlockElement (Elem e) = qName (elName e) `elem` blocktags @@ -789,7 +789,7 @@ parseBlock (Elem e) = Nothing -> return mempty addMeta "title" (tit <> subtit) - getAuthor = (:[]) <$> getInlines e >>= addMeta "authors" + getAuthor = (:[]) <$> getInlines e >>= addMeta "author" getAuthorGroup = do let terms = filterChildren (named "author") e mapM getInlines terms >>= addMeta "authors" -- cgit v1.2.3 From c5a0c7a8199698c3973e50db9e91ae2694e018d4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 4 Apr 2014 21:34:12 -0700 Subject: Removed trailing whitespace. --- src/Text/Pandoc/Readers/DocBook.hs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 279f1e7f8..28af646c1 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -555,23 +555,23 @@ attrValue attr elt = named :: String -> Element -> Bool named s e = qName (elName e) == s --- +-- acceptingMetadata :: DB a -> DB a acceptingMetadata p = do modify (\s -> s { dbAcceptsMeta = True } ) res <- p modify (\s -> s { dbAcceptsMeta = False }) - return res + return res -checkInMeta :: Monoid a => DB () -> DB a +checkInMeta :: Monoid a => DB () -> DB a checkInMeta p = do - accepts <- dbAcceptsMeta <$> get - when accepts p + accepts <- dbAcceptsMeta <$> get + when accepts p return mempty - - - + + + addMeta :: ToMetaValue a => String -> a -> DB () addMeta field val = modify (setMeta field val) @@ -650,9 +650,9 @@ parseBlock (Elem e) = "attribution" -> return mempty "titleabbrev" -> return mempty "authorinitials" -> return mempty - "title" -> checkInMeta getTitle - "author" -> checkInMeta getAuthor - "authorgroup" -> checkInMeta getAuthorGroup + "title" -> checkInMeta getTitle + "author" -> checkInMeta getAuthor + "authorgroup" -> checkInMeta getAuthorGroup "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release") "date" -> checkInMeta getDate "bibliography" -> sect 0 @@ -717,7 +717,7 @@ parseBlock (Elem e) = "figure" -> getFigure e "mediaobject" -> para <$> getImage e "caption" -> return mempty - "info" -> metaBlock + "info" -> metaBlock "articleinfo" -> metaBlock "sectioninfo" -> return mempty -- keywords & other metadata "refsectioninfo" -> return mempty -- keywords & other metadata @@ -788,12 +788,12 @@ parseBlock (Elem e) = getInlines s Nothing -> return mempty addMeta "title" (tit <> subtit) - + getAuthor = (:[]) <$> getInlines e >>= addMeta "author" getAuthorGroup = do let terms = filterChildren (named "author") e - mapM getInlines terms >>= addMeta "authors" - getDate = getInlines e >>= addMeta "date" + mapM getInlines terms >>= addMeta "authors" + getDate = getInlines e >>= addMeta "date" parseTable = do let isCaption x = named "title" x || named "caption" x caption <- case filterChild isCaption e of -- cgit v1.2.3 From e06aa97bb93cab5ddeadca5b65e45649663e527c Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 4 Apr 2014 21:39:08 -0700 Subject: DocBook reader: set metadata "author" not "authors" --- src/Text/Pandoc/Readers/DocBook.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 28af646c1..4fb38a07d 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -792,7 +792,7 @@ parseBlock (Elem e) = getAuthor = (:[]) <$> getInlines e >>= addMeta "author" getAuthorGroup = do let terms = filterChildren (named "author") e - mapM getInlines terms >>= addMeta "authors" + mapM getInlines terms >>= addMeta "author" getDate = getInlines e >>= addMeta "date" parseTable = do let isCaption x = named "title" x || named "caption" x -- cgit v1.2.3 From ee2e769cd7ee075876c974f94817e6eee294bedd Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Fri, 4 Apr 2014 22:01:12 -0700 Subject: DocBook reader: Better treatment of formalpara. We now emit the title (if present) as a separate paragraph with boldface text. Closes #1215. --- src/Text/Pandoc/Readers/DocBook.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 4fb38a07d..d58f8b3c5 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -639,10 +639,10 @@ parseBlock (Elem e) = "para" -> parseMixed para (elContent e) "formalpara" -> do tit <- case filterChild (named "title") e of - Just t -> (<> str "." <> linebreak) <$> emph - <$> getInlines t + Just t -> (para . strong . (<> str ".")) <$> + getInlines t Nothing -> return mempty - addToStart tit <$> parseMixed para (elContent e) + (tit <>) <$> parseMixed para (elContent e) "simpara" -> parseMixed para (elContent e) "ackno" -> parseMixed para (elContent e) "epigraph" -> parseBlockquote -- cgit v1.2.3 From 7cf7e45e4cbb99b320a92b4bd31e433f535d3ef7 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Fri, 4 Apr 2014 14:17:43 +0200 Subject: Org reader: Slight cleaning of table parsing code --- src/Text/Pandoc/Readers/Org.hs | 68 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 33 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5dc250f04..8b155194b 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -217,13 +217,18 @@ data OrgTableRow = OrgContentRow [Blocks] | OrgHlineRow deriving (Eq, Show) -type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]]) +data OrgTable = OrgTable + { orgTableColumns :: Int + , orgTableAlignments :: [Alignment] + , orgTableHeader :: [Blocks] + , orgTableRows :: [[Blocks]] + } deriving (Eq, Show) table :: OrgParser Blocks table = try $ do lookAhead tableStart - (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows - return $ B.table "" (zip aligns widths) heads lns + OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows + return $ B.table "" (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' @@ -237,10 +242,9 @@ tableContentRow = try $ tableContentCell :: OrgParser Blocks tableContentCell = try $ - B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell) + B.plain . trimInlines . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char --- endOfCell = char '|' <|> newline endOfCell = try $ char '|' <|> lookAhead newline tableAlignRow :: OrgParser OrgTableRow @@ -269,54 +273,53 @@ tableHline :: OrgParser OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) -tableContent :: [OrgTableRow] - -> OrgTableContent -tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty) +rowsToTable :: [OrgTableRow] + -> OrgTable +rowsToTable = foldl' (flip rowToContent) zeroTable + where zeroTable = OrgTable 0 mempty mempty mempty -normalizeTable :: OrgTableContent - -> OrgTableContent -normalizeTable (cols, aligns, widths, heads, lns) = +normalizeTable :: OrgTable + -> OrgTable +normalizeTable (OrgTable cols aligns heads lns) = let aligns' = fillColumns aligns AlignDefault - widths' = fillColumns widths 0.0 heads' = if heads == mempty - then heads + then mempty else fillColumns heads (B.plain mempty) lns' = map (flip fillColumns (B.plain mempty)) lns fillColumns base padding = take cols $ base ++ repeat padding - in (cols, aligns', widths', heads', lns') + in OrgTable cols aligns' heads' lns' -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow - -> OrgTableContent - -> OrgTableContent + -> OrgTable + -> OrgTable rowToContent OrgHlineRow = maybeBodyToHeader rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as setLongestRow :: [a] - -> OrgTableContent - -> OrgTableContent -setLongestRow r (cols, aligns, widths, heads, lns) = - (max cols (length r), aligns, widths, heads, lns) + -> OrgTable + -> OrgTable +setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) } -maybeBodyToHeader :: OrgTableContent - -> OrgTableContent -maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, []) -maybeBodyToHeader content = content +maybeBodyToHeader :: OrgTable + -> OrgTable +maybeBodyToHeader t = case t of + OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> + t{ orgTableHeader = b , orgTableRows = [] } + _ -> t appendToBody :: [Blocks] - -> OrgTableContent - -> OrgTableContent -appendToBody r (cols, aligns, widths, heads, lns) = - (cols, aligns, widths, heads, lns ++ [r]) + -> OrgTable + -> OrgTable +appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] - -> OrgTableContent - -> OrgTableContent -setAligns aligns (cols, _, widths, heads, lns) = - (cols, aligns, widths, heads, lns) + -> OrgTable + -> OrgTable +setAligns aligns t = t{ orgTableAlignments = aligns } -- Paragraphs or Plain text paraOrPlain :: OrgParser Blocks @@ -549,4 +552,3 @@ endsOnThisLine input c doOnOtherLines = do then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero - -- cgit v1.2.3 From d43c3e81017734170fb25460c4b9ab9cccb1e0db Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Fri, 4 Apr 2014 17:20:36 +0200 Subject: Org reader: Use specialized org parser state The default pandoc ParserState is replaced with `OrgParserState`. This is done to simplify the introduction of new state fields required for efficient Org parsing. --- src/Text/Pandoc/Readers/Org.hs | 48 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 8b155194b..0ae4d231c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -29,15 +29,16 @@ Conversion of Org-Mode to 'Pandoc' document. module Text.Pandoc.Readers.Org ( readOrg ) where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (orderedListMarker) +import Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos) import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) import Control.Monad (guard, mzero) import Data.Char (toLower) +import Data.Default import Data.List (foldl') import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) @@ -46,15 +47,48 @@ import Data.Monoid (mconcat, mempty, mappend) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n") +readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n") + +type OrgParser = Parser [Char] OrgParserState + +-- | Org-mode parser state +data OrgParserState = OrgParserState + { orgOptions :: ReaderOptions + , orgInlineCharStack :: [Char] + , orgLastStrPos :: Maybe SourcePos + , orgMeta :: Meta + } deriving (Show) + +instance HasReaderOptions OrgParserState where + extractReaderOptions = orgOptions + +instance HasMeta OrgParserState where + setMeta field val st = + st{ orgMeta = setMeta field val $ orgMeta st } + deleteMeta field st = + st{ orgMeta = deleteMeta field $ orgMeta st } + +instance Default OrgParserState where + def = defaultOrgParserState + +defaultOrgParserState :: OrgParserState +defaultOrgParserState = OrgParserState + { orgOptions = def + , orgInlineCharStack = [] + , orgLastStrPos = Nothing + , orgMeta = nullMeta + } + +updateLastStrPos :: OrgParser () +updateLastStrPos = getPosition >>= \p -> + updateState $ \s -> s{ orgLastStrPos = Just p } -type OrgParser = Parser [Char] ParserState parseOrg:: OrgParser Pandoc parseOrg = do blocks' <- B.toList <$> parseBlocks st <- getState - let meta = stateMeta st + let meta = orgMeta st return $ Pandoc meta $ filter (/= Null) blocks' -- @@ -177,7 +211,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser Blocks declarationLine = try $ do meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta - updateState $ \st -> st { stateMeta = stateMeta st <> meta' } + updateState $ \st -> st { orgMeta = orgMeta st <> meta' } return mempty metaValue :: OrgParser MetaValue @@ -522,7 +556,7 @@ atStart :: OrgParser a -> OrgParser a atStart p = do pos <- getPosition st <- getState - guard $ stateLastStrPos st /= Just pos + guard $ orgLastStrPos st /= Just pos p -- | succeeds only if we're at the end of a word -- cgit v1.2.3 From fd98532784e43ad73072f37a31af5ff40fdc1c56 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sat, 5 Apr 2014 09:37:46 +0200 Subject: Org reader: Fix parsing of nested inlines Text such as /*this*/ was not correctly parsed as a strong, emphasised word. This was due to the end-of-word recognition being to strict as it did not accept markup chars as part of a word. The fix involves an additional parser state field, listing the markup chars which might be parsed as part of a word. --- src/Text/Pandoc/Readers/Org.hs | 27 ++++++++++++++++++++------- tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0ae4d231c..ad66caab9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -535,8 +535,15 @@ enclosedInlines start end = try $ -- FIXME: This is a hack inlinesEnclosedBy :: Char -> OrgParser Inlines -inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) - (atEnd $ char c) +inlinesEnclosedBy c = try $ do + updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } + res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st } + return res + where shift xs + | null xs = [] + | otherwise = tail xs enclosedRaw :: OrgParser a -> OrgParser b @@ -561,11 +568,16 @@ atStart p = do -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a -atEnd p = try $ p <* lookingAtEndOfWord - where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars +atEnd p = try $ do + p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars -postWordChars :: [Char] -postWordChars = "\t\n\r !\"'),-.:?}" +postWordChars :: OrgParser [Char] +postWordChars = do + st <- getState + return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st) + where safeSecond (_:x2:_) = [x2] + safeSecond _ = [] -- FIXME: These functions are hacks and should be replaced endsOnThisOrNextLine :: Char @@ -580,9 +592,10 @@ endsOnThisLine :: [Char] -> ([Char] -> OrgParser ()) -> OrgParser () endsOnThisLine input c doOnOtherLines = do + postWordChars' <- postWordChars case break (`elem` c:"\n") input of (_,'\n':rest) -> doOnOtherLines rest - (_,_:rest@(n:_)) -> if n `elem` postWordChars + (_,_:rest@(n:_)) -> if n `elem` postWordChars' then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 8c5982302..9091d9c74 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -42,6 +42,10 @@ tests = "*Cider*" =?> para (strong "Cider") + , "Strong Emphasis" =: + "/*strength*/" =?> + para (emph . strong $ "strength") + , "Strikeout" =: "+Kill Bill+" =?> para (strikeout . spcSep $ [ "Kill", "Bill" ]) -- cgit v1.2.3 From d76d2b707b2b5cebb38122e117527a70996c2c4f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sat, 5 Apr 2014 09:09:44 +0200 Subject: Org reader: Provide more language identifier translations Org-mode and Pandoc use different language identifiers, marking source code as being written in a certain programming language. This adds more translations from identifiers as used in Org to identifiers used in Pandoc. The full list of identifiers used in Org and Pandoc is available through http://orgmode.org/manual/Languages.html and `pandoc -v`, respectively. --- src/Text/Pandoc/Readers/Org.hs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ad66caab9..62088a04d 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -153,7 +153,14 @@ indentWith num = do , try (char '\t' >> count (num - tabStop) (char ' ')) ] translateLang :: String -> String -translateLang "sh" = "bash" +translateLang "C" = "c" +translateLang "C++" = "cpp" +translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported +translateLang "js" = "javascript" +translateLang "lisp" = "commonlisp" +translateLang "R" = "r" +translateLang "sh" = "bash" +translateLang "sqlite" = "sql" translateLang cs = cs commaEscaped :: String -> String -- cgit v1.2.3 From 652c781e375f3678a0ec821663240d4958f324de Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sat, 5 Apr 2014 16:10:52 +0200 Subject: Org reader: Support inline images --- src/Text/Pandoc/Readers/Org.hs | 34 ++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 12 ++++++++++-- 2 files changed, 34 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 62088a04d..8b1b4fa23 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,7 +39,7 @@ import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<** import Control.Monad (guard, mzero) import Data.Char (toLower) import Data.Default -import Data.List (foldl') +import Data.List (foldl', isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) @@ -484,20 +484,26 @@ endline = try $ do return B.space link :: OrgParser Inlines -link = explicitLink <|> selfLink <?> "link" +link = explicitOrImageLink <|> selflinkOrImage <?> "link" -explicitLink :: OrgParser Inlines -explicitLink = try $ do +explicitOrImageLink :: OrgParser Inlines +explicitOrImageLink = try $ do char '[' - src <- enclosedRaw (char '[') (char ']') - title <- enclosedInlines (char '[') (char ']') + src <- enclosedRaw (char '[') (char ']') + title <- enclosedRaw (char '[') (char ']') + title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n") char ']' - return $ B.link src "" title + return $ if (isImage src) && (isImage title) + then B.link src "" (B.image title "" "") + else B.link src "" title' + where butLast = reverse . tail . reverse -selfLink :: OrgParser Inlines -selfLink = try $ do +selflinkOrImage :: OrgParser Inlines +selflinkOrImage = try $ do src <- enclosedRaw (string "[[") (string "]]") - return $ B.link src "" (B.str src) + return $ if isImage src + then B.image src "" "" + else B.link src "" (B.str src) emph :: OrgParser Inlines emph = B.emph <$> inlinesEnclosedBy '/' @@ -606,3 +612,11 @@ endsOnThisLine input c doOnOtherLines = do then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero + +isImage filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9091d9c74..1088d6611 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -94,14 +94,22 @@ tests = , (strong ("is" <> space <> "not")) , "emph/" ]) + , "Image" =: + "[[./sunset.jpg]]" =?> + (para $ image "./sunset.jpg" "" "") + , "Explicit link" =: - "[[http://zeitlens.com/][pseudo-random nonsense]]" =?> + "[[http://zeitlens.com/][pseudo-random /nonsense/]]" =?> (para $ link "http://zeitlens.com/" "" - ("pseudo-random" <> space <> "nonsense")) + ("pseudo-random" <> space <> emph "nonsense")) , "Self-link" =: "[[http://zeitlens.com/]]" =?> (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/") + + , "Image link" =: + "[[sunset.png][dusk.svg]]" =?> + (para $ link "sunset.png" "" (image "dusk.svg" "" "")) ] , testGroup "Meta Information" $ -- cgit v1.2.3 From c0309a60bc48e347e4b9d621ee38b84a98d0c187 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 5 Apr 2014 10:58:32 -0700 Subject: Shared.openURL: Set proxy with value of http_proxy env variable. Note: proxies with non-root paths are not supported, because of limitations in http-conduit. Closes #1211. --- src/Text/Pandoc/Shared.hs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 714402e42..3835629db 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,5 +1,5 @@ {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, - FlexibleContexts #-} + FlexibleContexts, ScopedTypeVariables #-} {- Copyright (C) 2006-2013 John MacFarlane <jgm@berkeley.edu> @@ -120,7 +120,9 @@ import Paths_pandoc (getDataFileName) #ifdef HTTP_CONDUIT import Data.ByteString.Lazy (toChunks) import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, - responseBody, responseHeaders) + responseBody, responseHeaders, addProxy, + Request(port,host)) +import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType) import Network (withSocketsDo) #else @@ -648,7 +650,13 @@ openURL u #ifdef HTTP_CONDUIT | otherwise = withSocketsDo $ E.try $ do req <- parseUrl u - resp <- withManager $ httpLbs req + (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy" + let req' = case proxy of + Left _ -> req + Right pr -> case parseUrl pr of + Just r -> addProxy (host r) (port r) req + Nothing -> req + resp <- withManager $ httpLbs req' return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else -- cgit v1.2.3 From f2deb9d86d864c79c5ca18205b4ca565b7199413 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 5 Apr 2014 15:50:46 -0700 Subject: Org reader: Added type signature. --- src/Text/Pandoc/Readers/Org.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 8b1b4fa23..5ad2531ac 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -613,6 +613,7 @@ endsOnThisLine input c doOnOtherLines = do else endsOnThisLine rest c doOnOtherLines _ -> mzero +isImage :: String -> Bool isImage filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && any (\x -> (x++":") `isPrefixOf` filename) protocols || -- cgit v1.2.3 From d4054444c00449f49a8948eae91ad7bfdb36ea8e Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 5 Apr 2014 19:57:42 -0700 Subject: Text.Pandoc.PDF: Ensure that temp directories deleted on Windows. The PDF is now read as a strict bytestring, ensuring that process ownership will be terminated, so the temp directory can be deleted. Closes #1192. --- src/Text/Pandoc/PDF.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 608cad2e9..abc5c41b7 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -176,7 +176,10 @@ runTeXProgram program runsLeft tmpDir source = do let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir pdfExists <- doesFileExist pdfFile pdf <- if pdfExists - then Just `fmap` B.readFile pdfFile + -- We read PDF as a strict bytestring to make sure that the + -- temp directory is removed on Windows. + -- See https://github.com/jgm/pandoc/issues/1192. + then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile else return Nothing return (exit, out <> err, pdf) -- cgit v1.2.3 From 75dbe87a9969d3fb684409f26bb14b593ea68ca7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 5 Apr 2014 20:31:27 -0700 Subject: Removed whitespace at ends of lines. --- src/Text/Pandoc/Readers/Textile.hs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index c6f992275..b42c5e75d 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -54,7 +54,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options -import Text.Pandoc.Parsing +import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) @@ -225,7 +225,7 @@ bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth d -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks +bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of @@ -237,7 +237,7 @@ orderedListAtDepth depth = try $ do -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks +orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items @@ -274,7 +274,7 @@ definitionListItem = try $ do where inlineDef :: Parser [Char] ParserState [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline - multilineDef :: Parser [Char] ParserState [Blocks] + multilineDef :: Parser [Char] ParserState [Blocks] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -596,7 +596,7 @@ surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try bo simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) -simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct +simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor @@ -605,27 +605,27 @@ ungroupedSimpleInline border construct = try $ do st <- getState pos <- getPosition isWhitespace <- option False (whitespace >> return True) - guard $ (stateQuoteContext st /= NoQuote) - || (sourceColumn pos == 1) + guard $ (stateQuoteContext st /= NoQuote) + || (sourceColumn pos == 1) || isWhitespace body <- surrounded border inlineWithAttribute lookAhead (notFollowedBy alphaNum) - let result = construct $ mconcat body - return $ if isWhitespace then B.space <> result + let result = construct $ mconcat body + return $ if isWhitespace then B.space <> result else result - where - inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") + where + inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") >> (withQuoteContext InSingleQuote inline) -groupedSimpleInline :: Parser [Char] ParserState t +groupedSimpleInline :: Parser [Char] ParserState t -> (Inlines -> Inlines) - -> Parser [Char] ParserState Inlines + -> Parser [Char] ParserState Inlines groupedSimpleInline border construct = try $ do - char '[' + char '[' withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']' - + -- | Create a singleton list -- cgit v1.2.3 From 060a76a38e1f3586bc92787bb2c25c2dc04e380e Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 5 Apr 2014 20:41:38 -0700 Subject: Textile reader: Improved treatment of HTML spans (%). Closes #1115. --- src/Text/Pandoc/Readers/Textile.hs | 6 +----- tests/textile-reader.native | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index b42c5e75d..f19d68e64 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -376,7 +376,6 @@ inlineParsers = [ inlineMarkup , endline , code , escapedInline - , htmlSpan , rawHtmlInline , rawLaTeXInline' , note @@ -400,6 +399,7 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout , simpleInline (char '^') B.superscript , simpleInline (char '~') B.subscript + , simpleInline (char '%') id ] -- | Trademark, registered, copyright @@ -476,10 +476,6 @@ str = do updateLastStrPos return $ B.str fullStr --- | Textile allows HTML span infos, we discard them -htmlSpan :: Parser [Char] ParserState Inlines -htmlSpan = try $ B.str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) - -- | Some number of space chars whitespace :: Parser [Char] ParserState Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 0c2b13e72..88fc0bb47 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -124,7 +124,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax,",Space,Str "like",LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] ,Header 1 ("attributes",[],[]) [Str "Attributes"] ,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers."] -,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str " all kind"] +,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str "all",Space,Str "kind"] ,Para [Str "and",Space,Str "paragraph",Space,Str "attributes,",Space,Str "and",Space,Str "table",Space,Str "attributes."] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] -- cgit v1.2.3 From 24f438aa5f230464d510fae034c94644c0e181ca Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 5 Apr 2014 21:02:12 -0700 Subject: Textile reader: Better support for attributes. Instead of being ignored, attributes are now parsed and included in Span inlines. The output will be a bit different from stock textile: e.g. for `*(foo)hi*`, we'll get `<em><span class="foo">hi</span></em>` instead of `<em class="foo">hi</em>`. But at least the data is not lost. --- src/Text/Pandoc/Readers/Textile.hs | 21 ++++++++++++--------- tests/textile-reader.native | 2 +- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index f19d68e64..81994e6bd 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -604,15 +604,18 @@ ungroupedSimpleInline border construct = try $ do guard $ (stateQuoteContext st /= NoQuote) || (sourceColumn pos == 1) || isWhitespace - body <- surrounded border inlineWithAttribute - lookAhead (notFollowedBy alphaNum) - let result = construct $ mconcat body - return $ if isWhitespace then B.space <> result - else result - where - inlineWithAttribute = (try $ optional attributes) >> notFollowedBy (string "\n\n") - >> (withQuoteContext InSingleQuote inline) - + border *> notFollowedBy (oneOf " \t\n\r") + attr <- attributes + body <- trimInlines . mconcat <$> + withQuoteContext InSingleQuote + (manyTill inline (try border <* notFollowedBy alphaNum)) + let result = construct $ + if attr == nullAttr + then body + else B.spanWith attr body + return $ if isWhitespace + then B.space <> result + else result groupedSimpleInline :: Parser [Char] ParserState t -> (Inlines -> Inlines) diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 88fc0bb47..a17bd8de1 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -124,7 +124,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax,",Space,Str "like",LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] ,Header 1 ("attributes",[],[]) [Str "Attributes"] ,Header 2 ("ident",["bar","foo"],[("style","color:red"),("lang","en")]) [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "parsed",Space,Str "in",Space,Str "headers."] -,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str "all",Space,Str "kind"] +,Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Span ("",["foo"],[]) [Str "inline",Space,Str "attributes"]],Space,Str "of",Space,Span ("",[],[("style","color:red")]) [Str "all",Space,Str "kind"]] ,Para [Str "and",Space,Str "paragraph",Space,Str "attributes,",Space,Str "and",Space,Str "table",Space,Str "attributes."] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [] -- cgit v1.2.3 From 971e4c43647cf28d1de0dbc109a6ec6f269fa563 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sat, 5 Apr 2014 23:05:11 -0700 Subject: HTML reader: Updated `closes` with rules from HTML5 spec. --- src/Text/Pandoc/Readers/HTML.hs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 38619d040..2101b2fc2 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -469,7 +469,7 @@ pCloses :: String -> TagParser () pCloses tagtype = try $ do t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag case t of - (TagClose t') | t' == tagtype -> pAnyTag >> return () + (TagClose t') | t' == tagtype -> pAnyTag >> return () (TagOpen t' _) | t' `closes` tagtype -> return () (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () @@ -627,7 +627,7 @@ isCommentTag :: Tag String -> Bool isCommentTag = tagComment (const True) -- taken from HXT and extended - +-- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags closes :: String -> String -> Bool _ `closes` "body" = False _ `closes` "html" = False @@ -635,11 +635,18 @@ _ `closes` "html" = False "li" `closes` "li" = True "th" `closes` t | t `elem` ["th","td"] = True "tr" `closes` t | t `elem` ["th","td","tr"] = True +"dd" `closes` t | t `elem` ["dt", "dd"] = True "dt" `closes` t | t `elem` ["dt","dd"] = True -"hr" `closes` "p" = True -"p" `closes` "p" = True +"rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True +"optgroup" `closes` "optgroup" = True +"optgroup" `closes` "option" = True +"option" `closes` "option" = True +-- http://www.w3.org/TR/html-markup/p.html +x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote", + "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4", + "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section", + "table", "ul"] = True "meta" `closes` "meta" = True -"colgroup" `closes` "colgroup" = True "form" `closes` "form" = True "label" `closes` "label" = True "map" `closes` "map" = True -- cgit v1.2.3 From 4ebf6f6ebf7d679252ade08203ec13e3e92c2db5 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sun, 6 Apr 2014 19:09:33 +0200 Subject: Org reader: Minor code clean-up --- src/Text/Pandoc/Readers/Org.hs | 51 +++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5ad2531ac..6652925aa 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -268,8 +268,12 @@ data OrgTable = OrgTable table :: OrgParser Blocks table = try $ do lookAhead tableStart - OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows - return $ B.table "" (zip aligns $ repeat 0) heads lns + orgToPandocTable . normalizeTable . rowsToTable <$> tableRows + +orgToPandocTable :: OrgTable + -> Blocks +orgToPandocTable (OrgTable _ aligns heads lns) = + B.table "" (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' @@ -403,20 +407,14 @@ orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") +-- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser Blocks listItem start = try $ do - (markerLength, first) <- try (start >>= rawListItem) - rest <- many (listContinuation markerLength) - parseFromString parseBlocks $ concat (first:rest) - --- parse raw text for one list item, excluding start marker and continuations -rawListItem :: Int - -> OrgParser (Int, String) -rawListItem markerLength = try $ do - firstLine <- anyLine - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + markerLength <- try start + firstLine <- anyLineNewline + rest <- concat <$> many (listContinuation markerLength) + parseFromString parseBlocks $ firstLine ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. @@ -424,14 +422,11 @@ listContinuation :: Int -> OrgParser String listContinuation markerLength = try $ mappend <$> many blankline - <*> (concat <$> many1 (listLine markerLength)) + <*> (concat <$> many1 listLine) + where listLine = try $ indentWith markerLength *> anyLineNewline --- parse a line of a list item -listLine :: Int - -> OrgParser String -listLine markerLength = try $ - indentWith markerLength *> anyLine - <**> pure (++ "\n") +anyLineNewline :: OrgParser String +anyLineNewline = (++ "\n") <$> anyLine -- @@ -491,12 +486,11 @@ explicitOrImageLink = try $ do char '[' src <- enclosedRaw (char '[') (char ']') title <- enclosedRaw (char '[') (char ']') - title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n") + title' <- parseFromString (mconcat <$> many inline) title char ']' return $ if (isImage src) && (isImage title) then B.link src "" (B.image title "" "") else B.link src "" title' - where butLast = reverse . tail . reverse selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do @@ -552,11 +546,8 @@ inlinesEnclosedBy c = try $ do updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) (atEnd $ char c) - updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st } + updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st } return res - where shift xs - | null xs = [] - | otherwise = tail xs enclosedRaw :: OrgParser a -> OrgParser b @@ -583,14 +574,13 @@ atStart p = do atEnd :: OrgParser a -> OrgParser a atEnd p = try $ do p <* lookingAtEndOfWord - where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars + where lookingAtEndOfWord = + eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars postWordChars :: OrgParser [Char] postWordChars = do st <- getState - return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st) - where safeSecond (_:x2:_) = [x2] - safeSecond _ = [] + return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st) -- FIXME: These functions are hacks and should be replaced endsOnThisOrNextLine :: Char @@ -608,6 +598,7 @@ endsOnThisLine input c doOnOtherLines = do postWordChars' <- postWordChars case break (`elem` c:"\n") input of (_,'\n':rest) -> doOnOtherLines rest + (_,_:[]) -> return () (_,_:rest@(n:_)) -> if n `elem` postWordChars' then return () else endsOnThisLine rest c doOnOtherLines -- cgit v1.2.3 From 480b33b7100048ef3fad51754ae76c21daa8b86f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sun, 6 Apr 2014 14:49:57 +0200 Subject: Org reader: Add support for definition lists --- src/Text/Pandoc/Readers/Org.hs | 17 ++++++++++++++++- tests/Tests/Readers/Org.hs | 26 +++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 6652925aa..20bca3e28 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -383,7 +383,10 @@ restOfLine = mconcat <$> manyTill inline newline -- list :: OrgParser Blocks -list = choice [ bulletList, orderedList ] <?> "list" +list = choice [ definitionList, bulletList, orderedList ] <?> "list" + +definitionList :: OrgParser Blocks +definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart) bulletList :: OrgParser Blocks bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) @@ -407,6 +410,18 @@ orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") +definitionListItem :: OrgParser Int + -> OrgParser (Inlines, [Blocks]) +definitionListItem parseMarkerGetLength = try $ do + markerLength <- parseMarkerGetLength + term <- manyTill (noneOf "\n\r") (try $ string "::") + first <- anyLineNewline + cont <- concat <$> many (listContinuation markerLength) + term' <- parseFromString inline term + contents' <- parseFromString parseBlocks $ first ++ cont + return (term', [contents']) + + -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser Blocks diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 1088d6611..eb9f4d741 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -43,8 +43,8 @@ tests = para (strong "Cider") , "Strong Emphasis" =: - "/*strength*/" =?> - para (emph . strong $ "strength") + "/*strength*/" =?> + para (emph . strong $ "strength") , "Strikeout" =: "+Kill Bill+" =?> @@ -428,7 +428,27 @@ tests = , "Bullet List in Ordered List" =: ("1. GNU\n" ++ " - Freedom\n") =?> - orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + + , "Definition List" =: + unlines [ "- PLL :: phase-locked loop" + , "- TTL ::" + , " transistor-transistor logic" + , "- PSK::phase-shift keying" + , "" + , " a digital modulation scheme" + ] =?> + definitionList [ ("PLL", [ plain $ "phase-locked" <> space <> "loop" ]) + , ("TTL", [ plain $ "transistor-transistor" <> space <> + "logic" ]) + , ("PSK", [ mconcat + [ para $ "phase-shift" <> space <> "keying" + , plain $ spcSep [ "a", "digital" + , "modulation", "scheme" ] + ] + ] + ) + ] ] , testGroup "Tables" -- cgit v1.2.3 From fcddd0e4bd5f1b5fd4eb33e291fb9ccc2358d43f Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 6 Apr 2014 15:11:18 -0700 Subject: LaTeX reader: handle @{} and p{length} in tabular. The length is not actually recorded, but at least we get a table. Closes #1180. --- src/Text/Pandoc/Readers/LaTeX.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f23a5b35e..fd761dbec 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1168,12 +1168,13 @@ complexNatbibCitation mode = try $ do parseAligns :: LP [Alignment] parseAligns = try $ do char '{' - let maybeBar = try $ spaces >> optional (char '|') + let maybeBar = skipMany $ sp <|> () <$ char '|' <|> () <$ try (string "@{}") maybeBar let cAlign = AlignCenter <$ char 'c' let lAlign = AlignLeft <$ char 'l' let rAlign = AlignRight <$ char 'r' - let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign) + let parAlign = AlignLeft <$ (char 'p' >> braced) + let alignChar = cAlign <|> lAlign <|> rAlign <|> parAlign aligns' <- sepEndBy alignChar maybeBar spaces char '}' -- cgit v1.2.3 From e352ec5a0e4af245584972e89ae13352e907cdf0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Sun, 6 Apr 2014 16:32:53 -0700 Subject: LaTeX writer: Workaround for level 4-5 headers in quotes. These previously produced invalid LaTeX: `\paragraph` or `\subparagraph` in a `quote` environment. This adds an `mbox{}` in these contexts to work around the problem. See http://tex.stackexchange.com/a/169833/22451. Closes #1221. --- src/Text/Pandoc/Writers/LaTeX.hs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 6cf7ed730..e12c9078f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -51,6 +51,7 @@ import Text.Pandoc.Highlighting (highlight, styleToLaTeX, data WriterState = WriterState { stInNote :: Bool -- true if we're in a note + , stInQuote :: Bool -- true if in a blockquote , stInMinipage :: Bool -- true if in minipage , stNotes :: [Doc] -- notes in a minipage , stOLLevel :: Int -- level of ordered list nesting @@ -73,7 +74,8 @@ data WriterState = writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, stInMinipage = False, stNotes = [], + WriterState { stInNote = False, stInQuote = False, + stInMinipage = False, stNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stTable = False, stStrikeout = False, @@ -331,7 +333,10 @@ blockToLaTeX (BlockQuote lst) = do modify $ \s -> s{ stIncremental = oldIncremental } return result _ -> do + oldInQuote <- gets stInQuote + modify (\s -> s{stInQuote = True}) contents <- blockListToLaTeX lst + modify (\s -> s{stInQuote = oldInQuote}) return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions @@ -610,9 +615,16 @@ sectionHeader unnumbered ref level lst = do 4 -> "paragraph" 5 -> "subparagraph" _ -> "" + inQuote <- gets stInQuote + let prefix = if inQuote && level' >= 4 + then text "\\mbox{}%" + -- needed for \paragraph, \subparagraph in quote environment + -- see http://tex.stackexchange.com/questions/169830/ + else empty return $ if level' > 5 then txt - else headerWith ('\\':sectionType) stuffing ref + else prefix $$ + headerWith ('\\':sectionType) stuffing ref $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> -- cgit v1.2.3 From c47bd8404fda0a782719848ef190b56eb0fdb9dc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Mon, 7 Apr 2014 11:00:30 +0200 Subject: Org reader: Support inline math (like $E=mc^2$) Closes #1223. --- src/Text/Pandoc/Readers/Org.hs | 22 ++++++++++++++++------ tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 20bca3e28..2bb6ee122 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -459,6 +459,7 @@ inline = choice inlineParsers <?> "inline" , strikeout , underline , code + , math , verbatim , subscript , superscript @@ -530,10 +531,13 @@ underline = B.strong <$> inlinesEnclosedBy '_' code :: OrgParser Inlines code = B.code <$> rawEnclosedBy '=' -verbatim :: OrgParser Inlines +math :: OrgParser Inlines +math = B.math <$> rawEnclosedBy '$' + +verbatim :: OrgParser Inlines verbatim = B.rawInline "" <$> rawEnclosedBy '~' -subscript :: OrgParser Inlines +subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) superscript :: OrgParser Inlines @@ -580,18 +584,24 @@ rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) -- succeeds only if we're not right after a str (ie. in middle of word) atStart :: OrgParser a -> OrgParser a atStart p = do - pos <- getPosition - st <- getState - guard $ orgLastStrPos st /= Just pos + guard =<< not <$> isRightAfterString p -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a atEnd p = try $ do - p <* lookingAtEndOfWord + p <* lookingAtEndOfWord where lookingAtEndOfWord = eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars +isRightAfterString :: OrgParser Bool +isRightAfterString = do + pos <- getPosition + st <- getState + -- the position `Nothing` isn't after a String, either, hence the double + -- negation + return $ not $ orgLastStrPos st /= Just pos + postWordChars :: OrgParser [Char] postWordChars = do st <- getState diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index eb9f4d741..77b9d9327 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -54,6 +54,10 @@ tests = "=Robot.rock()=" =?> para (code "Robot.rock()") + , "Math" =: + "$E=mc^2$" =?> + para (math "E=mc^2") + , "Verbatim" =: "~word for word~" =?> para (rawInline "" "word for word") -- cgit v1.2.3 From e555a5703d4581f11c6b5020811bf60b5ec98c41 Mon Sep 17 00:00:00 2001 From: John MacFarlane <fiddlosopher@gmail.com> Date: Mon, 7 Apr 2014 21:23:39 -0700 Subject: Textile reader: Improved link parsing. In particular we now pick up on attributes. Since pandoc links can't have attributes, we enclose the whole link in a span if there are attributes. Closes #1008. --- src/Text/Pandoc/Readers/Textile.hs | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 81994e6bd..ae9c0cc8e 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -62,7 +62,7 @@ import Text.HTML.TagSoup.Match import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper) import Control.Monad ( guard, liftM ) -import Control.Applicative ((<$>), (*>), (<*)) +import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid -- | Parse a Textile text and return a Pandoc document. @@ -498,25 +498,21 @@ rawLaTeXInline' = try $ do -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. link :: Parser [Char] ParserState Inlines -link = linkB <|> linkNoB - -linkNoB :: Parser [Char] ParserState Inlines -linkNoB = try $ do - name <- mconcat <$> surrounded (char '"') (withQuoteContext InDoubleQuote inline) - char ':' - let stopChars = "!.,;:" - url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline))) - let name' = if B.toList name == [Str "$"] then B.str url else name - return $ B.link url "" name' - -linkB :: Parser [Char] ParserState Inlines -linkB = try $ do - char '[' - name <- mconcat <$> surrounded (char '"') inline - char ':' - url <- manyTill nonspaceChar (char ']') +link = try $ do + bracketed <- (True <$ char '[') <|> return False + char '"' *> notFollowedBy (oneOf " \t\n\r") + attr <- attributes + name <- trimInlines . mconcat <$> + withQuoteContext InSingleQuote (manyTill inline (try (string "\":"))) + let stop = if bracketed + then char ']' + else lookAhead $ space <|> + try (oneOf "!.,;:" *> (space <|> newline)) + url <- manyTill nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name - return $ B.link url "" name' + return $ if attr == nullAttr + then B.link url "" name' + else B.spanWith attr $ B.link url "" name' -- | image embedding image :: Parser [Char] ParserState Inlines -- cgit v1.2.3 From 030020236c85c736892a6f8e0dcefca1681e5ce0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Tue, 8 Apr 2014 22:39:25 +0200 Subject: Org reader: Precise rules for the recognition of markup The inline parsers have been rewritten using the org source code as a reference. This fixes a couple of bugs related to erroneous markup recognition. --- src/Text/Pandoc/Readers/Org.hs | 380 ++++++++++++++++++++++++++++------------- tests/Tests/Readers/Org.hs | 25 ++- 2 files changed, 283 insertions(+), 122 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2bb6ee122..392b17bbc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -32,11 +32,12 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos) +import qualified Text.Pandoc.Parsing as P +import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos) import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) -import Control.Monad (guard, mzero) +import Control.Monad (guard, when) import Data.Char (toLower) import Data.Default import Data.List (foldl', isPrefixOf, isSuffixOf) @@ -47,49 +48,100 @@ import Data.Monoid (mconcat, mempty, mappend) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n") +readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = orgStateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- Parser State for Org +-- + -- | Org-mode parser state data OrgParserState = OrgParserState - { orgOptions :: ReaderOptions - , orgInlineCharStack :: [Char] - , orgLastStrPos :: Maybe SourcePos - , orgMeta :: Meta + { orgStateOptions :: ReaderOptions + , orgStateEmphasisCharStack :: [Char] + , orgStateEmphasisNewlines :: Maybe Int + , orgStateLastForbiddenCharPos :: Maybe SourcePos + , orgStateLastPreCharPos :: Maybe SourcePos + , orgStateLastStrPos :: Maybe SourcePos + , orgStateMeta :: Meta } deriving (Show) instance HasReaderOptions OrgParserState where - extractReaderOptions = orgOptions + extractReaderOptions = orgStateOptions instance HasMeta OrgParserState where setMeta field val st = - st{ orgMeta = setMeta field val $ orgMeta st } + st{ orgStateMeta = setMeta field val $ orgStateMeta st } deleteMeta field st = - st{ orgMeta = deleteMeta field $ orgMeta st } + st{ orgStateMeta = deleteMeta field $ orgStateMeta st } instance Default OrgParserState where def = defaultOrgParserState defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState - { orgOptions = def - , orgInlineCharStack = [] - , orgLastStrPos = Nothing - , orgMeta = nullMeta + { orgStateOptions = def + , orgStateEmphasisCharStack = [] + , orgStateEmphasisNewlines = Nothing + , orgStateLastForbiddenCharPos = Nothing + , orgStateLastPreCharPos = Nothing + , orgStateLastStrPos = Nothing + , orgStateMeta = nullMeta } updateLastStrPos :: OrgParser () updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ orgLastStrPos = Just p } + updateState $ \s -> s{ orgStateLastStrPos = Just p } +updateLastForbiddenCharPos :: OrgParser () +updateLastForbiddenCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} -parseOrg:: OrgParser Pandoc -parseOrg = do - blocks' <- B.toList <$> parseBlocks +updateLastPreCharPos :: OrgParser () +updateLastPreCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastPreCharPos = Just p} + +pushToInlineCharStack :: Char -> OrgParser () +pushToInlineCharStack c = updateState $ \st -> + st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) } + +popInlineCharStack :: OrgParser () +popInlineCharStack = updateState $ \st -> + st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st } + +surroundingEmphasisChar :: OrgParser [Char] +surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState + +startEmphasisNewlinesCounting :: Int -> OrgParser () +startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> + s { orgStateEmphasisNewlines = Just maxNewlines } + +decEmphasisNewlinesCount :: OrgParser () +decEmphasisNewlinesCount = updateState $ \s -> + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + +newlinesCountWithinLimits :: OrgParser Bool +newlinesCountWithinLimits = do st <- getState - let meta = orgMeta st - return $ Pandoc meta $ filter (/= Null) blocks' + return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True + +resetEmphasisNewlines :: OrgParser () +resetEmphasisNewlines = updateState $ \s -> + s{ orgStateEmphasisNewlines = Nothing } + +newline :: OrgParser Char +newline = + P.newline + <* updateLastPreCharPos + <* updateLastForbiddenCharPos -- -- parsing blocks @@ -218,7 +270,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser Blocks declarationLine = try $ do meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta - updateState $ \st -> st { orgMeta = orgMeta st <> meta' } + updateState $ \st -> st { orgStateMeta = orgStateMeta st <> meta' } return mempty metaValue :: OrgParser MetaValue @@ -449,22 +501,24 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline :: OrgParser Inlines -inline = choice inlineParsers <?> "inline" - where inlineParsers = [ whitespace - , link - , str - , endline - , emph - , strong - , strikeout - , underline - , code - , math - , verbatim - , subscript - , superscript - , symbol - ] +inline = + choice [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , math + , verbatim + , subscript + , superscript + , symbol + ] <* (guard =<< newlinesCountWithinLimits) + <?> "inline" + -- treat these as potentially non-text when parsing inline: specialChars :: [Char] @@ -472,7 +526,10 @@ specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" whitespace :: OrgParser Inlines -whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" +whitespace = B.space <$ skipMany1 spaceChar + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + <?> "whitespace" str :: OrgParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") @@ -492,6 +549,9 @@ endline = try $ do notFollowedBy' commentLineStart notFollowedBy' bulletListStart notFollowedBy' orderedListStart + decEmphasisNewlinesCount + guard =<< newlinesCountWithinLimits + updateLastPreCharPos return B.space link :: OrgParser Inlines @@ -500,42 +560,54 @@ link = explicitOrImageLink <|> selflinkOrImage <?> "link" explicitOrImageLink :: OrgParser Inlines explicitOrImageLink = try $ do char '[' - src <- enclosedRaw (char '[') (char ']') + src <- linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if (isImage src) && (isImage title) + return $ if (isImageFilename src) && (isImageFilename title) then B.link src "" (B.image title "" "") else B.link src "" title' selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do - src <- enclosedRaw (string "[[") (string "]]") - return $ if isImage src + src <- (char '[') *> linkTarget <* char ']' + return $ if isImageFilename src then B.image src "" "" else B.link src "" (B.str src) +linkTarget :: OrgParser String +linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") + +isImageFilename :: String -> Bool +isImageFilename filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] + emph :: OrgParser Inlines -emph = B.emph <$> inlinesEnclosedBy '/' +emph = B.emph <$> emphasisBetween '/' strong :: OrgParser Inlines -strong = B.strong <$> inlinesEnclosedBy '*' +strong = B.strong <$> emphasisBetween '*' strikeout :: OrgParser Inlines -strikeout = B.strikeout <$> inlinesEnclosedBy '+' +strikeout = B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. underline :: OrgParser Inlines -underline = B.strong <$> inlinesEnclosedBy '_' +underline = B.strong <$> emphasisBetween '_' code :: OrgParser Inlines -code = B.code <$> rawEnclosedBy '=' - -math :: OrgParser Inlines -math = B.math <$> rawEnclosedBy '$' +code = B.code <$> verbatimBetween '=' verbatim :: OrgParser Inlines -verbatim = B.rawInline "" <$> rawEnclosedBy '~' +verbatim = B.rawInline "" <$> verbatimBetween '~' + +math :: OrgParser Inlines +math = B.math <$> mathStringBetween '$' subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) @@ -550,7 +622,72 @@ maybeGroupedByBraces = try $ ] symbol :: OrgParser Inlines -symbol = B.str . (: "") <$> oneOf specialChars +symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) + where updatePositions c + | c `elem` emphasisPreChars = c <$ updateLastPreCharPos + | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos + | otherwise = return c + +emphasisBetween :: Char + -> OrgParser Inlines +emphasisBetween c = try $ do + startEmphasisNewlinesCounting emphasisAllowedNewlines + res <- enclosedInlines (emphasisStart c) (emphasisEnd c) + isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState + when isTopLevelEmphasis + resetEmphasisNewlines + return res + +verbatimBetween :: Char + -> OrgParser String +verbatimBetween c = try $ + emphasisStart c *> + many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c) + +-- | Parses a raw string delimited by @c@ using Org's math rules +mathStringBetween :: Char + -> OrgParser String +mathStringBetween c = try $ do + mathStart c + body <- many1TillNOrLessNewlines mathAllowedNewlines + (noneOf (c:"\n\r")) + (lookAhead $ mathEnd c) + final <- mathEnd c + return $ body ++ [final] + +-- | Parses the start (opening character) of emphasis +emphasisStart :: Char -> OrgParser Char +emphasisStart c = try $ do + guard =<< afterEmphasisPreChar + guard =<< notAfterString + char c + lookAhead (noneOf emphasisForbiddenBorderChars) + pushToInlineCharStack c + return c + +-- | Parses the closing character of emphasis +emphasisEnd :: Char -> OrgParser Char +emphasisEnd c = try $ do + guard =<< notAfterForbiddenBorderChar + char c + eof <|> lookAhead (surroundingEmphasisChar >>= \x -> + oneOf (x ++ emphasisPostChars)) + *> return () + updateLastStrPos + popInlineCharStack + return c + +mathStart :: Char -> OrgParser Char +mathStart c = try $ do + char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) + +mathEnd :: Char -> OrgParser Char +mathEnd c = try $ do + res <- noneOf (c:mathForbiddenBorderChars) + char c + eof <|> (lookAhead $ oneOf mathPostChars *> pure ()) + return res + enclosedInlines :: OrgParser a -> OrgParser b @@ -558,16 +695,6 @@ enclosedInlines :: OrgParser a enclosedInlines start end = try $ trimInlines . mconcat <$> enclosed start end inline --- FIXME: This is a hack -inlinesEnclosedBy :: Char - -> OrgParser Inlines -inlinesEnclosedBy c = try $ do - updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } - res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) - (atEnd $ char c) - updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st } - return res - enclosedRaw :: OrgParser a -> OrgParser b -> OrgParser String @@ -577,63 +704,76 @@ enclosedRaw start end = try $ spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine -rawEnclosedBy :: Char - -> OrgParser String -rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) - --- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: OrgParser a -> OrgParser a -atStart p = do - guard =<< not <$> isRightAfterString - p - --- | succeeds only if we're at the end of a word -atEnd :: OrgParser a -> OrgParser a -atEnd p = try $ do - p <* lookingAtEndOfWord - where lookingAtEndOfWord = - eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars - -isRightAfterString :: OrgParser Bool -isRightAfterString = do +-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume +-- newlines. +many1TillNOrLessNewlines :: Int + -> OrgParser Char + -> OrgParser a + -> OrgParser String +many1TillNOrLessNewlines n p end = try $ + nMoreLines (Just n) mempty >>= oneOrMore + where + nMoreLines Nothing cs = return cs + nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine + nMoreLines k cs = try $ (final k cs <|> rest k cs) + >>= uncurry nMoreLines + final _ cs = (\x -> (Nothing, cs ++ x)) <$> (try finalLine) + rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline) + finalLine = try $ manyTill p end + minus1 k = k - 1 + oneOrMore cs = guard (not $ null cs) *> return cs + +-- Org allows customization of the way it reads emphasis. We use the defaults +-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` +-- for details). + +-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too) +emphasisPreChars :: [Char] +emphasisPreChars = "\t \"'({" + +-- | Chars allowed at after emphasis +emphasisPostChars :: [Char] +emphasisPostChars = "\t\n !\"'),-.:;?\\}" + +-- | Chars not allowed at the (inner) border of emphasis +emphasisForbiddenBorderChars :: [Char] +emphasisForbiddenBorderChars = "\t\n\r \"'," + +-- | The maximum number of newlines within +emphasisAllowedNewlines :: Int +emphasisAllowedNewlines = 1 + +-- LaTeX-style math: see `org-latex-regexps` for details + +-- | Chars allowed after an inline ($...$) math statement +mathPostChars :: [Char] +mathPostChars = "\t\n \"',-.:;?" + +-- | Chars not allowed at the (inner) border of math +mathForbiddenBorderChars :: [Char] +mathForbiddenBorderChars = "\t\n\r ,;.$" + +-- | Maximum number of newlines in an inline math statement +mathAllowedNewlines :: Int +mathAllowedNewlines = 2 + +-- | Whether we are right behind a char allowed before emphasis +afterEmphasisPreChar :: OrgParser Bool +afterEmphasisPreChar = do pos <- getPosition - st <- getState - -- the position `Nothing` isn't after a String, either, hence the double - -- negation - return $ not $ orgLastStrPos st /= Just pos + lastPrePos <- orgStateLastPreCharPos <$> getState + return $ lastPrePos == Nothing || lastPrePos == Just pos -postWordChars :: OrgParser [Char] -postWordChars = do - st <- getState - return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st) - --- FIXME: These functions are hacks and should be replaced -endsOnThisOrNextLine :: Char - -> OrgParser () -endsOnThisOrNextLine c = do - inp <- getInput - let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) - endsOnThisLine inp c doOtherwise - -endsOnThisLine :: [Char] - -> Char - -> ([Char] -> OrgParser ()) - -> OrgParser () -endsOnThisLine input c doOnOtherLines = do - postWordChars' <- postWordChars - case break (`elem` c:"\n") input of - (_,'\n':rest) -> doOnOtherLines rest - (_,_:[]) -> return () - (_,_:rest@(n:_)) -> if n `elem` postWordChars' - then return () - else endsOnThisLine rest c doOnOtherLines - _ -> mzero - -isImage :: String -> Bool -isImage filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - any (\x -> (x++":") `isPrefixOf` filename) protocols || - ':' `notElem` filename - where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] - protocols = [ "file", "http", "https" ] +-- | Whether we are right after the end of a string +notAfterString :: OrgParser Bool +notAfterString = do + pos <- getPosition + lastStrPos <- orgStateLastStrPos <$> getState + return $ lastStrPos /= Just pos + +-- | Whether the parser is right after a forbidden border char +notAfterForbiddenBorderChar :: OrgParser Bool +notAfterForbiddenBorderChar = do + pos <- getPosition + lastFBCPos <- orgStateLastForbiddenCharPos <$> getState + return $ lastFBCPos /= Just pos diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 77b9d9327..efd8fe977 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -86,16 +86,37 @@ tests = unlines [ "this+that+ +so+on" , "seven*eight* nine*" , "+not+funny+" + , "this == self" ] =?> para (spcSep [ "this+that+", "+so+on" , "seven*eight*", "nine*" , strikeout "not+funny" + , "this" <> space <> "==" <> space <> "self" ]) + , "Adherence to Org's rules for markup borders" =: + "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> + para (spcSep [ emph $ "t/&" <> space <> "a" + , "/" + , "./r/" + , "(" <> (strong "l") <> ")" + , (emph "e") <> "!" + , (emph "b") <> "." + ]) + + , "Inline math must stay within three lines" =: + unlines [ "$a", "b", "c$", "$d", "e", "f", "g$" ] =?> + para ((math "a\nb\nc") <> space <> + spcSep [ "$d", "e", "f", "g$" ]) + , "Markup may not span more than two lines" =: - unlines [ "/this *is", "not*", "emph/" ] =?> + unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?> para (spcSep [ "/this" - , (strong ("is" <> space <> "not")) + , (strong (spcSep + [ "is" + , (strikeout ("totally" <> space <> "nice")) + , "not" + ])) , "emph/" ]) , "Image" =: -- cgit v1.2.3 From 1715d7cee0b9388ac77b8b2a31fcbb00ead80adf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Thu, 10 Apr 2014 15:11:03 +0200 Subject: Org reader: Support more inline/display math variants Support all of the following variants as valid ways to define inline or display math inlines: - `\[..\]` (display) - `$$..$$` (display) - `\(..\)` (inline) - `$..$` (inline) This closes #1223. Again. --- src/Text/Pandoc/Readers/Org.hs | 28 ++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 392b17bbc..1d0400d96 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -512,6 +512,7 @@ inline = , underline , code , math + , displayMath , verbatim , subscript , superscript @@ -607,7 +608,15 @@ verbatim :: OrgParser Inlines verbatim = B.rawInline "" <$> verbatimBetween '~' math :: OrgParser Inlines -math = B.math <$> mathStringBetween '$' +math = B.math <$> choice [ math1CharBetween '$' + , mathStringBetween '$' + , rawMathBetween "\\(" "\\)" + ] + +displayMath :: OrgParser Inlines +displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) @@ -655,6 +664,21 @@ mathStringBetween c = try $ do final <- mathEnd c return $ body ++ [final] +-- | Parse a single character between @c@ using math rules +math1CharBetween :: Char + -> OrgParser String +math1CharBetween c = try $ do + char c + res <- noneOf $ c:mathForbiddenBorderChars + char c + eof <|> lookAhead (oneOf mathPostChars) *> return () + return [res] + +rawMathBetween :: String + -> String + -> OrgParser String +rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + -- | Parses the start (opening character) of emphasis emphasisStart :: Char -> OrgParser Char emphasisStart c = try $ do @@ -747,7 +771,7 @@ emphasisAllowedNewlines = 1 -- | Chars allowed after an inline ($...$) math statement mathPostChars :: [Char] -mathPostChars = "\t\n \"',-.:;?" +mathPostChars = "\t\n \"'),-.:;?" -- | Chars not allowed at the (inner) border of math mathForbiddenBorderChars :: [Char] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index efd8fe977..9e9482e45 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -54,14 +54,26 @@ tests = "=Robot.rock()=" =?> para (code "Robot.rock()") - , "Math" =: - "$E=mc^2$" =?> - para (math "E=mc^2") - , "Verbatim" =: "~word for word~" =?> para (rawInline "" "word for word") + , "Math $..$" =: + "$E=mc^2$" =?> + para (math "E=mc^2") + + , "Math $$..$$" =: + "$$E=mc^2$$" =?> + para (displayMath "E=mc^2") + + , "Math \\[..\\]" =: + "\\[E=ℎν\\]" =?> + para (displayMath "E=ℎν") + + , "Math \\(..\\)" =: + "\\(σ_x σ_p ≥ \\frac{ℏ}{2}\\)" =?> + para (math "σ_x σ_p ≥ \\frac{ℏ}{2}") + , "Symbol" =: "A * symbol" =?> para (str "A" <> space <> str "*" <> space <> "symbol") @@ -86,14 +98,19 @@ tests = unlines [ "this+that+ +so+on" , "seven*eight* nine*" , "+not+funny+" - , "this == self" ] =?> para (spcSep [ "this+that+", "+so+on" , "seven*eight*", "nine*" , strikeout "not+funny" - , "this" <> space <> "==" <> space <> "self" ]) + , "No empty markup" =: + -- FIXME: __ is erroneously parsed as subscript "_" + -- "// ** __ ++ == ~~ $$" =?> + -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) + "// ** ++ == ~~ $$" =?> + para (spcSep [ "//", "**", "++", "==", "~~", "$$" ]) + , "Adherence to Org's rules for markup borders" =: "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> para (spcSep [ emph $ "t/&" <> space <> "a" @@ -109,6 +126,13 @@ tests = para ((math "a\nb\nc") <> space <> spcSep [ "$d", "e", "f", "g$" ]) + , "Single-character math" =: + "$a$ $b$! $c$?" =?> + para (spcSep [ math "a" + , "$b$!" + , (math "c") <> "?" + ]) + , "Markup may not span more than two lines" =: unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?> para (spcSep [ "/this" -- cgit v1.2.3 From ace8837cd691b17e994b41dcb797de6ca1940136 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Thu, 10 Apr 2014 17:25:05 +0200 Subject: Org reader: Improve code by following HLint recommendations HLint's recommendations for better code are applied to the Org-mode reader code. --- src/Text/Pandoc/Readers/Org.hs | 44 +++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1d0400d96..29611e8cc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -44,11 +44,14 @@ import Data.List (foldl', isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) +-- Ignore HLint warnings to use String instead of [Char] +{-# ANN module ("HLint: ignore Use String" :: String) #-} + -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n") +readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState @@ -111,7 +114,7 @@ updateLastPreCharPos = getPosition >>= \p -> pushToInlineCharStack :: Char -> OrgParser () pushToInlineCharStack c = updateState $ \st -> - st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) } + st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st } popInlineCharStack :: OrgParser () popInlineCharStack = updateState $ \st -> @@ -176,7 +179,7 @@ orgBlock = try $ do "comment" -> return mempty "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr _ -> B.divWith ("", [blockType], []) - <$> (parseFromString parseBlocks blockStr) + <$> parseFromString parseBlocks blockStr blockHeader :: OrgParser (Int, String, [String]) blockHeader = (,,) <$> blockIndent @@ -199,7 +202,7 @@ rawBlockContent indent blockType = indentWith :: Int -> OrgParser String indentWith num = do tabStop <- getOption readerTabStop - if (num < tabStop) + if num < tabStop then count num (char ' ') else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] @@ -242,7 +245,7 @@ drawerStart = try $ <|> stringAnyCase "LOGBOOK" drawerLine :: OrgParser String -drawerLine = try $ anyLine +drawerLine = try anyLine drawerEnd :: OrgParser String drawerEnd = try $ @@ -276,7 +279,7 @@ declarationLine = try $ do metaValue :: OrgParser MetaValue metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine -metaKey :: OrgParser [Char] +metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces @@ -350,7 +353,7 @@ tableAlignRow = try $ tableAlignCell :: OrgParser Alignment tableAlignCell = - choice [ try $ emptyCell *> return (AlignDefault) + choice [ try $ emptyCell *> return AlignDefault , try $ skipSpaces *> char '<' *> tableAlignFromChar @@ -381,8 +384,8 @@ normalizeTable (OrgTable cols aligns heads lns) = let aligns' = fillColumns aligns AlignDefault heads' = if heads == mempty then mempty - else fillColumns heads (B.plain mempty) - lns' = map (flip fillColumns (B.plain mempty)) lns + else fillColumns heads (B.plain mempty) + lns' = map (`fillColumns` B.plain mempty) lns fillColumns base padding = take cols $ base ++ repeat padding in OrgTable cols aligns' heads' lns' @@ -565,13 +568,14 @@ explicitOrImageLink = try $ do title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if (isImageFilename src) && (isImageFilename title) - then B.link src "" (B.image title "" "") - else B.link src "" title' + return . B.link src "" + $ if isImageFilename src && isImageFilename title + then B.image title "" "" + else title' selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do - src <- (char '[') *> linkTarget <* char ']' + src <- char '[' *> linkTarget <* char ']' return $ if isImageFilename src then B.image src "" "" else B.link src "" (B.str src) @@ -619,10 +623,10 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" ] subscript :: OrgParser Inlines -subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) +subscript = B.subscript <$> try (char '_' *> maybeGroupedByBraces) superscript :: OrgParser Inlines -superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces) +superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces) maybeGroupedByBraces :: OrgParser Inlines maybeGroupedByBraces = try $ @@ -702,14 +706,14 @@ emphasisEnd c = try $ do return c mathStart :: Char -> OrgParser Char -mathStart c = try $ do +mathStart c = try $ char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) mathEnd :: Char -> OrgParser Char mathEnd c = try $ do res <- noneOf (c:mathForbiddenBorderChars) char c - eof <|> (lookAhead $ oneOf mathPostChars *> pure ()) + eof <|> lookAhead (oneOf mathPostChars *> pure ()) return res @@ -741,8 +745,8 @@ many1TillNOrLessNewlines n p end = try $ nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine nMoreLines k cs = try $ (final k cs <|> rest k cs) >>= uncurry nMoreLines - final _ cs = (\x -> (Nothing, cs ++ x)) <$> (try finalLine) - rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline) + final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine + rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline) finalLine = try $ manyTill p end minus1 k = k - 1 oneOrMore cs = guard (not $ null cs) *> return cs @@ -786,7 +790,7 @@ afterEmphasisPreChar :: OrgParser Bool afterEmphasisPreChar = do pos <- getPosition lastPrePos <- orgStateLastPreCharPos <$> getState - return $ lastPrePos == Nothing || lastPrePos == Just pos + return . fromMaybe True $ (== pos) <$> lastPrePos -- | Whether we are right after the end of a string notAfterString :: OrgParser Bool -- cgit v1.2.3 From ca40acea5b022d6309a36000d54844a482c14555 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 10 Apr 2014 16:52:30 -0700 Subject: MediaWiki reader: Handle table rows containing just an HTML comment. Closes #1230. --- src/Text/Pandoc/Readers/MediaWiki.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 9bbabd44b..feaedb7c2 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -227,6 +227,7 @@ table = do let widths' = map (\w -> if w == 0 then defaultwidth else w) widths let cellspecs = zip (map fst cellspecs') widths' rows' <- many $ try $ rowsep *> (map snd <$> tableRow) + optional blanklines tableEnd let cols = length hdr let (headers,rows) = if hasheader @@ -275,7 +276,7 @@ tableCaption = try $ do (trimInlines . mconcat) <$> many (notFollowedBy (cellsep <|> rowsep) *> inline) tableRow :: MWParser [((Alignment, Double), Blocks)] -tableRow = try $ many tableCell +tableRow = try $ skipMany htmlComment *> many tableCell tableCell :: MWParser ((Alignment, Double), Blocks) tableCell = try $ do -- cgit v1.2.3 From 6f19be7d40f583ee4e10fa2b0f20bd4f1fa80c43 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Fri, 11 Apr 2014 11:05:42 +0200 Subject: Org reader: Fix parsing of sub-/superscript expressions This fixes the org-reader's handling of sub- and superscript expressions. Simple expressions (like `2^+10`), expressions in parentheses (`a_(n+1)`) and nested sexp (like `a_(nested()parens)`) are now read correctly. --- src/Text/Pandoc/Readers/Org.hs | 47 +++++++++++++++++++++++++++++++++--------- tests/Tests/Readers/Org.hs | 39 +++++++++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 19 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 29611e8cc..ceac69367 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -622,17 +622,11 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] -subscript :: OrgParser Inlines -subscript = B.subscript <$> try (char '_' *> maybeGroupedByBraces) +subscript :: OrgParser Inlines +subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser Inlines -superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces) - -maybeGroupedByBraces :: OrgParser Inlines -maybeGroupedByBraces = try $ - choice [ try $ enclosedInlines (char '{') (char '}') - , B.str . (:"") <$> anyChar - ] +superscript :: OrgParser Inlines +superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) symbol :: OrgParser Inlines symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) @@ -805,3 +799,36 @@ notAfterForbiddenBorderChar = do pos <- getPosition lastFBCPos <- orgStateLastForbiddenCharPos <$> getState return $ lastFBCPos /= Just pos + +-- | Read a sub- or superscript expression +subOrSuperExpr :: OrgParser Inlines +subOrSuperExpr = try $ do + choice [ balancedSexp '{' '}' + , balancedSexp '(' ')' >>= return . enclosing ('(', ')') + , simpleSubOrSuperString + ] >>= parseFromString (mconcat <$> many inline) + +-- | Read a balanced sexp +balancedSexp :: Char + -> Char + -> OrgParser String +balancedSexp l r = try $ do + char l + res <- concat <$> many ( many1 (noneOf ([l, r] ++ "\n\r")) + <|> try (string [l, r]) + <|> enclosing (l, r) <$> balancedSexp l r + ) + char r + return res + +simpleSubOrSuperString :: OrgParser String +simpleSubOrSuperString = try $ + choice [ string "*" + , mappend <$> option [] ((:[]) <$> oneOf "+-") + <*> many1 alphaNum + ] + +enclosing :: (a, a) + -> [a] + -> [a] +enclosing (left, right) s = left : s ++ [right] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9e9482e45..49130f0ab 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -78,15 +78,15 @@ tests = "A * symbol" =?> para (str "A" <> space <> str "*" <> space <> "symbol") - , "Superscript single char" =: - "2^n" =?> - para (str "2" <> superscript "n") + , "Superscript simple expression" =: + "2^-λ" =?> + para (str "2" <> superscript "-λ") , "Superscript multi char" =: "2^{n-1}" =?> para (str "2" <> superscript "n-1") - , "Subscript single char" =: + , "Subscript simple expression" =: "a_n" =?> para (str "a" <> subscript "n") @@ -105,11 +105,8 @@ tests = ]) , "No empty markup" =: - -- FIXME: __ is erroneously parsed as subscript "_" - -- "// ** __ ++ == ~~ $$" =?> - -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) - "// ** ++ == ~~ $$" =?> - para (spcSep [ "//", "**", "++", "==", "~~", "$$" ]) + "// ** __ ++ == ~~ $$" =?> + para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) , "Adherence to Org's rules for markup borders" =: "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> @@ -143,6 +140,30 @@ tests = ])) , "emph/" ]) + , "Sub- and superscript expressions" =: + unlines [ "a_(a(b)(c)d)" + , "e^(f(g)h)" + , "i_(jk)l)" + , "m^()n" + , "o_{p{q{}r}}" + , "s^{t{u}v}" + , "w_{xy}z}" + , "1^{}2" + , "3_{{}}" + , "4^(a(*b(c*)d))" + ] =?> + para (spcSep [ "a" <> subscript "(a(b)(c)d)" + , "e" <> superscript "(f(g)h)" + , "i" <> (subscript "(jk)") <> "l)" + , "m" <> (superscript "()") <> "n" + , "o" <> subscript "p{q{}r}" + , "s" <> superscript "t{u}v" + , "w" <> (subscript "xy") <> "z}" + , "1" <> (superscript "") <> "2" + , "3" <> subscript "{}" + , "4" <> superscript ("(a(" <> strong "b(c" <> ")d))") + ]) + , "Image" =: "[[./sunset.jpg]]" =?> (para $ image "./sunset.jpg" "" "") -- cgit v1.2.3 From 8699071ec2d01dbf2c72eb6cda7794de8b850999 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 11 Apr 2014 10:10:54 -0700 Subject: HTML reader: Treat processing instructions & declarations as block. Previously these were treated as inline, and included in paragraph tags in HTML or DocBook output, which is generally not what is wanted. Closes #1233. --- src/Text/Pandoc/Readers/HTML.hs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 2101b2fc2..c94ee3d6b 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -610,15 +610,19 @@ blockTags :: [String] blockTags = blockHtmlTags ++ blockDocBookTags isInlineTag :: Tag String -> Bool -isInlineTag t = tagOpen (`notElem` blockTags) (const True) t || - tagClose (`notElem` blockTags) t || +isInlineTag t = tagOpen isInlineTagName (const True) t || + tagClose isInlineTagName t || tagComment (const True) t + where isInlineTagName x = x `notElem` blockTags isBlockTag :: Tag String -> Bool -isBlockTag t = tagOpen (`elem` blocktags) (const True) t || - tagClose (`elem` blocktags) t || +isBlockTag t = tagOpen isBlockTagName (const True) t || + tagClose isBlockTagName t || tagComment (const True) t - where blocktags = blockTags ++ eitherBlockOrInline + where isBlockTagName ('?':_) = True + isBlockTagName ('!':_) = True + isBlockTagName x = x `elem` blockTags + || x `elem` eitherBlockOrInline isTextTag :: Tag String -> Bool isTextTag = tagText (const True) -- cgit v1.2.3 From ae4280fba528efe68c5955cb3ca0779e6910f43b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sat, 12 Apr 2014 00:17:46 +0200 Subject: Org reader: Add support for figures Support for figures (images with name and caption) is added. --- src/Text/Pandoc/Readers/Org.hs | 57 ++++++++++++++++++++++++++++++++++++------ tests/Tests/Readers/Org.hs | 21 ++++++++++++++++ 2 files changed, 70 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..8f0ce61e0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateL import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Arrow ((***)) import Control.Monad (guard, when) import Data.Char (toLower) import Data.Default @@ -158,6 +159,7 @@ block = choice [ mempty <$ blanklines , orgBlock , example , drawer + , figure , specialLine , header , hline @@ -252,6 +254,43 @@ drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline +-- +-- Figures +-- + +-- Figures (Image on a line by itself, preceded by name and/or caption) +figure :: OrgParser Blocks +figure = try $ do + (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) + <$> nameAndOrCaption + src <- skipSpaces *> selfTarget <* skipSpaces <* newline + guard (isImageFilename src) + return . B.para $ B.image src tit cap + where withFigPrefix cs = if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs + +nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines) +nameAndOrCaption = try $ nameFirst <|> captionFirst + where + nameFirst = try $ do + n <- name + c <- optionMaybe caption + return (Just n, c) + captionFirst = try $ do + c <- caption + n <- optionMaybe name + return (n, Just c) + +caption :: OrgParser Inlines +caption = try $ annotation "CAPTION" *> inlinesTillNewline + +name :: OrgParser String +name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline + +annotation :: String -> OrgParser String +annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' + -- Comments, Options and Metadata specialLine :: OrgParser Blocks specialLine = try $ metaLine <|> commentLine @@ -277,7 +316,7 @@ declarationLine = try $ do return mempty metaValue :: OrgParser MetaValue -metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine +metaValue = MetaInlines . B.toList <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -288,7 +327,7 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") header :: OrgParser Blocks header = try $ B.header <$> headerStart - <*> (trimInlines <$> restOfLine) + <*> inlinesTillNewline headerStart :: OrgParser Int headerStart = try $ @@ -424,13 +463,10 @@ setAligns aligns t = t{ orgTableAlignments = aligns } -- Paragraphs or Plain text paraOrPlain :: OrgParser Blocks paraOrPlain = try $ - trimInlines . mconcat - <$> many1 inline - <**> option B.plain - (try $ newline *> pure B.para) + parseInlines <**> option B.plain (try $ newline *> pure B.para) -restOfLine :: OrgParser Inlines -restOfLine = mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser Inlines +inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline -- @@ -523,6 +559,8 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" +parseInlines :: OrgParser Inlines +parseInlines = trimInlines . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] @@ -580,6 +618,9 @@ selflinkOrImage = try $ do then B.image src "" "" else B.link src "" (B.str src) +selfTarget :: OrgParser String +selfTarget = try $ char '[' *> linkTarget <* char ']' + linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 49130f0ab..99dadc0ac 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -377,6 +377,27 @@ tests = code' = "main = putStrLn greeting\n" ++ " where greeting = \"moin\"\n" in codeBlockWith attr' code' + + , "Figure" =: + unlines [ "#+caption: A very courageous man." + , "#+name: goodguy" + , "[[edward.jpg]]" + ] =?> + para (image "edward.jpg" "fig:goodguy" "A very courageous man.") + + , "Unnamed figure" =: + unlines [ "#+caption: A great whistleblower." + , "[[snowden.png]]" + ] =?> + para (image "snowden.png" "" "A great whistleblower.") + + , "Figure with `fig:` prefix in name" =: + unlines [ "#+caption: Used as a metapher in evolutionary biology." + , "#+name: fig:redqueen" + , "[[the-red-queen.jpg]]" + ] =?> + para (image "the-red-queen.jpg" "fig:redqueen" + "Used as a metapher in evolutionary biology.") ] , testGroup "Lists" $ -- cgit v1.2.3 From 36066699c31ca50566ca2492a5c112ecbe690a63 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sat, 12 Apr 2014 00:22:49 +0200 Subject: Org writer: Fix output for linebreaks Hard linebreaks in Org mode are represented by the string "\\" as the last characters in a line. Adds this feature to the Org-mode writer. --- src/Text/Pandoc/Writers/Org.hs | 2 +- tests/writer.org | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index d318c5f6a..58a5729e7 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -271,7 +271,7 @@ inlineToOrg (Math t str) = do else "$$" <> text str <> "$$" inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str inlineToOrg (RawInline _ _) = return empty -inlineToOrg (LineBreak) = return cr -- there's no line break in Org +inlineToOrg (LineBreak) = return (text "\\\\" <> cr) inlineToOrg Space = return space inlineToOrg (Link txt (src, _)) = do case txt of diff --git a/tests/writer.org b/tests/writer.org index 85016f352..524d49305 100644 --- a/tests/writer.org +++ b/tests/writer.org @@ -42,7 +42,7 @@ item. Here's one with a bullet. * criminey. -There should be a hard line break +There should be a hard line break\\ here. -------------- -- cgit v1.2.3 From 82d4160bdcc149df020d1f95f4a7d893a9ecb42a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <tarleb@moltkeplatz.de> Date: Sat, 12 Apr 2014 11:07:38 +0200 Subject: Org reader: Read linebreaks Linebreaks are marked by the string `\\` at the end of a line. --- src/Text/Pandoc/Readers/Org.hs | 4 ++++ tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..19dd03c6b 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -506,6 +506,7 @@ anyLineNewline = (++ "\n") <$> anyLine inline :: OrgParser Inlines inline = choice [ whitespace + , linebreak , link , str , endline @@ -535,6 +536,9 @@ whitespace = B.space <$ skipMany1 spaceChar <* updateLastForbiddenCharPos <?> "whitespace" +linebreak :: OrgParser Inlines +linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline + str :: OrgParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 49130f0ab..567cc4c41 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -94,6 +94,10 @@ tests = "a_{n+1}" =?> para (str "a" <> subscript "n+1") + , "Linebreak" =: + "line \\\\ \nbreak" =?> + para ("line" <> linebreak <> "break") + , "Markup-chars not occuring on word break are symbols" =: unlines [ "this+that+ +so+on" , "seven*eight* nine*" -- cgit v1.2.3 From 464d7a8e49b81a0c883b8c4a9bfd5d26cdfb38ba Mon Sep 17 00:00:00 2001 From: Neil Mayhew <neiljhmayhew@gmail.com> Date: Thu, 6 Mar 2014 07:31:49 -0700 Subject: Improve handling of hard line breaks in Docbook writer * Use a <literallayout> for the entire paragraph, not just for the newline character * Don't let LineBreaks inside footnotes influence the enclosing paragraph --- src/Text/Pandoc/Writers/Docbook.hs | 19 ++++++++++++++++--- tests/writer.docbook | 6 ++---- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 02d875be3..2d6ce3020 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -32,12 +32,14 @@ module Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.XML import Text.Pandoc.Shared +import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Readers.TeXMath import Data.List ( isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) +import Data.Monoid ( Any(..) ) import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty import qualified Text.Pandoc.Builder as B @@ -165,8 +167,9 @@ blockToDocbook opts (Para [Image txt (src,'f':'i':'g':':':_)]) = (inTagsIndented "imageobject" (selfClosingTag "imagedata" [("fileref",src)])) $$ inTagsSimple "textobject" (inTagsSimple "phrase" alt)) -blockToDocbook opts (Para lst) = - inTagsIndented "para" $ inlinesToDocbook opts lst +blockToDocbook opts (Para lst) + | hasLineBreaks lst = flush $ nowrap $ inTagsSimple "literallayout" $ inlinesToDocbook opts lst + | otherwise = inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" $ blocksToDocbook opts blocks blockToDocbook _ (CodeBlock (_,classes,_) str) = @@ -226,6 +229,16 @@ blockToDocbook opts (Table caption aligns widths headers rows) = (inTags True "tgroup" [("cols", show (length headers))] $ coltags $$ head' $$ body') +hasLineBreaks :: [Inline] -> Bool +hasLineBreaks = getAny . query isLineBreak . walk removeNote + where + removeNote :: Inline -> Inline + removeNote (Note _) = Str "" + removeNote x = x + isLineBreak :: Inline -> Any + isLineBreak LineBreak = Any True + isLineBreak _ = Any False + alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of AlignLeft -> "left" @@ -293,7 +306,7 @@ inlineToDocbook opts (Math t str) fixNS = everywhere (mkT fixNS') inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x | otherwise = empty -inlineToDocbook _ LineBreak = flush $ inTagsSimple "literallayout" (text "\n") +inlineToDocbook _ LineBreak = text "\n" inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = if isPrefixOf "mailto:" src diff --git a/tests/writer.docbook b/tests/writer.docbook index d4b3c7efd..9cb9a5359 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -68,10 +68,8 @@ <para> Here’s one with a bullet. * criminey. </para> - <para> - There should be a hard line break<literallayout> -</literallayout>here. - </para> +<literallayout>There should be a hard line break +here.</literallayout> </sect1> <sect1 id="block-quotes"> <title>Block Quotes -- cgit v1.2.3 From d4c1cd456c16298069b05e1da328e70dc87ea547 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 12 Apr 2014 21:44:54 -0700 Subject: Org reader: Removed ANN pragma. This relies on Template Haskell, which causes problems in Windows due to libraries with C dependencies. We need to avoid using TH in pandoc code. --- src/Text/Pandoc/Readers/Org.hs | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..36b1ad287 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -44,9 +44,6 @@ import Data.List (foldl', isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) --- Ignore HLint warnings to use String instead of [Char] -{-# ANN module ("HLint: ignore Use String" :: String) #-} - -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -- cgit v1.2.3 From 7f036c0b57f2791c03040bed61e55adcd21ee496 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 15 Apr 2014 19:53:11 -0700 Subject: Shared: Fixed bug in toRomanNumeral. 9 and numbers ending in 9 would end with "IXIV." Thanks to Jesse Rosenthal. Closes #1249. --- src/Text/Pandoc/Shared.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 3835629db..27ef6a579 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -234,9 +234,9 @@ toRomanNumeral x = _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50) _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10) - _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5) + _ | x == 9 -> "IX" _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5) - _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4) + _ | x == 4 -> "IV" _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" -- cgit v1.2.3 From 346bcea713f933a6cf48829d948e14b9c28b4798 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 16 Apr 2014 13:22:48 +0200 Subject: Org reader: Better module description, minor style changes Use module description analogous to the markdown reader's. Use (<$) where it makes sense. --- src/Text/Pandoc/Readers/Org.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index bda0b0262..c4ea64ba7 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Maintainer : Albert Krewinkel -Conversion of Org-Mode to 'Pandoc' document. +Conversion of org-mode formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Org ( readOrg ) where @@ -711,7 +711,7 @@ math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c - eof <|> lookAhead (oneOf mathPostChars) *> return () + eof <|> () <$ lookAhead (oneOf mathPostChars) return [res] rawMathBetween :: String @@ -734,12 +734,12 @@ emphasisEnd :: Char -> OrgParser Char emphasisEnd c = try $ do guard =<< notAfterForbiddenBorderChar char c - eof <|> lookAhead (surroundingEmphasisChar >>= \x -> - oneOf (x ++ emphasisPostChars)) - *> return () + eof <|> () <$ lookAhead acceptablePostChars updateLastStrPos popInlineCharStack return c + where acceptablePostChars = + surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) mathStart :: Char -> OrgParser Char mathStart c = try $ @@ -749,7 +749,7 @@ mathEnd :: Char -> OrgParser Char mathEnd c = try $ do res <- noneOf (c:mathForbiddenBorderChars) char c - eof <|> lookAhead (oneOf mathPostChars *> pure ()) + eof <|> () <$ lookAhead (oneOf mathPostChars) return res -- cgit v1.2.3 From 5fc252270c8332908e3ad9ec12d16c08c49de4a2 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 14 Apr 2014 15:04:32 +0200 Subject: Org reader: Fix code for subexpression parsing --- src/Text/Pandoc/Readers/Org.hs | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c4ea64ba7..ec0436f4c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -844,24 +844,12 @@ notAfterForbiddenBorderChar = do -- | Read a sub- or superscript expression subOrSuperExpr :: OrgParser Inlines -subOrSuperExpr = try $ do - choice [ balancedSexp '{' '}' - , balancedSexp '(' ')' >>= return . enclosing ('(', ')') +subOrSuperExpr = try $ + choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") + , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") , simpleSubOrSuperString ] >>= parseFromString (mconcat <$> many inline) - --- | Read a balanced sexp -balancedSexp :: Char - -> Char - -> OrgParser String -balancedSexp l r = try $ do - char l - res <- concat <$> many ( many1 (noneOf ([l, r] ++ "\n\r")) - <|> try (string [l, r]) - <|> enclosing (l, r) <$> balancedSexp l r - ) - char r - return res + where enclosing (left, right) s = left : s ++ [right] simpleSubOrSuperString :: OrgParser String simpleSubOrSuperString = try $ @@ -869,8 +857,3 @@ simpleSubOrSuperString = try $ , mappend <$> option [] ((:[]) <$> oneOf "+-") <*> many1 alphaNum ] - -enclosing :: (a, a) - -> [a] - -> [a] -enclosing (left, right) s = left : s ++ [right] -- cgit v1.2.3 From 92582c6272a3a171c406699e46e88afc4835d85c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 16 Apr 2014 11:58:16 +0200 Subject: Org reader: introduce Reader environment around Blocks/Inlines This introduces a Reader environment in the style of Text.Pandoc.Parsing.F, but adapted to the Org reader parser. --- src/Text/Pandoc/Readers/Org.hs | 306 ++++++++++++++++++++++++----------------- 1 file changed, 176 insertions(+), 130 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ec0436f4c..bdff4869c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2014 Albert Krewinkel @@ -29,21 +30,26 @@ Conversion of org-mode formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Org ( readOrg ) where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..)) +import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) + , trimInlines ) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos) +import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF + , newline, orderedListMarker + , updateLastStrPos ) import Text.Pandoc.Shared (compactify') -import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Applicative ( Applicative, pure + , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow ((***)) -import Control.Monad (guard, when) +import Control.Monad (foldM, guard, liftM, liftM2, when) +import Control.Monad.Reader (Reader, runReader) import Data.Char (toLower) import Data.Default -import Data.List (foldl', isPrefixOf, isSuffixOf) +import Data.List (isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) -import Data.Monoid (mconcat, mempty, mappend) +import Data.Monoid (Monoid, mconcat, mempty, mappend) -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options @@ -55,10 +61,10 @@ type OrgParser = Parser [Char] OrgParserState parseOrg:: OrgParser Pandoc parseOrg = do - blocks' <- B.toList <$> parseBlocks + blocks' <- parseBlocks st <- getState - let meta = orgStateMeta st - return $ Pandoc meta $ filter (/= Null) blocks' + let meta = runF (orgStateMeta' st) st + return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st) -- -- Parser State for Org @@ -73,7 +79,8 @@ data OrgParserState = OrgParserState , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateMeta :: Meta - } deriving (Show) + , orgStateMeta' :: F Meta + } instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -96,6 +103,7 @@ defaultOrgParserState = OrgParserState , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateMeta = nullMeta + , orgStateMeta' = return nullMeta } updateLastStrPos :: OrgParser () @@ -138,6 +146,27 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } + +-- +-- Adaptions and specializations of parsing utilities +-- + +newtype F a = F { unF :: Reader OrgParserState a + } deriving (Monad, Applicative, Functor) + +runF :: F a -> OrgParserState -> a +runF = runReader . unF + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = fmap mconcat . sequence + +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines + + +-- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char newline = P.newline @@ -148,10 +177,10 @@ newline = -- parsing blocks -- -parseBlocks :: OrgParser Blocks +parseBlocks :: OrgParser (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: OrgParser Blocks +block :: OrgParser (F Blocks) block = choice [ mempty <$ blanklines , orgBlock , example @@ -159,7 +188,7 @@ block = choice [ mempty <$ blanklines , figure , specialLine , header - , hline + , return <$> hline , list , table , paraOrPlain @@ -169,15 +198,15 @@ block = choice [ mempty <$ blanklines -- Org Blocks (#+BEGIN_... / #+END_...) -- -orgBlock :: OrgParser Blocks +orgBlock :: OrgParser (F Blocks) orgBlock = try $ do (indent, blockType, args) <- blockHeader blockStr <- rawBlockContent indent blockType let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr - _ -> B.divWith ("", [blockType], []) + "src" -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr + _ -> fmap (B.divWith ("", [blockType], [])) <$> parseFromString parseBlocks blockStr blockHeader :: OrgParser (Int, String, [String]) @@ -222,15 +251,16 @@ commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs commaEscaped cs = cs -example :: OrgParser Blocks -example = try $ - B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine +example :: OrgParser (F Blocks) +example = try $ do + body <- unlines <$> many1 exampleLine + return . return $ B.codeBlockWith ("", ["example"], []) body exampleLine :: OrgParser String exampleLine = try $ string ": " *> anyLine -- Drawers for properties or a logbook -drawer :: OrgParser Blocks +drawer :: OrgParser (F Blocks) drawer = try $ do drawerStart manyTill drawerLine (try drawerEnd) @@ -256,18 +286,20 @@ drawerEnd = try $ -- -- Figures (Image on a line by itself, preceded by name and/or caption) -figure :: OrgParser Blocks +figure :: OrgParser (F Blocks) figure = try $ do (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) <$> nameAndOrCaption src <- skipSpaces *> selfTarget <* skipSpaces <* newline guard (isImageFilename src) - return . B.para $ B.image src tit cap + return $ do + cap' <- cap + return $ B.para $ B.image src tit cap' where withFigPrefix cs = if "fig:" `isPrefixOf` cs then cs else "fig:" ++ cs -nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines) +nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines)) nameAndOrCaption = try $ nameFirst <|> captionFirst where nameFirst = try $ do @@ -279,7 +311,7 @@ nameAndOrCaption = try $ nameFirst <|> captionFirst n <- optionMaybe name return (n, Just c) -caption :: OrgParser Inlines +caption :: OrgParser (F Inlines) caption = try $ annotation "CAPTION" *> inlinesTillNewline name :: OrgParser String @@ -289,8 +321,8 @@ annotation :: String -> OrgParser String annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' -- Comments, Options and Metadata -specialLine :: OrgParser Blocks -specialLine = try $ metaLine <|> commentLine +specialLine :: OrgParser (F Blocks) +specialLine = fmap return . try $ metaLine <|> commentLine metaLine :: OrgParser Blocks metaLine = try $ metaLineStart *> declarationLine @@ -308,12 +340,15 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser Blocks declarationLine = try $ do - meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta - updateState $ \st -> st { orgStateMeta = orgStateMeta st <> meta' } + key <- metaKey + inlinesF <- metaInlines + updateState $ \st -> + let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta + in st { orgStateMeta' = orgStateMeta' st <> meta' } return mempty -metaValue :: OrgParser MetaValue -metaValue = MetaInlines . B.toList <$> inlinesTillNewline +metaInlines :: OrgParser (F MetaValue) +metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -321,16 +356,20 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* skipSpaces -- | Headers -header :: OrgParser Blocks -header = try $ - B.header <$> headerStart - <*> inlinesTillNewline +header :: OrgParser (F Blocks) +header = try $ do + level <- headerStart + title <- inlinesTillNewline + return $ B.header level <$> title headerStart :: OrgParser Int headerStart = try $ (length <$> many1 (char '*')) <* many1 (char ' ') --- Horizontal Line (five dashes or more) +-- Don't use (or need) the reader wrapper here, we want hline to be +-- @show@able. Otherwise we can't use it with @notFollowedBy'@. + +-- | Horizontal Line (five -- dashes or more) hline :: OrgParser Blocks hline = try $ do skipSpaces @@ -344,22 +383,23 @@ hline = try $ do -- Tables -- -data OrgTableRow = OrgContentRow [Blocks] +data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [Alignment] | OrgHlineRow - deriving (Eq, Show) data OrgTable = OrgTable { orgTableColumns :: Int , orgTableAlignments :: [Alignment] , orgTableHeader :: [Blocks] , orgTableRows :: [[Blocks]] - } deriving (Eq, Show) + } -table :: OrgParser Blocks +table :: OrgParser (F Blocks) table = try $ do lookAhead tableStart - orgToPandocTable . normalizeTable . rowsToTable <$> tableRows + do + rows <- tableRows + return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable -> Blocks @@ -374,11 +414,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) -tableContentCell :: OrgParser Blocks +tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - B.plain . trimInlines . mconcat <$> many1Till inline endOfCell + fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char endOfCell = try $ char '|' <|> lookAhead newline @@ -410,8 +450,8 @@ tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) rowsToTable :: [OrgTableRow] - -> OrgTable -rowsToTable = foldl' (flip rowToContent) zeroTable + -> F OrgTable +rowsToTable = foldM (flip rowToContent) zeroTable where zeroTable = OrgTable 0 mempty mempty mempty normalizeTable :: OrgTable @@ -430,57 +470,64 @@ normalizeTable (OrgTable cols aligns heads lns) = -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow -> OrgTable - -> OrgTable -rowToContent OrgHlineRow = maybeBodyToHeader -rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs -rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as + -> F OrgTable +rowToContent OrgHlineRow t = maybeBodyToHeader t +rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t +rowToContent (OrgContentRow rf) t = do + rs <- rf + setLongestRow rs =<< appendToBody rs t setLongestRow :: [a] -> OrgTable - -> OrgTable -setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) } + -> F OrgTable +setLongestRow rs t = + return t{ orgTableColumns = max (length rs) (orgTableColumns t) } maybeBodyToHeader :: OrgTable - -> OrgTable + -> F OrgTable maybeBodyToHeader t = case t of OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - t{ orgTableHeader = b , orgTableRows = [] } - _ -> t + return t{ orgTableHeader = b , orgTableRows = [] } + _ -> return t appendToBody :: [Blocks] -> OrgTable - -> OrgTable -appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } + -> F OrgTable +appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] -> OrgTable - -> OrgTable -setAligns aligns t = t{ orgTableAlignments = aligns } + -> F OrgTable +setAligns aligns t = return $ t{ orgTableAlignments = aligns } -- Paragraphs or Plain text -paraOrPlain :: OrgParser Blocks +paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ - parseInlines <**> option B.plain (try $ newline *> pure B.para) + parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para)) -inlinesTillNewline :: OrgParser Inlines -inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser (F Inlines) +inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline -- -- list blocks -- -list :: OrgParser Blocks +list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] "list" -definitionList :: OrgParser Blocks -definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart) +definitionList :: OrgParser (F Blocks) +definitionList = fmap B.definitionList . sequence + <$> many1 (definitionListItem bulletListStart) -bulletList :: OrgParser Blocks -bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) +bulletList :: OrgParser (F Blocks) +bulletList = fmap B.bulletList . fmap compactify' . sequence + <$> many1 (listItem bulletListStart) -orderedList :: OrgParser Blocks -orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) +orderedList :: OrgParser (F Blocks) +-- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) +orderedList = fmap B.orderedList . fmap compactify' . sequence + <$> many1 (listItem orderedListStart) genericListStart :: OrgParser String -> OrgParser Int @@ -499,7 +546,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") definitionListItem :: OrgParser Int - -> OrgParser (Inlines, [Blocks]) + -> OrgParser (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") @@ -507,12 +554,12 @@ definitionListItem parseMarkerGetLength = try $ do cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term contents' <- parseFromString parseBlocks $ first ++ cont - return (term', [contents']) + return $ (,) <$> term' <*> fmap (:[]) contents' -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int - -> OrgParser Blocks + -> OrgParser (F Blocks) listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline @@ -536,11 +583,11 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline -- -inline :: OrgParser Inlines +inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak - , link + , linkOrImage , str , endline , emph @@ -557,29 +604,29 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) "inline" -parseInlines :: OrgParser Inlines -parseInlines = trimInlines . mconcat <$> many1 inline +parseInlines :: OrgParser (F Inlines) +parseInlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" -whitespace :: OrgParser Inlines -whitespace = B.space <$ skipMany1 spaceChar - <* updateLastPreCharPos - <* updateLastForbiddenCharPos +whitespace :: OrgParser (F Inlines) +whitespace = pure B.space <$ skipMany1 spaceChar + <* updateLastPreCharPos + <* updateLastForbiddenCharPos "whitespace" -linebreak :: OrgParser Inlines -linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline +linebreak :: OrgParser (F Inlines) +linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") - <* updateLastStrPos +str :: OrgParser (F Inlines) +str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + <* updateLastStrPos -- an endline character that can be treated as a space, not a structural break -endline :: OrgParser Inlines +endline :: OrgParser (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -595,29 +642,29 @@ endline = try $ do decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return B.space + return . return $ B.space -link :: OrgParser Inlines -link = explicitOrImageLink <|> selflinkOrImage "link" +linkOrImage :: OrgParser (F Inlines) +linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" -explicitOrImageLink :: OrgParser Inlines +explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' src <- linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return . B.link src "" - $ if isImageFilename src && isImageFilename title - then B.image title "" "" - else title' + return $ B.link src "" <$> + if isImageFilename src && isImageFilename title + then return $ B.image title mempty mempty + else title' -selflinkOrImage :: OrgParser Inlines +selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + return . return $ if isImageFilename src + then B.image src "" "" + else B.link src "" (B.str src) selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' @@ -634,51 +681,50 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] -emph :: OrgParser Inlines -emph = B.emph <$> emphasisBetween '/' +emph :: OrgParser (F Inlines) +emph = fmap B.emph <$> emphasisBetween '/' -strong :: OrgParser Inlines -strong = B.strong <$> emphasisBetween '*' +strong :: OrgParser (F Inlines) +strong = fmap B.strong <$> emphasisBetween '*' -strikeout :: OrgParser Inlines -strikeout = B.strikeout <$> emphasisBetween '+' +strikeout :: OrgParser (F Inlines) +strikeout = fmap B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. -underline :: OrgParser Inlines -underline = B.strong <$> emphasisBetween '_' - -code :: OrgParser Inlines -code = B.code <$> verbatimBetween '=' +underline :: OrgParser (F Inlines) +underline = fmap B.strong <$> emphasisBetween '_' -verbatim :: OrgParser Inlines -verbatim = B.rawInline "" <$> verbatimBetween '~' +code :: OrgParser (F Inlines) +code = return . B.code <$> verbatimBetween '=' -math :: OrgParser Inlines -math = B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' - , rawMathBetween "\\(" "\\)" - ] +verbatim :: OrgParser (F Inlines) +verbatim = return . B.rawInline "" <$> verbatimBetween '~' -displayMath :: OrgParser Inlines -displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] +subscript :: OrgParser (F Inlines) +subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) -subscript :: OrgParser Inlines -subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) +superscript :: OrgParser (F Inlines) +superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) -superscript :: OrgParser Inlines -superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) +math :: OrgParser (F Inlines) +math = return . B.math <$> choice [ math1CharBetween '$' + , mathStringBetween '$' + , rawMathBetween "\\(" "\\)" + ] -symbol :: OrgParser Inlines -symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +displayMath :: OrgParser (F Inlines) +displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] +symbol :: OrgParser (F Inlines) +symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) where updatePositions c | c `elem` emphasisPreChars = c <$ updateLastPreCharPos | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos | otherwise = return c emphasisBetween :: Char - -> OrgParser Inlines + -> OrgParser (F Inlines) emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -755,9 +801,9 @@ mathEnd c = try $ do enclosedInlines :: OrgParser a -> OrgParser b - -> OrgParser Inlines + -> OrgParser (F Inlines) enclosedInlines start end = try $ - trimInlines . mconcat <$> enclosed start end inline + trimInlinesF . mconcat <$> enclosed start end inline enclosedRaw :: OrgParser a -> OrgParser b @@ -843,7 +889,7 @@ notAfterForbiddenBorderChar = do return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser Inlines +subOrSuperExpr :: OrgParser (F Inlines) subOrSuperExpr = try $ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") -- cgit v1.2.3 From 0672f58a445c289c58e42cffbbf32a273e801e39 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 18:43:49 +0200 Subject: Org reader: Support footnotes --- src/Text/Pandoc/Readers/Org.hs | 66 ++++++++++++++++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 4 +++ 2 files changed, 68 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index bdff4869c..17f8a1c9e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -44,7 +44,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow ((***)) import Control.Monad (foldM, guard, liftM, liftM2, when) -import Control.Monad.Reader (Reader, runReader) +import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default import Data.List (isPrefixOf, isSuffixOf) @@ -59,7 +59,7 @@ readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState -parseOrg:: OrgParser Pandoc +parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState @@ -70,6 +70,9 @@ parseOrg = do -- Parser State for Org -- +type OrgNoteRecord = (String, F Blocks) +type OrgNoteTable = [OrgNoteRecord] + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -80,6 +83,7 @@ data OrgParserState = OrgParserState , orgStateLastStrPos :: Maybe SourcePos , orgStateMeta :: Meta , orgStateMeta' :: F Meta + , orgStateNotes' :: OrgNoteTable } instance HasReaderOptions OrgParserState where @@ -104,6 +108,7 @@ defaultOrgParserState = OrgParserState , orgStateLastStrPos = Nothing , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta + , orgStateNotes' = [] } updateLastStrPos :: OrgParser () @@ -146,6 +151,11 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addToNotesTable :: OrgNoteRecord -> OrgParser () +addToNotesTable note = do + oldnotes <- orgStateNotes' <$> getState + updateState $ \s -> s{ orgStateNotes' = note:oldnotes } + -- -- Adaptions and specializations of parsing utilities @@ -157,6 +167,12 @@ newtype F a = F { unF :: Reader OrgParserState a runF :: F a -> OrgParserState -> a runF = runReader . unF +askF :: F OrgParserState +askF = F ask + +asksF :: (OrgParserState -> a) -> F a +asksF f = F $ asks f + instance Monoid a => Monoid (F a) where mempty = return mempty mappend = liftM2 mappend @@ -191,6 +207,7 @@ block = choice [ mempty <$ blanklines , return <$> hline , list , table + , noteBlock , paraOrPlain ] "block" @@ -500,6 +517,16 @@ setAligns :: [Alignment] -> F OrgTable setAligns aligns t = return $ t{ orgTableAlignments = aligns } +-- +-- Footnote defintions +-- +noteBlock :: OrgParser (F Blocks) +noteBlock = try $ do + ref <- noteMarker + content <- skipSpaces *> paraOrPlain + addToNotesTable (ref, content) + return mempty + -- Paragraphs or Plain text paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ @@ -587,6 +614,7 @@ inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak + , footnote , linkOrImage , str , endline @@ -632,6 +660,7 @@ endline = try $ do notFollowedBy blankline notFollowedBy' exampleLine notFollowedBy' hline + notFollowedBy' noteMarker notFollowedBy' tableStart notFollowedBy' drawerStart notFollowedBy' headerStart @@ -644,6 +673,39 @@ endline = try $ do updateLastPreCharPos return . return $ B.space +footnote :: OrgParser (F Inlines) +footnote = try $ inlineNote <|> referencedNote + +inlineNote :: OrgParser (F Inlines) +inlineNote = try $ do + string "[fn:" + ref <- many alphaNum + char ':' + note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') + when (not $ null ref) $ + addToNotesTable ("fn:" ++ ref, note) + return $ B.note <$> note + +referencedNote :: OrgParser (F Inlines) +referencedNote = try $ do + ref <- noteMarker + return $ do + notes <- asksF orgStateNotes' + case lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just contents -> do + st <- askF + let contents' = runF contents st{ orgStateNotes' = [] } + return $ B.note contents' + +noteMarker :: OrgParser String +noteMarker = try $ do + char '[' + choice [ many1Till digit (char ']') + , (++) <$> string "fn:" + <*> many1Till (noneOf "\n\r\t ") (char ']') + ] + linkOrImage :: OrgParser (F Inlines) linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f39bd7992..7f9c5f1d5 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -98,6 +98,10 @@ tests = "line \\\\ \nbreak" =?> para ("line" <> linebreak <> "break") + , "Inline note" =: + "[fn::Schreib mir eine E-Mail]" =?> + para (note $ para "Schreib mir eine E-Mail") + , "Markup-chars not occuring on word break are symbols" =: unlines [ "this+that+ +so+on" , "seven*eight* nine*" -- cgit v1.2.3 From 6d6724cf2c6ae6bcc0df312c476e45644c972a85 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 17 Apr 2014 18:09:27 +0200 Subject: Org reader: Support more types of '#+BEGIN_' blocks Support for standard org-blocks is improved. The parser now handles "HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a sensible fashion. --- src/Text/Pandoc/Readers/Org.hs | 41 +++++++++++++++--- tests/Tests/Readers/Org.hs | 97 ++++++++++++++++++++++++++++++++---------- 2 files changed, 108 insertions(+), 30 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 17f8a1c9e..88e81f5fc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker + , parseFromString , updateLastStrPos ) import Text.Pandoc.Shared (compactify') @@ -47,7 +48,7 @@ import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default -import Data.List (isPrefixOf, isSuffixOf) +import Data.List (intersperse, isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (Monoid, mconcat, mempty, mappend) @@ -156,6 +157,16 @@ addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState updateState $ \s -> s{ orgStateNotes' = note:oldnotes } +-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts +-- of the state saved and restored. +parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString parser str' = do + oldLastPreCharPos <- orgStateLastPreCharPos <$> getState + updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } + result <- P.parseFromString parser str' + updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } + return result + -- -- Adaptions and specializations of parsing utilities @@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines orgBlock :: OrgParser (F Blocks) orgBlock = try $ do (indent, blockType, args) <- blockHeader - blockStr <- rawBlockContent indent blockType + content <- rawBlockContent indent blockType + contentBlocks <- parseFromString parseBlocks (content ++ "\n") let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr - _ -> fmap (B.divWith ("", [blockType], [])) - <$> parseFromString parseBlocks blockStr + "src" -> returnF $ B.codeBlockWith ("", classArgs, []) content + "html" -> returnF $ B.rawBlock "html" content + "latex" -> returnF $ B.rawBlock "latex" content + "ascii" -> returnF $ B.rawBlock "ascii" content + "example" -> returnF $ exampleCode content + "quote" -> return $ B.blockQuote <$> contentBlocks + "verse" -> parseVerse content + _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + where + returnF :: a -> OrgParser (F a) + returnF = return . return + + parseVerse :: String -> OrgParser (F Blocks) + parseVerse cs = + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines cs) blockHeader :: OrgParser (Int, String, [String]) blockHeader = (,,) <$> blockIndent @@ -270,8 +295,10 @@ commaEscaped cs = cs example :: OrgParser (F Blocks) example = try $ do - body <- unlines <$> many1 exampleLine - return . return $ B.codeBlockWith ("", ["example"], []) body + return . return . exampleCode =<< unlines <$> many1 exampleLine + +exampleCode :: String -> Blocks +exampleCode = B.codeBlockWith ("", ["example"], []) exampleLine :: OrgParser String exampleLine = try $ string ": " *> anyLine diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 7f9c5f1d5..7d5bfe650 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -363,29 +363,6 @@ tests = , "#+END_COMMENT"] =?> (mempty::Blocks) - , "Source Block in Text" =: - unlines [ "Low German greeting" - , " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] - , codeBlockWith attr' code' - ] - - , "Source Block" =: - unlines [ " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in codeBlockWith attr' code' - , "Figure" =: unlines [ "#+caption: A very courageous man." , "#+name: goodguy" @@ -661,4 +638,78 @@ tests = , [ plain "2" , plain mempty , plain mempty ] ] ] + + , testGroup "Blocks" + [ "Source block" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + + , "Source block between paragraphs" =: + unlines [ "Low German greeting" + , " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"Moin!\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"Moin!\"\n" + in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] + , codeBlockWith attr' code' + ] + + , "Example block" =: + unlines [ "#+begin_example" + , "A chosen representation of" + , "a rule." + , "#+eND_exAMPle" + ] =?> + codeBlockWith ("", ["example"], []) + "A chosen representation of\na rule.\n" + + , "HTML block" =: + unlines [ "#+BEGIN_HTML" + , "" + , "#+END_HTML" + ] =?> + rawBlock "html" "\n" + + , "Quote block" =: + unlines [ "#+BEGIN_QUOTE" + , "/Niemand/ hat die Absicht, eine Mauer zu errichten!" + , "#+END_QUOTE" + ] =?> + blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht," + , "eine", "Mauer", "zu", "errichten!" + ])) + + , "Verse block" =: + unlines [ "The first lines of Goethe's /Faust/:" + , "#+begin_verse" + , "Habe nun, ach! Philosophie," + , "Juristerei und Medizin," + , "Und leider auch Theologie!" + , "Durchaus studiert, mit heißem Bemühn." + , "#+end_verse" + ] =?> + mconcat + [ para $ spcSep [ "The", "first", "lines", "of" + , "Goethe's", emph "Faust" <> ":"] + , para $ mconcat + [ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ] + , linebreak + , spcSep [ "Juristerei", "und", "Medizin," ] + , linebreak + , spcSep [ "Und", "leider", "auch", "Theologie!" ] + , linebreak + , spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ] + ] + ] + + ] ] -- cgit v1.2.3 From f19d7233d8d3e47912b760fc62a253e5baf8275a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 08:33:25 +0200 Subject: Org reader: Fix parsing of loose lists Loose lists (i.e. lists with blankline separated items), were parsed as multiple lists, each containing a single item. This patch fixes this issue. --- src/Text/Pandoc/Readers/Org.hs | 11 +++++++---- tests/Tests/Readers/Org.hs | 21 ++++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 88e81f5fc..1fa8d4d5e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -605,9 +605,10 @@ definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") first <- anyLineNewline + blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term - contents' <- parseFromString parseBlocks $ first ++ cont + contents' <- parseFromString parseBlocks $ first ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' @@ -617,16 +618,18 @@ listItem :: OrgParser Int listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) - parseFromString parseBlocks $ firstLine ++ rest + parseFromString parseBlocks $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Int -> OrgParser String listContinuation markerLength = try $ - mappend <$> many blankline - <*> (concat <$> many1 listLine) + notFollowedBy' blankline + *> (mappend <$> (concat <$> many1 listLine) + <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline anyLineNewline :: OrgParser String diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 7d5bfe650..572fc501f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -518,13 +518,24 @@ tests = , ("TTL", [ plain $ "transistor-transistor" <> space <> "logic" ]) , ("PSK", [ mconcat - [ para $ "phase-shift" <> space <> "keying" - , plain $ spcSep [ "a", "digital" - , "modulation", "scheme" ] + [ para $ "phase-shift" <> space <> "keying" + , para $ spcSep [ "a", "digital" + , "modulation", "scheme" ] ] - ] - ) + ]) ] + + , "Loose bullet list" =: + unlines [ "- apple" + , "" + , "- orange" + , "" + , "- peach" + ] =?> + bulletList [ para "apple" + , para "orange" + , para "peach" + ] ] , testGroup "Tables" -- cgit v1.2.3 From 09441b65a83f372410394a88af7808f494c3aa57 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 10:15:58 +0200 Subject: Org reader: Add support for plain LaTeX fragments This adds support for LaTeX fragments like the following: ``` \begin{equation} \int fg \mathrm{d}x \end{equation} ``` --- src/Text/Pandoc/Readers/Org.hs | 41 ++++++++++++++++++++++++++++++++++++++++- tests/Tests/Readers/Org.hs | 20 +++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1fa8d4d5e..66cfe720e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -218,6 +218,7 @@ block = choice [ mempty <$ blanklines , return <$> hline , list , table + , latexFragment , noteBlock , paraOrPlain ] "block" @@ -544,6 +545,41 @@ setAligns :: [Alignment] -> F OrgTable setAligns aligns t = return $ t{ orgTableAlignments = aligns } + +-- +-- LaTeX fragments +-- +latexFragment :: OrgParser (F Blocks) +latexFragment = try $ do + envName <- latexEnvStart + content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) + return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + where + c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" + , c + , "\\end{", e, "}\n" + ] + +latexEnvStart :: OrgParser String +latexEnvStart = try $ do + skipSpaces *> string "\\begin{" + *> latexEnvName + <* string "}" + <* blankline + +latexEnd :: String -> OrgParser () +latexEnd envName = try $ + () <$ skipSpaces + <* string ("\\end{" ++ envName ++ "}") + <* blankline + +-- | Parses a LaTeX environment name. +latexEnvName :: OrgParser String +latexEnvName = try $ do + mappend <$> many1 alphaNum + <*> option "" (string "*") + + -- -- Footnote defintions -- @@ -683,7 +719,9 @@ str :: OrgParser (F Inlines) str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos --- an endline character that can be treated as a space, not a structural break +-- | An endline character that can be treated as a space, not a structural +-- break. This should reflect the values of the Emacs variable +-- @org-element-pagaraph-separate@. endline :: OrgParser (F Inlines) endline = try $ do newline @@ -695,6 +733,7 @@ endline = try $ do notFollowedBy' drawerStart notFollowedBy' headerStart notFollowedBy' metaLineStart + notFollowedBy' latexEnvStart notFollowedBy' commentLineStart notFollowedBy' bulletListStart notFollowedBy' orderedListStart diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 572fc501f..1ac2c1fd8 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -650,7 +650,7 @@ tests = ] ] - , testGroup "Blocks" + , testGroup "Blocks and fragments" [ "Source block" =: unlines [ " #+BEGIN_SRC haskell" , " main = putStrLn greeting" @@ -722,5 +722,23 @@ tests = ] ] + , "LaTeX fragment" =: + unlines [ "\\begin{equation}" + , "X_i = \\begin{cases}" + , " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) = \\alpha(i)\\\\" + , " C_{\\alpha(i)} & \\text{otherwise}" + , " \\end{cases}" + , "\\end{equation}" + ] =?> + rawBlock "latex" + (unlines [ "\\begin{equation}" + , "X_i = \\begin{cases}" + , " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) =" ++ + " \\alpha(i)\\\\" + , " C_{\\alpha(i)} & \\text{otherwise}" + , " \\end{cases}" + , "\\end{equation}" + ]) + ] ] -- cgit v1.2.3 From 6ded3d41d94c1e90d1d30a1f99ddad62e62d9ce6 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 20:47:50 +0200 Subject: Org reader: Apply captions to code blocks and tables The `Table` blocktype already takes the caption as an argument, while code blocks are wrapped in a `Div` block together with a labelling `Span`. --- src/Text/Pandoc/Readers/Org.hs | 134 +++++++++++++++++++++++++++++------------ tests/Tests/Readers/Org.hs | 31 +++++++++- 2 files changed, 124 insertions(+), 41 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 66cfe720e..025158060 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,13 +43,13 @@ import Text.Pandoc.Shared (compactify') import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Arrow ((***)) import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) -import Data.Maybe (listToMaybe, fromMaybe) +import qualified Data.Map as M +import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) -- | Parse org-mode string and return a Pandoc document. @@ -74,9 +74,12 @@ parseOrg = do type OrgNoteRecord = (String, F Blocks) type OrgNoteTable = [OrgNoteRecord] +type OrgBlockAttributes = M.Map String String + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int , orgStateLastForbiddenCharPos :: Maybe SourcePos @@ -102,6 +105,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateLastForbiddenCharPos = Nothing @@ -112,6 +116,19 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +addBlockAttribute :: String -> String -> OrgParser () +addBlockAttribute key val = updateState $ \s -> + let attrs = orgStateBlockAttributes s + in s{ orgStateBlockAttributes = M.insert key val attrs } + +lookupBlockAttribute :: String -> OrgParser (Maybe String) +lookupBlockAttribute key = + M.lookup key . orgStateBlockAttributes <$> getState + +resetBlockAttributes :: OrgParser () +resetBlockAttributes = updateState $ \s -> + s{ orgStateBlockAttributes = orgStateBlockAttributes def } + updateLastStrPos :: OrgParser () updateLastStrPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastStrPos = Just p } @@ -125,19 +142,19 @@ updateLastPreCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastPreCharPos = Just p} pushToInlineCharStack :: Char -> OrgParser () -pushToInlineCharStack c = updateState $ \st -> - st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st } +pushToInlineCharStack c = updateState $ \s -> + s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } popInlineCharStack :: OrgParser () -popInlineCharStack = updateState $ \st -> - st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st } +popInlineCharStack = updateState $ \s -> + s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> - s { orgStateEmphasisNewlines = Just maxNewlines } + s{ orgStateEmphasisNewlines = Just maxNewlines } decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> @@ -209,20 +226,50 @@ parseBlocks = mconcat <$> manyTill block eof block :: OrgParser (F Blocks) block = choice [ mempty <$ blanklines - , orgBlock + , optionalAttributes $ choice + [ orgBlock + , figure + , table + ] , example , drawer - , figure , specialLine , header , return <$> hline , list - , table , latexFragment , noteBlock , paraOrPlain ] "block" +optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) +optionalAttributes parser = try $ + resetBlockAttributes *> parseBlockAttributes *> parser + +parseBlockAttributes :: OrgParser () +parseBlockAttributes = do + attrs <- many attribute + () <$ mapM (uncurry parseAndAddAttribute) attrs + where + attribute :: OrgParser (String, String) + attribute = try $ do + key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + val <- skipSpaces *> anyLine + return (map toLower key, val) + +parseAndAddAttribute :: String -> String -> OrgParser () +parseAndAddAttribute key value = do + let key' = map toLower key + () <$ addBlockAttribute key' value + +lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) +lookupInlinesAttr attr = try $ do + val <- lookupBlockAttribute attr + maybe (return Nothing) + (fmap Just . parseFromString parseInlines) + val + + -- -- Org Blocks (#+BEGIN_... / #+END_...) -- @@ -235,13 +282,13 @@ orgBlock = try $ do let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> returnF $ B.codeBlockWith ("", classArgs, []) content "html" -> returnF $ B.rawBlock "html" content "latex" -> returnF $ B.rawBlock "latex" content "ascii" -> returnF $ B.rawBlock "ascii" content "example" -> returnF $ exampleCode content "quote" -> return $ B.blockQuote <$> contentBlocks "verse" -> parseVerse content + "src" -> codeBlockWithAttr classArgs content _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks where returnF :: a -> OrgParser (F a) @@ -260,6 +307,18 @@ blockHeader = (,,) <$> blockIndent blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline +codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) +codeBlockWithAttr classArgs content = do + identifier <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + where + labelDiv blk value = + B.divWith nullAttr <$> (mappend <$> labelledBlock value + <*> pure blk) + labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + rawBlockContent :: Int -> String -> OrgParser String rawBlockContent indent blockType = unlines . map commaEscaped <$> manyTill indentedLine blockEnder @@ -333,38 +392,26 @@ drawerEnd = try $ -- Figures (Image on a line by itself, preceded by name and/or caption) figure :: OrgParser (F Blocks) figure = try $ do - (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) - <$> nameAndOrCaption + (cap, nam) <- nameAndCaption src <- skipSpaces *> selfTarget <* skipSpaces <* newline guard (isImageFilename src) return $ do cap' <- cap - return $ B.para $ B.image src tit cap' - where withFigPrefix cs = if "fig:" `isPrefixOf` cs - then cs - else "fig:" ++ cs - -nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines)) -nameAndOrCaption = try $ nameFirst <|> captionFirst + return $ B.para $ B.image src nam cap' where - nameFirst = try $ do - n <- name - c <- optionMaybe caption - return (Just n, c) - captionFirst = try $ do - c <- caption - n <- optionMaybe name - return (n, Just c) - -caption :: OrgParser (F Inlines) -caption = try $ annotation "CAPTION" *> inlinesTillNewline - -name :: OrgParser String -name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline - -annotation :: String -> OrgParser String -annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' + nameAndCaption = + do + maybeCap <- lookupInlinesAttr "caption" + maybeNam <- lookupBlockAttribute "name" + guard $ isJust maybeCap || isJust maybeNam + return ( fromMaybe mempty maybeCap + , maybe mempty withFigPrefix maybeNam ) + withFigPrefix cs = + if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs +-- -- Comments, Options and Metadata specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine @@ -400,6 +447,10 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +-- +-- Headers +-- + -- | Headers header :: OrgParser (F Blocks) header = try $ do @@ -411,6 +462,7 @@ headerStart :: OrgParser Int headerStart = try $ (length <$> many1 (char '*')) <* many1 (char ' ') + -- Don't use (or need) the reader wrapper here, we want hline to be -- @show@able. Otherwise we can't use it with @notFollowedBy'@. @@ -444,12 +496,14 @@ table = try $ do lookAhead tableStart do rows <- tableRows - return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows + cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" + return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable + -> Inlines -> Blocks -orgToPandocTable (OrgTable _ aligns heads lns) = - B.table "" (zip aligns $ repeat 0) heads lns +orgToPandocTable (OrgTable _ aligns heads lns) caption = + B.table caption (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 1ac2c1fd8..80a95d36b 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -8,7 +8,7 @@ import Tests.Arbitrary() import Text.Pandoc.Builder import Text.Pandoc import Data.List (intersperse) -import Data.Monoid (mempty, mconcat) +import Data.Monoid (mempty, mappend, mconcat) org :: String -> Pandoc org = readOrg def @@ -648,6 +648,18 @@ tests = [ [ plain "1" , plain "One" , plain "foo" ] , [ plain "2" , plain mempty , plain mempty ] ] + + , "Table with caption" =: + unlines [ "#+CAPTION: Hitchhiker's Multiplication Table" + , "| x | 6 |" + , "| 9 | 42 |" + ] =?> + table "Hitchhiker's Multiplication Table" + [(AlignDefault, 0), (AlignDefault, 0)] + [] + [ [ plain "x", plain "6" ] + , [ plain "9", plain "42" ] + ] ] , testGroup "Blocks and fragments" @@ -740,5 +752,22 @@ tests = , "\\end{equation}" ]) + , "Code block with caption" =: + unlines [ "#+CAPTION: Functor laws in Haskell" + , "#+NAME: functor-laws" + , "#+BEGIN_SRC haskell" + , "fmap id = id" + , "fmap (p . q) = (fmap p) . (fmap q)" + , "#+END_SRC" + ] =?> + divWith + nullAttr + (mappend + (plain $ spanWith ("", ["label"], []) + (spcSep [ "Functor", "laws", "in", "Haskell" ])) + (codeBlockWith ("functor-laws", ["haskell"], []) + (unlines [ "fmap id = id" + , "fmap (p . q) = (fmap p) . (fmap q)" + ]))) ] ] -- cgit v1.2.3 From a69416091ba035ab1661ff306ef3e51fd926488b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 11:25:39 +0200 Subject: Org reader: Fix distinction of images and normal links Fixed a false assumption about the precedence of (&&) vs (||). --- src/Text/Pandoc/Readers/Org.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 025158060..66211b20e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -860,8 +860,8 @@ linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - any (\x -> (x++":") `isPrefixOf` filename) protocols || - ':' `notElem` filename + (any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename) where imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] -- cgit v1.2.3 From 8e91d362a392d1ee90a497f39cfcf90fee8d8da0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 13:15:47 +0200 Subject: Org reader: Fix parsing of footnotes Footnotes can consist of multiple blocks and end only at a header or at the beginning of another footnote. This fixes the previous behavior, which restricted notes to a single paragraph. --- src/Text/Pandoc/Readers/Org.hs | 8 ++++++-- tests/Tests/Readers/Org.hs | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 66211b20e..0bc0a2668 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -639,10 +639,14 @@ latexEnvName = try $ do -- noteBlock :: OrgParser (F Blocks) noteBlock = try $ do - ref <- noteMarker - content <- skipSpaces *> paraOrPlain + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillHeaderOrNote addToNotesTable (ref, content) return mempty + where + blocksTillHeaderOrNote = + many1Till block (eof <|> () <$ lookAhead noteMarker + <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text paraOrPlain :: OrgParser (F Blocks) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 80a95d36b..4cc405c0f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -383,6 +383,48 @@ tests = ] =?> para (image "the-red-queen.jpg" "fig:redqueen" "Used as a metapher in evolutionary biology.") + + , "Footnote" =: + unlines [ "A footnote[1]" + , "" + , "[1] First paragraph" + , "" + , "second paragraph" + ] =?> + para (mconcat + [ "A", space, "footnote" + , note $ mconcat [ para ("First" <> space <> "paragraph") + , para ("second" <> space <> "paragraph") + ] + ]) + + , "Two footnotes" =: + unlines [ "Footnotes[fn:1][fn:2]" + , "" + , "[fn:1] First note." + , "" + , "[fn:2] Second note." + ] =?> + para (mconcat + [ "Footnotes" + , note $ para ("First" <> space <> "note.") + , note $ para ("Second" <> space <> "note.") + ]) + + , "Footnote followed by header" =: + unlines [ "Another note[fn:yay]" + , "" + , "[fn:yay] This is great!" + , "" + , "** Headline" + ] =?> + mconcat + [ para (mconcat + [ "Another", space, "note" + , note $ para ("This" <> space <> "is" <> space <> "great!") + ]) + , header 2 "Headline" + ] ] , testGroup "Lists" $ -- cgit v1.2.3 From efebade38b3aef30b5fbf04e60f1fc2bd76e6c6c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 14:48:35 +0200 Subject: Move `compactify'DL` from Markdown reader into Shared The function `compactify'DL`, used to change the final definition item of a definition list into a `Plain` iff all other items are `Plain`s as well, is useful in many parsers and hence moved into Text.Pandoc.Shared. --- src/Text/Pandoc/Readers/Markdown.hs | 17 ----------------- src/Text/Pandoc/Shared.hs | 20 +++++++++++++++++--- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 57e1ca560..053385d20 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -861,22 +861,6 @@ definitionList = do items <- fmap sequence $ many1 definitionListItem return $ B.definitionList <$> fmap compactify'DL items -compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] -compactify'DL items = - let defs = concatMap snd items - defBlocks = reverse $ concatMap B.toList defs - isPara (Para _) = True - isPara _ = False - in case defBlocks of - (Para x:_) -> if not $ any isPara (drop 1 defBlocks) - then let (t,ds) = last items - lastDef = B.toList $ last ds - ds' = init ds ++ - [B.fromList $ init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - else items - _ -> items - -- -- paragraph block -- @@ -1892,4 +1876,3 @@ doubleQuoted = try $ do (withQuoteContext InDoubleQuote $ doubleQuoteEnd >> return (fmap B.doubleQuoted . trimInlinesF $ contents)) <|> (return $ return (B.str "\8220") <> contents) - diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 3835629db..687355701 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -56,6 +56,7 @@ module Text.Pandoc.Shared ( stringify, compactify, compactify', + compactify'DL, Element (..), hierarchicalize, uniqueIdent, @@ -82,7 +83,7 @@ module Text.Pandoc.Shared ( import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Generic -import Text.Pandoc.Builder (Blocks, ToMetaValue(..)) +import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..)) import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 import System.Environment (getProgName) @@ -435,6 +436,21 @@ compactify' items = _ -> items _ -> items +-- | Like @compactify'@, but akts on items of definition lists. +compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] +compactify'DL items = + let defs = concatMap snd items + defBlocks = reverse $ concatMap B.toList defs + in case defBlocks of + (Para x:_) -> if not $ any isPara (drop 1 defBlocks) + then let (t,ds) = last items + lastDef = B.toList $ last ds + ds' = init ds ++ + [B.fromList $ init lastDef ++ [Plain x]] + in init items ++ [(t, ds')] + else items + _ -> items + isPara :: Block -> Bool isPara (Para _) = True isPara _ = False @@ -698,5 +714,3 @@ safeRead s = case reads s of (d,x):_ | all isSpace x -> return d _ -> fail $ "Could not read `" ++ s ++ "'" - - -- cgit v1.2.3 From 8276449520ba85c78f0b4e919fbc9bcf893a74f0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 15:05:03 +0200 Subject: Org reader: Allow for compact definition lists Use `Text.Pandoc.Shared.compactify'DL` to allow for compact definition lists. --- src/Text/Pandoc/Readers/Org.hs | 4 ++-- tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0bc0a2668..c71cc24be 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString , updateLastStrPos ) -import Text.Pandoc.Shared (compactify') +import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) @@ -665,7 +665,7 @@ list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] "list" definitionList :: OrgParser (F Blocks) -definitionList = fmap B.definitionList . sequence +definitionList = fmap B.definitionList . fmap compactify'DL . sequence <$> many1 (definitionListItem bulletListStart) bulletList :: OrgParser (F Blocks) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4cc405c0f..f62b73ce4 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -567,6 +567,18 @@ tests = ]) ] + , "Compact definition list" =: + unlines [ "- ATP :: adenosine 5' triphosphate" + , "- DNA :: deoxyribonucleic acid" + , "- PCR :: polymerase chain reaction" + , "" + ] =?> + definitionList + [ ("ATP", [ plain $ spcSep [ "adenosine", "5'", "triphosphate" ] ]) + , ("DNA", [ plain $ spcSep [ "deoxyribonucleic", "acid" ] ]) + , ("PCR", [ plain $ spcSep [ "polymerase", "chain", "reaction" ] ]) + ] + , "Loose bullet list" =: unlines [ "- apple" , "" -- cgit v1.2.3 From e0688711fd8fb640d96044413aa2d7b1b1cd4e03 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 23 Apr 2014 10:23:02 -0700 Subject: EPUB writer: include extension in epub ids. This fixes a problem with duplicate extensions for fonts and images with the same base name but different extensions. Closes #1254. --- src/Text/Pandoc/Writers/EPUB.hs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index dae45b90f..9f10554a9 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -35,7 +35,7 @@ import Data.Maybe ( fromMaybe ) import Data.List ( isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) -import System.FilePath ( (), takeBaseName, takeExtension, takeFileName ) +import System.FilePath ( (), takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 @@ -56,7 +56,7 @@ import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) -import Data.Char ( toLower, isDigit ) +import Data.Char ( toLower, isDigit, isAlphaNum ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) #if MIN_VERSION_base(4,6,0) @@ -132,6 +132,11 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x +toId :: FilePath -> String +toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' + then x + else '_') . takeFileName + getEPUBMetadata :: WriterOptions -> Meta -> IO EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta @@ -427,7 +432,7 @@ writeEPUB opts doc@(Pandoc meta _) = do -- contents.opf let chapterNode ent = unode "item" ! - ([("id", takeBaseName $ eRelativePath ent), + ([("id", toId $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of @@ -435,14 +440,14 @@ writeEPUB opts doc@(Pandoc meta _) = do xs -> [("properties", unwords xs)]) $ () let chapterRefNode ent = unode "itemref" ! - [("idref", takeBaseName $ eRelativePath ent)] $ () + [("idref", takeFileName $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! - [("id", takeBaseName $ eRelativePath ent), + [("id", toId $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! - [("id", takeBaseName $ eRelativePath ent), + [("id", toId $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () let plainTitle = case docTitle meta of -- cgit v1.2.3 From ec24f9761c71961821c180d3f6eeb3b5c08aaebf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 12:56:25 +0200 Subject: RST reader: Remove duplicate 'http' in PEP links The generated link to PEPs had a duplicate 'http://' in its URL. --- src/Text/Pandoc/Readers/RST.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a574f343a..7785861cc 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1005,7 +1005,7 @@ renderRole contents fmt role attr = case role of where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) where padNo = replicate (4 - length pepNo) '0' ++ pepNo - pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" roleNameEndingIn :: RSTParser Char -> RSTParser String roleNameEndingIn end = many1Till (letter <|> char '-') end -- cgit v1.2.3 From c128daba9dee096ce0e78b81a381f43337b74285 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 17:42:01 +0200 Subject: Org reader: Recognize plain and angle links This adds support for plain links (like http://zeitlens.com) and angle links (like ). --- src/Text/Pandoc/Readers/Org.hs | 34 +++++++++++++++++++++++++--------- tests/Tests/Readers/Org.hs | 14 ++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c71cc24be..7a50b1db9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -45,7 +45,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) -import Data.Char (toLower) +import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M @@ -209,6 +209,9 @@ instance Monoid a => Monoid (F a) where trimInlinesF :: F Inlines -> F Inlines trimInlinesF = liftM trimInlines +returnF :: a -> OrgParser (F a) +returnF = return . return + -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char @@ -291,9 +294,6 @@ orgBlock = try $ do "src" -> codeBlockWithAttr classArgs content _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks where - returnF :: a -> OrgParser (F a) - returnF = return . return - parseVerse :: String -> OrgParser (F Blocks) parseVerse cs = fmap B.para . mconcat . intersperse (pure B.linebreak) @@ -834,7 +834,11 @@ noteMarker = try $ do ] linkOrImage :: OrgParser (F Inlines) -linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" +linkOrImage = explicitOrImageLink + <|> selflinkOrImage + <|> angleLink + <|> plainLink + "link or image" explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do @@ -851,15 +855,27 @@ explicitOrImageLink = try $ do selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return . return $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + returnF $ if isImageFilename src + then B.image src "" "" + else B.link src "" (B.str src) + +plainLink :: OrgParser (F Inlines) +plainLink = try $ do + (orig, src) <- uri + returnF $ B.link src "" (B.str orig) + +angleLink :: OrgParser (F Inlines) +angleLink = try $ do + char '<' + link <- plainLink + char '>' + return link selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") +linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") isImageFilename :: String -> Bool isImageFilename filename = diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f62b73ce4..ed774f527 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -188,6 +188,20 @@ tests = , "Image link" =: "[[sunset.png][dusk.svg]]" =?> (para $ link "sunset.png" "" (image "dusk.svg" "" "")) + + , "Plain link" =: + "Posts on http://zeitlens.com/ can be funny at times." =?> + (para $ spcSep [ "Posts", "on" + , link "http://zeitlens.com/" "" "http://zeitlens.com/" + , "can", "be", "funny", "at", "times." + ]) + + , "Angle link" =: + "Look at for fnords." =?> + (para $ spcSep [ "Look", "at" + , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" + , "for", "fnords." + ]) ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 2f724aaaa4875a21560870d25c3d212f974c6dde Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 17:42:01 +0200 Subject: Org reader: Read anchors as empty spans Anchors (like <>) are parsed as empty spans. --- src/Text/Pandoc/Readers/Org.hs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7a50b1db9..7f1893936 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -740,6 +740,7 @@ inline = , linebreak , footnote , linkOrImage + , anchor , str , endline , emph @@ -886,6 +887,30 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] +-- | Parse an anchor like @<>@ and return an empty span with +-- @anchor-id@ set as id. Legal anchors in org-mode are defined through +-- @org-target-regexp@, which is fairly liberal. Since no link is created if +-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as +-- an anchor. + +anchor :: OrgParser (F Inlines) +anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) + where + name = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + attributes = name >>= \n -> return (solidify n, [], []) + +-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors +-- the org function @org-export-solidify-link-text@. + +solidify :: String -> String +solidify = map replaceSpecialChar + where replaceSpecialChar c + | isAlphaNum c = c + | c `elem` "_.-:" = c + | otherwise = '-' + emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' -- cgit v1.2.3 From d16775e1c7ba248e693817a8d53ebcb9a8332ed5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 24 Apr 2014 11:09:07 -0700 Subject: Render numbers in YAML metadata without decimals when possible. The change to aeson > 0.7 caused numbers to be rendered with decimals. This change causes them to be rendered without decimals wehn possible. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/Markdown.hs | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 3e202484a..eae8b97e0 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -233,6 +233,7 @@ Library blaze-markup >= 0.5.1 && < 0.7, attoparsec >= 0.10 && < 0.12, yaml >= 0.8.8.2 && < 0.9, + scientific >= 0.2 && < 0.3, vector >= 0.10 && < 0.11, hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8 diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 053385d20..d3ca8d26f 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown, import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate ) import qualified Data.Map as M +import Data.Scientific (coefficient, base10Exponent) import Data.Ord ( comparing ) import Data.Char ( isAlphaNum, toLower ) import Data.Maybe @@ -285,7 +286,11 @@ toMetaValue opts x = yamlToMeta :: ReaderOptions -> Yaml.Value -> MetaValue yamlToMeta opts (Yaml.String t) = toMetaValue opts t -yamlToMeta _ (Yaml.Number n) = MetaString $ show n +yamlToMeta _ (Yaml.Number n) + -- avoid decimal points for numbers that don't need them: + | base10Exponent n >= 0 = MetaString $ show + $ coefficient n * (10 ^ base10Exponent n) + | otherwise = MetaString $ show n yamlToMeta _ (Yaml.Bool b) = MetaBool b yamlToMeta opts (Yaml.Array xs) = B.toMetaValue $ map (yamlToMeta opts) $ V.toList xs -- cgit v1.2.3 From e6333a9d7cfe8ec36acae59bd19a654d868f4b8d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 24 Apr 2014 16:44:49 -0700 Subject: Markdown writer: Use proper escapes to avoid unwanted lists. Previously we used 0-width spaces, an ugly hack. Closes #980. --- src/Text/Pandoc/Writers/Markdown.hs | 27 ++++++++++++++++----------- tests/writer.opml | 10 +++++----- 2 files changed, 21 insertions(+), 16 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index e8f976da1..95082add6 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -316,20 +316,25 @@ blockToMarkdown opts (Div attrs ils) = do contents <> blankline <> "
" <> blankline blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines - return $ contents <> cr + -- escape if para starts with ordered list marker + st <- get + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let rendered = render colwidth contents + let escapeDelimiter (x:xs) | x `elem` ".()" = '\\':x:xs + | otherwise = x : escapeDelimiter xs + escapeDelimiter [] = [] + let contents' = if isEnabled Ext_all_symbols_escapable opts && + not (stPlain st) && beginsWithOrderedListMarker rendered + then text $ escapeDelimiter rendered + else contents + return $ contents' <> cr -- title beginning with fig: indicates figure blockToMarkdown opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = blockToMarkdown opts (Para [Image alt (src,tit)]) -blockToMarkdown opts (Para inlines) = do - contents <- inlineListToMarkdown opts inlines - -- escape if para starts with ordered list marker - st <- get - let esc = if isEnabled Ext_all_symbols_escapable opts && - not (stPlain st) && - beginsWithOrderedListMarker (render Nothing contents) - then text "\x200B" -- zero-width space, a hack - else empty - return $ esc <> contents <> blankline +blockToMarkdown opts (Para inlines) = + (<> blankline) `fmap` blockToMarkdown opts (Plain inlines) blockToMarkdown opts (RawBlock f str) | f == "html" = do st <- get diff --git a/tests/writer.opml b/tests/writer.opml index 228cad247..34d161fb8 100644 --- a/tests/writer.opml +++ b/tests/writer.opml @@ -18,7 +18,7 @@ - + @@ -55,18 +55,18 @@ - + - + - + - + -- cgit v1.2.3 From cbeb3bb2132908b76e3a83e61ff99418ebdf83b4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 24 Apr 2014 17:37:10 -0700 Subject: EPUB writer: Fixed some idrefs to match changes in ids. --- src/Text/Pandoc/Writers/EPUB.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 9f10554a9..c39a7798d 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -440,7 +440,7 @@ writeEPUB opts doc@(Pandoc meta _) = do xs -> [("properties", unwords xs)]) $ () let chapterRefNode ent = unode "itemref" ! - [("idref", takeFileName $ eRelativePath ent)] $ () + [("idref", toId $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! [("id", toId $ eRelativePath ent), ("href", eRelativePath ent), @@ -488,8 +488,8 @@ writeEPUB opts doc@(Pandoc meta _) = do case epubCoverImage metadata of Nothing -> [] Just _ -> [ unode "itemref" ! - [("idref", "cover"),("linear","no")] $ () ] - ++ ((unode "itemref" ! [("idref", "title_page") + [("idref", "cover_xhtml"),("linear","no")] $ () ] + ++ ((unode "itemref" ! [("idref", "title_page_xhtml") ,("linear", if null (docTitle meta) then "no" else "yes")] $ ()) : -- cgit v1.2.3 From 2eec20d92fd0f498da5b66ac03cf6f8159392323 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 25 Apr 2014 15:29:28 +0200 Subject: Org reader: Enable internal links Internal links in Org are possible by using an anchor-name as the target of a link: [[some-anchor][This]] is an internal link. It links <> here. --- src/Text/Pandoc/Readers/Org.hs | 50 ++++++++++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 25 +++++++++++++++++++++ 2 files changed, 63 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7f1893936..0e52bff90 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateAnchorIds :: [String] , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int @@ -105,6 +106,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateAnchorIds = [] , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing @@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +recordAnchorId :: String -> OrgParser () +recordAnchorId i = updateState $ \s -> + s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> let attrs = orgStateBlockAttributes s @@ -848,17 +854,14 @@ explicitOrImageLink = try $ do title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ B.link src "" <$> - if isImageFilename src && isImageFilename title - then return $ B.image title mempty mempty - else title' + return $ if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - returnF $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + return $ linkToInlinesF src (B.str src) plainLink :: OrgParser (F Inlines) plainLink = try $ do @@ -878,6 +881,26 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF s@('#':_) = pure . B.link s "" +linkToInlinesF s + | isImageFilename s = const . pure $ B.image s "" "" + | isUri s = pure . B.link s "" + | isRelativeUrl s = pure . B.link s "" +linkToInlinesF s = \title -> do + anchorB <- (s `elem`) <$> asksF orgStateAnchorIds + if anchorB + then pure $ B.link ('#':s) "" title + else pure $ B.emph title + +isRelativeUrl :: String -> Bool +isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s) + +isUri :: String -> Bool +isUri s = let (scheme, path) = break (== ':') s + in all (\c -> isAlphaNum c || c `elem` ".-") scheme + && not (null path) + isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && @@ -894,12 +917,15 @@ isImageFilename filename = -- an anchor. anchor :: OrgParser (F Inlines) -anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) +anchor = try $ do + anchorId <- parseAnchor + recordAnchorId anchorId + returnF $ B.spanWith (solidify anchorId, [], []) mempty where - name = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - attributes = name >>= \n -> return (solidify n, [], []) + parseAnchor = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors -- the org function @org-export-solidify-link-text@. diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ed774f527..96747d148 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -202,6 +202,11 @@ tests = , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" , "for", "fnords." ]) + + , "Anchor" =: + "<> Link here later." =?> + (para $ spanWith ("anchor", [], []) mempty <> + "Link" <> space <> "here" <> space <> "later.") ] , testGroup "Meta Information" $ @@ -279,6 +284,26 @@ tests = , ":END:" ] =?> para (":FOO:" <> space <> ":END:") + + , "Anchor reference" =: + unlines [ "<> Target." + , "" + , "[[link-here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (link "#link-here" "" ("See" <> space <> "here!"))) + + , "Search links are read as emph" =: + "[[Wally][Where's Wally?]]" =?> + (para (emph $ "Where's" <> space <> "Wally?")) + + , "Link to nonexistent anchor" =: + unlines [ "<> Target." + , "" + , "[[link$here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (emph ("See" <> space <> "here!"))) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From b09412d852880a0c8e18e1cab9b0ce33f0e0e8a2 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 25 Apr 2014 16:14:52 +0200 Subject: LaTeX writer: Mark span contents with label if span has an ID Prepend `\label{span-id}` to span contents iff `span-id` is defined. --- src/Text/Pandoc/Writers/LaTeX.hs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e12c9078f..e52220f01 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -655,16 +655,20 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToLaTeX (Span (_,classes,_) ils) = do +inlineToLaTeX (Span (id',classes,_) ils) = do let noEmph = "csl-no-emph" `elem` classes let noStrong = "csl-no-strong" `elem` classes let noSmallCaps = "csl-no-smallcaps" `elem` classes - ((if noEmph then inCmd "textup" else id) . - (if noStrong then inCmd "textnormal" else id) . - (if noSmallCaps then inCmd "textnormal" else id) . - (if not (noEmph || noStrong || noSmallCaps) - then braces - else id)) `fmap` inlineListToLaTeX ils + let label' = if (null id') + then empty + else text "\\label" <> braces (text $ toLabel id') + fmap (label' <>) + ((if noEmph then inCmd "textup" else id) . + (if noStrong then inCmd "textnormal" else id) . + (if noSmallCaps then inCmd "textnormal" else id) . + (if not (noEmph || noStrong || noSmallCaps) + then braces + else id)) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = -- cgit v1.2.3 From 35ea8de3690fcd9bf06532576ced4d82fd51f26d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 26 Apr 2014 12:04:08 -0700 Subject: HTML writer: improved detection of image links. Previously image links with queries were not recognized, leading to use of an embed tag rather than an img tag. --- src/Text/Pandoc/Writers/HTML.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index e0385af25..1de4345f9 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -40,6 +40,7 @@ import Text.Pandoc.Slides import Text.Pandoc.Highlighting ( highlight, styleToCss, formatHtmlInline, formatHtmlBlock ) import Text.Pandoc.XML (fromEntities, escapeStringForXML) +import Network.URI ( parseURIReference, URI(..) ) import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) @@ -396,7 +397,10 @@ imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", treatAsImage :: FilePath -> Bool treatAsImage fp = - let ext = map toLower $ drop 1 $ takeExtension fp + let path = case uriPath `fmap` parseURIReference fp of + Nothing -> fp + Just up -> up + ext = map toLower $ drop 1 $ takeExtension path in null ext || ext `elem` imageExts -- | Convert Pandoc block element to HTML. -- cgit v1.2.3 From 22e36e104058b0ea93dbda106374f6c02bbf36d6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 26 Apr 2014 12:14:42 -0700 Subject: LaTeX reader: Made `\nocite` work. This adds nocite citations to a metadata field, `nocite`. These will appear in the bibliography but not in the text (unless you use a `$nocite$` variable in your template, of course). --- src/Text/Pandoc/Readers/LaTeX.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index fd761dbec..b5d529eb9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -322,7 +322,8 @@ blockCommands = M.fromList $ ] addMeta :: ToMetaValue a => String -> a -> LP () -addMeta field val = updateState $ setMeta field val +addMeta field val = updateState $ \st -> + st{ stateMeta = addMetaField field val $ stateMeta st } setCaption :: Inlines -> LP Blocks setCaption ils = do @@ -341,7 +342,7 @@ authors = try $ do -- skip e.g. \vspace{10pt} auths <- sepBy oneAuthor (controlSeq "and") char '}' - addMeta "authors" (map trimInlines auths) + addMeta "author" (map trimInlines auths) section :: Attr -> Int -> LP Blocks section (ident, classes, kvs) lvl = do @@ -525,10 +526,12 @@ inlineCommands = M.fromList $ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> complexNatbibCitation AuthorInText) <|> citation "citeauthor" AuthorInText False) + , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= + addMeta "nocite")) ] ++ map ignoreInlines -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks: - [ "noindent", "index", "nocite" ] + [ "noindent", "index" ] mkImage :: String -> LP Inlines mkImage src = do -- cgit v1.2.3 From c8f97d3d418f929a1f499a9ef37c17d71f282d45 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 27 Apr 2014 20:56:50 -0700 Subject: Fix #1267. We now check the writerName for a lua script in pandoc.hs, so that lowercasing and format parsing aren't done. Note this behavior change: getWriter in Text.Pandoc no longer returns a custom writer on input "foo.lua". --- pandoc.hs | 23 +++++++++++++---------- src/Text/Pandoc.hs | 35 ++++++++++++++++------------------- 2 files changed, 29 insertions(+), 29 deletions(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index d39ed3a59..93e1d4a76 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -47,7 +47,7 @@ import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt import Data.Char ( toLower ) -import Data.List ( intercalate, isPrefixOf, sort ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort ) import System.Directory ( getAppUserDataDirectory, findExecutable, doesFileExist ) import System.IO ( stdout, stderr ) @@ -1021,15 +1021,18 @@ main = do let laTeXOutput = "latex" `isPrefixOf` writerName' || "beamer" `isPrefixOf` writerName' - writer <- case getWriter writerName' of - Left e -> err 9 $ - if writerName' == "pdf" - then e ++ "\nTo create a pdf with pandoc, use the " ++ - "latex or beamer writer and specify\n" ++ - "an output file with .pdf extension " ++ - "(pandoc -t latex -o filename.pdf)." - else e - Right w -> return w + writer <- if ".lua" `isSuffixOf` writerName' + -- note: use non-lowercased version writerName + then return $ IOStringWriter $ writeCustom writerName + else case getWriter writerName' of + Left e -> err 9 $ + if writerName' == "pdf" + then e ++ "\nTo create a pdf with pandoc, use " ++ + "the latex or beamer writer and specify\n" ++ + "an output file with .pdf extension " ++ + "(pandoc -t latex -o filename.pdf)." + else e + Right w -> return w reader <- case getReader readerName' of Right r -> return r diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 66b0e49c0..a37c98814 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -152,7 +152,7 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (safeRead, warn) import Data.Aeson import qualified Data.ByteString.Lazy as BL -import Data.List (intercalate, isSuffixOf) +import Data.List (intercalate) import Data.Version (showVersion) import Data.Set (Set) import qualified Data.Set as Set @@ -292,24 +292,21 @@ getReader s = -- | Retrieve writer based on formatSpec (format+extensions). getWriter :: String -> Either String Writer -getWriter s = - case parseFormatSpec s of - Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] - Right (writerName, setExts) -> - case lookup writerName writers of - Nothing - | ".lua" `isSuffixOf` s -> - Right $ IOStringWriter $ writeCustom s - | otherwise -> Left $ "Unknown writer: " ++ writerName - Just (PureStringWriter r) -> Right $ PureStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (IOStringWriter r) -> Right $ IOStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } - Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ - \o -> r o{ writerExtensions = setExts $ - getDefaultExtensions writerName } +getWriter s + = case parseFormatSpec s of + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] + Right (writerName, setExts) -> + case lookup writerName writers of + Nothing -> Left $ "Unknown writer: " ++ writerName + Just (PureStringWriter r) -> Right $ PureStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOStringWriter r) -> Right $ IOStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } {-# DEPRECATED toJsonFilter "Use 'toJSONFilter' from 'Text.Pandoc.JSON' instead" #-} -- | Deprecated. Use @toJSONFilter@ from @Text.Pandoc.JSON@ instead. -- cgit v1.2.3 From 093229dc35506bff88f4edc6f2ae5316d621f8ff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 08:58:10 -0700 Subject: ConTeXt writer: Improved autolinks. Closes #1270. --- src/Text/Pandoc/Writers/ConTeXt.hs | 16 +++++----------- tests/writer.context | 10 ++++------ 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 3095cf508..cec420dcf 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Walk (query) import Text.Printf ( printf ) -import Data.List ( intercalate, isPrefixOf ) +import Data.List ( intercalate ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate' ) @@ -283,14 +283,6 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str inlineToConTeXt (RawInline _ _) = return empty inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr inlineToConTeXt Space = return space --- autolink -inlineToConTeXt (Link [Str str] (src, tit)) - | if "mailto:" `isPrefixOf` src - then src == escapeURI ("mailto:" ++ str) - else src == escapeURI str = - inlineToConTeXt (Link - [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"] - (src, tit)) -- Handle HTML-like internal document references to sections inlineToConTeXt (Link txt (('#' : ref), _)) = do opts <- gets stOptions @@ -305,6 +297,7 @@ inlineToConTeXt (Link txt (('#' : ref), _)) = do <> brackets (text ref) inlineToConTeXt (Link txt (src, _)) = do + let isAutolink = txt == [Str src] st <- get let next = stNextRef st put $ st {stNextRef = next + 1} @@ -313,8 +306,9 @@ inlineToConTeXt (Link txt (src, _)) = do return $ "\\useURL" <> brackets (text ref) <> brackets (text $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) - <> brackets empty - <> brackets label + <> (if isAutolink + then empty + else brackets empty <> brackets label) <> "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do diff --git a/tests/writer.context b/tests/writer.context index 0b031fd76..0c5024d89 100644 --- a/tests/writer.context +++ b/tests/writer.context @@ -813,24 +813,22 @@ braces]\from[url26]. \subsection[autolinks]{Autolinks} -With an ampersand: -\useURL[url27][http://example.com/?foo=1&bar=2][][\hyphenatedurl{http://example.com/?foo=1&bar=2}]\from[url27] +With an ampersand: \useURL[url27][http://example.com/?foo=1&bar=2]\from[url27] \startitemize[packed] \item In a list? \item - \useURL[url28][http://example.com/][][\hyphenatedurl{http://example.com/}]\from[url28] + \useURL[url28][http://example.com/]\from[url28] \item It should. \stopitemize An e-mail address: -\useURL[url29][mailto:nobody@nowhere.net][][\hyphenatedurl{nobody@nowhere.net}]\from[url29] +\useURL[url29][mailto:nobody@nowhere.net][][nobody@nowhere.net]\from[url29] \startblockquote -Blockquoted: -\useURL[url30][http://example.com/][][\hyphenatedurl{http://example.com/}]\from[url30] +Blockquoted: \useURL[url30][http://example.com/]\from[url30] \stopblockquote Auto-links should not occur here: \type{} -- cgit v1.2.3 From 81bf82c258f12700d64c8d090f75a90c8a18ec61 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 09:59:36 -0700 Subject: RST reader: Better handling of directives. * We now correctly handle field lists that are indented more than 3 spaces. * We treat an "aafig" directive as a code block with attributes, so it can be processed in a filter. (Closes #1212.) --- src/Text/Pandoc/Readers/RST.hs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7785861cc..4dc1fa006 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -185,22 +185,22 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: String -> RSTParser (String, String) -rawFieldListItem indent = try $ do - string indent +rawFieldListItem :: Int -> RSTParser (String, String) +rawFieldListItem minIndent = try $ do + indent <- length <$> many (char ' ') + guard $ indent >= minIndent char ':' name <- many1Till (noneOf "\n") (char ':') (() <$ lookAhead newline) <|> skipMany1 spaceChar first <- anyLine - rest <- option "" $ try $ do lookAhead (string indent >> spaceChar) + rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" - return (name, raw) + return (name, trimr raw) -fieldListItem :: String - -> RSTParser (Inlines, [Blocks]) -fieldListItem indent = try $ do - (name, raw) <- rawFieldListItem indent +fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) +fieldListItem minIndent = try $ do + (name, raw) <- rawFieldListItem minIndent let term = B.str name contents <- parseFromString parseBlocks raw optional blanklines @@ -208,7 +208,7 @@ fieldListItem indent = try $ do fieldList :: RSTParser Blocks fieldList = try $ do - indent <- lookAhead $ many spaceChar + indent <- length <$> lookAhead (many spaceChar) items <- many1 $ fieldListItem indent case items of [] -> return mempty @@ -521,11 +521,11 @@ directive' = do skipMany spaceChar top <- many $ satisfy (/='\n') <|> try (char '\n' <* - notFollowedBy' (rawFieldListItem " ") <* + notFollowedBy' (rawFieldListItem 3) <* count 3 (char ' ') <* notFollowedBy blankline) newline - fields <- many $ rawFieldListItem " " + fields <- many $ rawFieldListItem 3 body <- option "" $ try $ blanklines >> indentedBlock optional blanklines let body' = body ++ "\n\n" @@ -576,6 +576,9 @@ directive' = do role -> role }) "code" -> codeblock (lookup "number-lines" fields) (trim top) body "code-block" -> codeblock (lookup "number-lines" fields) (trim top) body + "aafig" -> do + let attribs = ("", ["aafig"], fields) + return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body "figure" -> do -- cgit v1.2.3 From eaba340b9381264f3706c780182711a8713b3def Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Apr 2014 11:28:18 -0700 Subject: RST reader: Some fixes to last change, and use "author" not "authors". (in metadata) --- src/Text/Pandoc/Readers/RST.hs | 13 +++++++------ tests/rst-reader.native | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 4dc1fa006..54b6fa34a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -113,15 +113,16 @@ titleTransform (bs, meta) = metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta metaFromDefList ds meta = adjustAuthors $ foldr f meta ds where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) - adjustAuthors (Meta metamap) = Meta $ M.adjust toPlain "author" + adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" - $ M.adjust splitAuthors "authors" + $ M.mapKeys (\k -> if k == "authors" then "author" else k) $ metamap toPlain (MetaBlocks [Para xs]) = MetaInlines xs toPlain x = x - splitAuthors (MetaBlocks [Para xs]) = MetaList $ map MetaInlines - $ splitAuthors' xs + splitAuthors (MetaBlocks [Para xs]) + = MetaList $ map MetaInlines + $ splitAuthors' xs splitAuthors x = x splitAuthors' = map normalizeSpaces . splitOnSemi . concatMap factorSemi @@ -196,7 +197,7 @@ rawFieldListItem minIndent = try $ do rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" - return (name, trimr raw) + return (name, raw) fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) fieldListItem minIndent = try $ do @@ -577,7 +578,7 @@ directive' = do "code" -> codeblock (lookup "number-lines" fields) (trim top) body "code-block" -> codeblock (lookup "number-lines" fields) (trim top) body "aafig" -> do - let attribs = ("", ["aafig"], fields) + let attribs = ("", ["aafig"], map (\(k,v) -> (k, trimr v)) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath $ toChunks $ top ++ "\n\n" ++ body diff --git a/tests/rst-reader.native b/tests/rst-reader.native index fd48bc60c..c77d15775 100644 --- a/tests/rst-reader.native +++ b/tests/rst-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("authors",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("date",MetaInlines [Str "July",Space,Str "17,",Space,Str "2006"]),("revision",MetaBlocks [Para [Str "3"]]),("subtitle",MetaInlines [Str "Subtitle"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Header 1 ("level-one-header",[],[]) [Str "Level",Space,Str "one",Space,Str "header"] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber\8217s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 2 ("level-two-header",[],[]) [Str "Level",Space,Str "two",Space,Str "header"] -- cgit v1.2.3 From 8726eebcd363ccb33ea8c297b004feca7ef37ceb Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 30 Apr 2014 11:16:01 +0200 Subject: Org reader: Add support for custom link types Org allows users to define their own custom link types. E.g., in a document with a lot of links to Wikipedia articles, one can define a custom wikipedia link-type via #+LINK: wp https://en.wikipedia.org/wiki/ This allows to write [[wp:Org_mode][Org-mode]] instead of the equivallent [[https://en.wikipedia.org/wiki/Org_mode][Org-mode]]. --- src/Text/Pandoc/Readers/Org.hs | 68 +++++++++++++++++++++++++++++++++++++----- tests/Tests/Readers/Org.hs | 26 ++++++++++++++++ 2 files changed, 86 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e52bff90..d68ef45ef 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Monad (foldM, guard, liftM, liftM2, when) +import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -51,6 +51,7 @@ import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Network.HTTP (urlEncode) -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options @@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String +type OrgLinkFormatters = M.Map String (String -> String) + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -86,6 +89,7 @@ data OrgParserState = OrgParserState , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos + , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable @@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing + , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] @@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + addToNotesTable :: OrgNoteRecord -> OrgParser () addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState @@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine metaLine :: OrgParser Blocks -metaLine = try $ metaLineStart *> declarationLine +metaLine = try $ mempty + <$ (metaLineStart *> (optionLine <|> declarationLine)) commentLine :: OrgParser Blocks commentLine = try $ commentLineStart *> anyLine *> pure mempty @@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" commentLineStart :: OrgParser String commentLineStart = try $ mappend <$> many spaceChar <*> string "# " -declarationLine :: OrgParser Blocks +declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey inlinesF <- metaInlines updateState $ \st -> let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta in st { orgStateMeta' = orgStateMeta' st <> meta' } - return mempty + return () metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline @@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + _ -> mzero + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + -- -- Headers -- @@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' - src <- linkTarget + srcF <- applyCustomLinkFormat =<< linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + return $ do + src <- srcF + if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do @@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat link = do + let (linkType, rest) = break (== ':') link + return $ do + formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters + return $ maybe link ($ drop 1 rest) formatter + + linkToInlinesF :: String -> Inlines -> F Inlines linkToInlinesF s@('#':_) = pure . B.link s "" linkToInlinesF s diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 96747d148..78684f0f1 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -304,6 +304,32 @@ tests = ] =?> (para (spanWith ("link-here", [], []) mempty <> "Target.") <> para (emph ("See" <> space <> "here!"))) + + , "Link abbreviation" =: + unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s" + , "[[wp:Org_mode][Wikipedia on Org-mode]]" + ] =?> + (para (link "https://en.wikipedia.org/wiki/Org_mode" "" + ("Wikipedia" <> space <> "on" <> space <> "Org-mode"))) + + , "Link abbreviation, defined after first use" =: + unlines [ "[[zl:non-sense][Non-sense articles]]" + , "#+LINK: zl http://zeitlens.com/tags/%s.html" + ] =?> + (para (link "http://zeitlens.com/tags/non-sense.html" "" + ("Non-sense" <> space <> "articles"))) + + , "Link abbreviation, URL encoded arguments" =: + unlines [ "#+link: expl http://example.com/%h/foo" + , "[[expl:Hello, World!][Moin!]]" + ] =?> + (para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")) + + , "Link abbreviation, append arguments" =: + unlines [ "#+link: expl http://example.com/" + , "[[expl:foo][bar]]" + ] =?> + (para (link "http://example.com/foo" "" "bar")) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From 007eb96e06bc1fff12119addf2e03552ac992b2e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 1 May 2014 09:23:21 -0700 Subject: Markdown reader: Make one-column pipe tables work. Closes #1218. --- src/Text/Pandoc/Readers/Markdown.hs | 22 ++++++++++++++++------ tests/pipe-tables.native | 10 +++++++++- tests/pipe-tables.txt | 10 ++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d3ca8d26f..97a3168da 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1221,11 +1221,20 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines +pipeBreak :: MarkdownParser [Alignment] +pipeBreak = try $ do + nonindentSpaces + openPipe <- (True <$ char '|') <|> return False + first <- pipeTableHeaderPart + rest <- many $ sepPipe *> pipeTableHeaderPart + -- surrounding pipes needed for a one-column table: + guard $ not (null rest && not openPipe) + optional (char '|') + blankline + return (first:rest) + pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do - let pipeBreak = nonindentSpaces *> optional (char '|') *> - pipeTableHeaderPart `sepBy1` sepPipe <* - optional (char '|') <* blankline (heads,aligns) <- try ( pipeBreak >>= \als -> return (return $ replicate (length als) mempty, als)) <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> @@ -1244,12 +1253,13 @@ sepPipe = try $ do pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do nonindentSpaces - optional (char '|') + openPipe <- (True <$ char '|') <|> return False let cell = mconcat <$> many (notFollowedBy (blankline <|> char '|') >> inline) first <- cell - sepPipe - rest <- cell `sepBy1` sepPipe + rest <- many $ sepPipe *> cell + -- surrounding pipes needed for a one-column table: + guard $ not (null rest && not openPipe) optional (char '|') blankline let cells = sequence (first:rest) diff --git a/tests/pipe-tables.native b/tests/pipe-tables.native index 5420a7bd3..eafd21d22 100644 --- a/tests/pipe-tables.native +++ b/tests/pipe-tables.native @@ -67,4 +67,12 @@ ,[[Plain [Str "orange"]] ,[Plain [Str "17"]]] ,[[Plain [Str "pear"]] - ,[Plain [Str "302"]]]]] + ,[Plain [Str "302"]]]] +,Para [Str "One-column:"] +,Table [] [AlignDefault] [0.0] + [[Plain [Str "hi"]]] + [[[Plain [Str "lo"]]]] +,Para [Str "Header-less",Space,Str "one-column:"] +,Table [] [AlignCenter] [0.0] + [[]] + [[[Plain [Str "hi"]]]]] diff --git a/tests/pipe-tables.txt b/tests/pipe-tables.txt index 79d79200f..ee8d54d9f 100644 --- a/tests/pipe-tables.txt +++ b/tests/pipe-tables.txt @@ -40,3 +40,13 @@ apple | 5 orange| 17 pear | 302 +One-column: + +|hi| +|--| +|lo| + +Header-less one-column: + +|:-:| +|hi| -- cgit v1.2.3 From 4c4382420356928d73026395d4ab2f0f9957df08 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 2 May 2014 22:58:47 -0700 Subject: Fixed empty reference links. Closes #1186. Includes test. --- src/Text/Pandoc/Readers/Markdown.hs | 4 +--- tests/markdown-reader-more.native | 5 ++++- tests/markdown-reader-more.txt | 8 ++++++++ 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 97a3168da..36f73d847 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -349,10 +349,8 @@ referenceKey = try $ do char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') let sourceURL = liftM unwords $ many $ try $ do - notFollowedBy' referenceTitle - skipMany spaceChar - optional $ newline >> notFollowedBy blankline skipMany spaceChar + notFollowedBy' referenceTitle notFollowedBy' (() <$ reference) many1 $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 27f09dada..0d74c233d 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -136,4 +136,7 @@ ,Para [Link [Str "link"] ("/hithere)","")] ,Para [Link [Str "linky"] ("hi_(there_(nested))","")] ,Header 2 ("reference-link-fallbacks",[],[]) [Str "Reference",Space,Str "link",Space,Str "fallbacks"] -,Para [Str "[",Emph [Str "not",Space,Str "a",Space,Str "link"],Str "]",Space,Str "[",Emph [Str "nope"],Str "]\8230"]] +,Para [Str "[",Emph [Str "not",Space,Str "a",Space,Str "link"],Str "]",Space,Str "[",Emph [Str "nope"],Str "]\8230"] +,Header 2 ("empty-reference-links",[],[]) [Str "Empty",Space,Str "reference",Space,Str "links"] +,Para [Str "bar"] +,Para [Link [Str "foo2"] ("","")]] diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index d133b3dbb..739543bfd 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -235,3 +235,11 @@ Empty cells ## Reference link fallbacks [*not a link*] [*nope*]... + +## Empty reference links + +[foo2]: + +bar + +[foo2] -- cgit v1.2.3 From 743dac493fab08abdec59feb7bd57030a3ba5c90 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 15:15:04 -0700 Subject: LaTeX reader: Better error messages with include files. Closes #1274. Rewrote handleIncludes. We now report the actual source file and position where the error occurs, even if it is included. We do this by inserting special commands, `\PandocStartInclude` and `\PandocEndInclude`, that encode this information in the preprocessing phase. Also generalized the types of a couple functions from `Text.Pandoc.Parsing`. --- src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/Readers/LaTeX.hs | 172 ++++++++++++++++++++++++++------------- 2 files changed, 118 insertions(+), 56 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index a9009eaa2..4d0a677da 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -504,7 +504,7 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char]) +withRaw :: Monad m => ParsecT [Char] st m a -> ParsecT [Char] st m (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index b5d529eb9..d1e0b6f0a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -40,8 +40,10 @@ import Text.Pandoc.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, mathDisplay, mathInline) +import Text.Parsec.Prim (ParsecT, runParserT) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) +import Control.Monad.Trans (lift) import Control.Monad import Text.Pandoc.Builder import Data.Char (isLetter, isAlphaNum) @@ -303,6 +305,8 @@ blockCommands = M.fromList $ , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", tok >>= setCaption) + , ("PandocStartInclude", startInclude) + , ("PandocEndInclude", endInclude) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks @@ -794,31 +798,107 @@ rawEnv name = do (withRaw (env name blocks) >>= applyMacros' . snd) else env name blocks +---- + +type IncludeParser = ParsecT [Char] [String] IO String + -- | Replace "include" commands with file contents. handleIncludes :: String -> IO String -handleIncludes = handleIncludes' [] - --- parents parameter prevents infinite include loops -handleIncludes' :: [FilePath] -> String -> IO String -handleIncludes' _ [] = return [] -handleIncludes' parents ('\\':'%':xs) = - ("\\%"++) `fmap` handleIncludes' parents xs -handleIncludes' parents ('%':xs) = handleIncludes' parents - $ drop 1 $ dropWhile (/='\n') xs -handleIncludes' parents ('\\':xs) = - case runParser include defaultParserState "input" ('\\':xs) of - Right (fs, rest) -> do yss <- mapM (\f -> if f `elem` parents - then "" <$ warn ("Include file loop in '" - ++ f ++ "'.") - else readTeXFile f >>= - handleIncludes' (f:parents)) fs - rest' <- handleIncludes' parents rest - return $ intercalate "\n" yss ++ rest' - _ -> case runParser (verbCmd <|> verbatimEnv) defaultParserState - "input" ('\\':xs) of - Right (r, rest) -> (r ++) `fmap` handleIncludes' parents rest - _ -> ('\\':) `fmap` handleIncludes' parents xs -handleIncludes' parents (x:xs) = (x:) `fmap` handleIncludes' parents xs +handleIncludes s = do + res <- runParserT includeParser' [] "input" s + case res of + Right s' -> return s' + Left e -> error $ show e + +includeParser' :: IncludeParser +includeParser' = + concat <$> many (comment' <|> escaped' <|> blob' <|> include' + <|> startMarker' <|> endMarker' + <|> verbCmd' <|> verbatimEnv' <|> backslash') + +comment' :: IncludeParser +comment' = do + char '%' + xs <- manyTill anyChar newline + return ('%':xs ++ "\n") + +escaped' :: IncludeParser +escaped' = try $ string "\\%" <|> string "\\\\" + +verbCmd' :: IncludeParser +verbCmd' = fmap snd <$> + withRaw $ try $ do + string "\\verb" + c <- anyChar + manyTill anyChar (char c) + +verbatimEnv' :: IncludeParser +verbatimEnv' = fmap snd <$> + withRaw $ try $ do + string "\\begin" + name <- braced' + guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", + "minted", "alltt"] + manyTill anyChar (try $ string $ "\\end{" ++ name ++ "}") + +blob' :: IncludeParser +blob' = try $ many1 (noneOf "\\%") + +backslash' :: IncludeParser +backslash' = string "\\" + +braced' :: IncludeParser +braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') + +include' :: IncludeParser +include' = do + name <- try $ do + char '\\' + try (string "include") + <|> try (string "input") + <|> string "usepackage" + -- skip options + skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) + fs <- (map trim . splitBy (==',')) <$> braced' + pos <- getPosition + let fs' = if name == "usepackage" + then map (flip replaceExtension ".sty") fs + else map (flip replaceExtension ".tex") fs + containers <- getState + let fn = case containers of + (f':_) -> f' + [] -> "input" + -- now process each include file in order... + rest <- getInput + results' <- forM fs' (\f -> do + when (f `elem` containers) $ + fail "Include file loop!" + contents <- lift $ readTeXFile f + return $ "\\PandocStartInclude{" ++ f ++ "}" ++ + contents ++ "\\PandocEndInclude{" ++ + fn ++ "}{" ++ show (sourceLine pos) ++ "}{" + ++ show (sourceColumn pos) ++ "}") + setInput $ concat results' ++ rest + return "" + +startMarker' :: IncludeParser +startMarker' = try $ do + string "\\PandocStartInclude" + fn <- braced' + updateState (fn:) + setPosition $ newPos fn 1 1 + return $ "\\PandocStartInclude{" ++ fn ++ "}" + +endMarker' :: IncludeParser +endMarker' = try $ do + string "\\PandocEndInclude" + fn <- braced' + ln <- braced' + co <- braced' + updateState tail + setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) + return $ "\\PandocEndInclude{" ++ fn ++ "}{" ++ ln ++ "}{" ++ + co ++ "}" readTeXFile :: FilePath -> IO String readTeXFile f = do @@ -833,27 +913,7 @@ readFileFromDirs (d:ds) f = E.catch (UTF8.readFile $ d f) $ \(_ :: E.SomeException) -> readFileFromDirs ds f -include :: LP ([FilePath], String) -include = do - name <- controlSeq "include" - <|> controlSeq "input" - <|> controlSeq "usepackage" - skipopts - fs <- (splitBy (==',')) <$> braced - rest <- getInput - let fs' = if name == "usepackage" - then map (flip replaceExtension ".sty") fs - else map (flip replaceExtension ".tex") fs - return (fs', rest) - -verbCmd :: LP (String, String) -verbCmd = do - (_,r) <- withRaw $ do - controlSeq "verb" - c <- anyChar - manyTill anyChar (char c) - rest <- getInput - return (r, rest) +---- keyval :: LP (String, String) keyval = try $ do @@ -875,17 +935,6 @@ alltt t = walk strToCode <$> parseFromString blocks where strToCode (Str s) = Code nullAttr s strToCode x = x -verbatimEnv :: LP (String, String) -verbatimEnv = do - (_,r) <- withRaw $ do - controlSeq "begin" - name <- braced - guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", - "minted", "alltt"] - verbEnv name - rest <- getInput - return (r,rest) - rawLaTeXBlock :: Parser [Char] ParserState String rawLaTeXBlock = snd <$> try (withRaw (environment <|> blockCommand)) @@ -1218,3 +1267,16 @@ simpTable = try $ do lookAhead $ controlSeq "end" -- make sure we're at end return $ table mempty (zip aligns (repeat 0)) header'' rows +startInclude :: LP Blocks +startInclude = do + fn <- braced + setPosition $ newPos fn 1 1 + return mempty + +endInclude :: LP Blocks +endInclude = do + fn <- braced + ln <- braced + co <- braced + setPosition $ newPos fn (fromMaybe 1 $ safeRead ln) (fromMaybe 1 $ safeRead co) + return mempty -- cgit v1.2.3 From 2ba78730861b0947ed26aec00f0520e5affd5f7c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 18:34:23 -0700 Subject: LaTeX reader: Fixed regression introduced with last commit. Tests now pass again. --- src/Text/Pandoc/Readers/LaTeX.hs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index d1e0b6f0a..79c66b510 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -852,18 +852,18 @@ braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}') include' :: IncludeParser include' = do - name <- try $ do + fs' <- try $ do char '\\' - try (string "include") - <|> try (string "input") - <|> string "usepackage" - -- skip options - skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) - fs <- (map trim . splitBy (==',')) <$> braced' + name <- try (string "include") + <|> try (string "input") + <|> string "usepackage" + -- skip options + skipMany $ try $ char '[' *> (manyTill anyChar (char ']')) + fs <- (map trim . splitBy (==',')) <$> braced' + return $ if name == "usepackage" + then map (flip replaceExtension ".sty") fs + else map (flip replaceExtension ".tex") fs pos <- getPosition - let fs' = if name == "usepackage" - then map (flip replaceExtension ".sty") fs - else map (flip replaceExtension ".tex") fs containers <- getState let fn = case containers of (f':_) -> f' -- cgit v1.2.3 From ccbf4fc9c20ccdce0f7144845cd022ee8cfca1af Mon Sep 17 00:00:00 2001 From: Neil Mayhew Date: Mon, 14 Apr 2014 19:07:14 -0600 Subject: Distinguish tight and loose lists in Docbook output Determined by the first block of the first item being Plain. --- src/Text/Pandoc/Writers/Docbook.hs | 26 +++-- tests/Tests/Writers/Docbook.hs | 199 +++++++++++++++++++++++++++++++++++-- tests/writer.docbook | 60 +++++------ 3 files changed, 236 insertions(+), 49 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 2d6ce3020..e1b62f02d 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -185,10 +185,13 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst + let attribs = case lst of + ((Plain _:_):_) -> [("spacing", "compact")] + _ -> [] + in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = - let attribs = case numstyle of + let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] Example -> [("numeration", "arabic")] @@ -196,14 +199,21 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest + spacing = case first of + (Plain _:_) -> [("spacing", "compact")] + _ -> [] + attribs = numeration ++ spacing + items = if start == 1 + then listItemsToDocbook opts (first:rest) + else (inTags True "listitem" [("override",show start)] + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst + let attribs = case lst of + ((_, (Plain _:_):_):_) -> [("spacing", "compact")] + _ -> [] + in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst blockToDocbook _ (RawBlock f str) | f == "docbook" = text str -- raw XML block | f == "html" = text str -- allow html for backwards compatibility diff --git a/tests/Tests/Writers/Docbook.hs b/tests/Tests/Writers/Docbook.hs index e815b4f5a..97126b473 100644 --- a/tests/Tests/Writers/Docbook.hs +++ b/tests/Tests/Writers/Docbook.hs @@ -31,22 +31,199 @@ lineblock :: Blocks lineblock = para ("some text" <> linebreak <> "and more lines" <> linebreak <> "and again") -lineblock_out :: String -lineblock_out = "some text\n" ++ - "and more lines\n" ++ - "and again" +lineblock_out :: [String] +lineblock_out = [ "some text" + , "and more lines" + , "and again" + ] tests :: [Test] tests = [ testGroup "line blocks" [ "none" =: para "This is a test" - =?> "\n This is a test\n" + =?> unlines + [ "" + , " This is a test" + , "" + ] , "basic" =: lineblock - =?> lineblock_out + =?> unlines lineblock_out , "blockquote" =: blockQuote lineblock - =?> ("
\n" ++ lineblock_out ++ "\n
") - , "footnote" =: para ("This is a test" <> note lineblock <> " of footnotes") - =?> ("\n This is a test\n" ++ - lineblock_out ++ - "\n of footnotes\n") + =?> unlines + ( [ "
" ] ++ + lineblock_out ++ + [ "
" ] + ) + , "footnote" =: para ("This is a test" <> + note lineblock <> + " of footnotes") + =?> unlines + ( [ "" + , " This is a test" ] ++ + lineblock_out ++ + [ " of footnotes" + , "" ] + ) + ] + , testGroup "compact lists" + [ testGroup "bullet" + [ "compact" =: bulletList [plain "a", plain "b", plain "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + , "loose" =: bulletList [para "a", para "b", para "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + ] + , testGroup "ordered" + [ "compact" =: orderedList [plain "a", plain "b", plain "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + , "loose" =: orderedList [para "a", para "b", para "c"] + =?> unlines + [ "" + , " " + , " " + , " a" + , " " + , " " + , " " + , " " + , " b" + , " " + , " " + , " " + , " " + , " c" + , " " + , " " + , "" + ] + ] + , testGroup "definition" + [ "compact" =: definitionList [ ("an", [plain "apple" ]) + , ("a", [plain "banana"]) + , ("an", [plain "orange"])] + =?> unlines + [ "" + , " " + , " " + , " an" + , " " + , " " + , " " + , " apple" + , " " + , " " + , " " + , " " + , " " + , " a" + , " " + , " " + , " " + , " banana" + , " " + , " " + , " " + , " " + , " " + , " an" + , " " + , " " + , " " + , " orange" + , " " + , " " + , " " + , "" + ] + , "loose" =: definitionList [ ("an", [para "apple" ]) + , ("a", [para "banana"]) + , ("an", [para "orange"])] + =?> unlines + [ "" + , " " + , " " + , " an" + , " " + , " " + , " " + , " apple" + , " " + , " " + , " " + , " " + , " " + , " a" + , " " + , " " + , " " + , " banana" + , " " + , " " + , " " + , " " + , " " + , " an" + , " " + , " " + , " " + , " orange" + , " " + , " " + , " " + , "" + ] + ] ] ] diff --git a/tests/writer.docbook b/tests/writer.docbook index 9cb9a5359..26dcbadaa 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -93,7 +93,7 @@ sub status { A list: - + item one @@ -156,7 +156,7 @@ These should not be escaped: \$ \\ \> \[ \{ Asterisks tight: - + asterisk 1 @@ -196,7 +196,7 @@ These should not be escaped: \$ \\ \> \[ \{ Pluses tight: - + Plus 1 @@ -236,7 +236,7 @@ These should not be escaped: \$ \\ \> \[ \{ Minuses tight: - + Minus 1 @@ -279,7 +279,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight: - + First @@ -299,7 +299,7 @@ These should not be escaped: \$ \\ \> \[ \{ and: - + One @@ -383,17 +383,17 @@ These should not be escaped: \$ \\ \> \[ \{ Nested - + Tab - + Tab - + Tab @@ -407,7 +407,7 @@ These should not be escaped: \$ \\ \> \[ \{ Here’s another: - + First @@ -417,7 +417,7 @@ These should not be escaped: \$ \\ \> \[ \{ Second: - + Fee @@ -454,7 +454,7 @@ These should not be escaped: \$ \\ \> \[ \{ Second: - + Fee @@ -508,7 +508,7 @@ These should not be escaped: \$ \\ \> \[ \{ Fancy list markers - + begins with 2 @@ -521,7 +521,7 @@ These should not be escaped: \$ \\ \> \[ \{ with a continuation - + sublist with roman numerals, starting with 4 @@ -531,7 +531,7 @@ These should not be escaped: \$ \\ \> \[ \{ more items - + a subsublist @@ -550,22 +550,22 @@ These should not be escaped: \$ \\ \> \[ \{ Nesting: - + Upper Alpha - + Upper Roman. - + Decimal start with 6 - + Lower alpha with paren @@ -581,7 +581,7 @@ These should not be escaped: \$ \\ \> \[ \{ Autonumbering: - + Autonumber. @@ -591,7 +591,7 @@ These should not be escaped: \$ \\ \> \[ \{ More. - + Nested. @@ -616,7 +616,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight using spaces: - + apple @@ -651,7 +651,7 @@ These should not be escaped: \$ \\ \> \[ \{ Tight using tabs: - + apple @@ -757,7 +757,7 @@ These should not be escaped: \$ \\ \> \[ \{ Multiple definitions, tight: - + apple @@ -841,7 +841,7 @@ These should not be escaped: \$ \\ \> \[ \{ orange fruit - + sublist @@ -1051,7 +1051,7 @@ These should not be escaped: \$ \\ \> \[ \{ LaTeX - + @@ -1097,7 +1097,7 @@ These should not be escaped: \$ \\ \> \[ \{ These shouldn’t be math: - + To get the famous equation, write $e = mc^2$. @@ -1130,7 +1130,7 @@ These should not be escaped: \$ \\ \> \[ \{ Here is some unicode: - + I hat: Î @@ -1316,7 +1316,7 @@ These should not be escaped: \$ \\ \> \[ \{ With an ampersand: http://example.com/?foo=1&bar=2 - + In a list? @@ -1414,7 +1414,7 @@ or here: <http://example.com/> - + And in list items. -- cgit v1.2.3 From abd3a039b9adcafa8aa1df6e0753a725f90c78fc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 20:45:05 -0700 Subject: DocBook writer: Small tweaks to last commit. * Use isTightList from Shared. * Adjust writer test, since isTightList is a bit different from what was used before. Closes #1250. --- src/Text/Pandoc/Writers/Docbook.hs | 12 +++--------- tests/writer.docbook | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index e1b62f02d..1a8e58354 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -185,9 +185,7 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = - let attribs = case lst of - ((Plain _:_):_) -> [("spacing", "compact")] - _ -> [] + let attribs = [("spacing", "compact") | isTightList lst] in inTags True "itemizedlist" attribs $ listItemsToDocbook opts lst blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = @@ -199,9 +197,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] LowerRoman -> [("numeration", "lowerroman")] - spacing = case first of - (Plain _:_) -> [("spacing", "compact")] - _ -> [] + spacing = [("spacing", "compact") | isTightList (first:rest)] attribs = numeration ++ spacing items = if start == 1 then listItemsToDocbook opts (first:rest) @@ -210,9 +206,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = listItemsToDocbook opts rest in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = - let attribs = case lst of - ((_, (Plain _:_):_):_) -> [("spacing", "compact")] - _ -> [] + let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] in inTags True "variablelist" attribs $ deflistItemsToDocbook opts lst blockToDocbook _ (RawBlock f str) | f == "docbook" = text str -- raw XML block diff --git a/tests/writer.docbook b/tests/writer.docbook index 26dcbadaa..01daa2c30 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -508,7 +508,7 @@ These should not be escaped: \$ \\ \> \[ \{ Fancy list markers - + begins with 2 -- cgit v1.2.3 From fde52c25a65c479871afcf8192f56d2918230f5e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 21:08:45 -0700 Subject: AsciiDoc writer: Correctly handle empty table cells. Closes #1245. --- src/Text/Pandoc/Writers/AsciiDoc.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 68b525742..e5868172e 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -217,6 +217,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x] return $ text "|" <> chomp d makeCell [Para x] = makeCell [Plain x] + makeCell [] = return $ text "|" makeCell _ = return $ text "|" <> "[multiblock cell omitted]" let makeRow cells = hsep `fmap` mapM makeCell cells rows' <- mapM makeRow rows @@ -227,7 +228,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do else 100000 let maxwidth = maximum $ map offset (head':rows') let body = if maxwidth > colwidth then vsep rows' else vcat rows' - let border = text $ "|" ++ replicate ((min maxwidth colwidth) - 1) '=' + let border = text $ "|" ++ replicate (max 5 (min maxwidth colwidth) - 1) '=' return $ caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline blockToAsciiDoc opts (BulletList items) = do -- cgit v1.2.3 From 96c0c950cab8b42d5d6b8b1a6f1fb20f7f4a5aae Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 3 May 2014 21:31:53 -0700 Subject: AsciiDoc writer: Handle multiblock table cells. Closes #1246. --- src/Text/Pandoc/Writers/AsciiDoc.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e5868172e..15579cba2 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -218,7 +218,8 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do return $ text "|" <> chomp d makeCell [Para x] = makeCell [Plain x] makeCell [] = return $ text "|" - makeCell _ = return $ text "|" <> "[multiblock cell omitted]" + makeCell bs = do d <- blockListToAsciiDoc opts bs + return $ text "a|" $$ d let makeRow cells = hsep `fmap` mapM makeCell cells rows' <- mapM makeRow rows head' <- makeRow headers -- cgit v1.2.3 From 3e42f08e87c4795b260154e9747df29bc1613ccc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 08:07:17 -0700 Subject: Markdown reader: Fixed bug with unwanted code in lists. Closes #1154. When reading a raw list item, we now strip off nonindent spaces. --- src/Text/Pandoc/Readers/Markdown.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 36f73d847..69245cf66 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -746,6 +746,7 @@ listLine = try $ do many spaceChar listStart) notFollowedBy' $ htmlTag (~== TagClose "div") + nonindentSpaces chunks <- manyTill ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') <|> liftM snd (htmlTag isCommentTag) -- cgit v1.2.3 From d72871598174474218ae46dd984632a3753882b1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 10:45:20 -0700 Subject: Docx writer: Added ability to give fallback in parseXml. --- src/Text/Pandoc/Writers/Docx.hs | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 2a834c2da..bb2071455 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -217,7 +217,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive stylepath + styledoc <- parseXml refArchive stylepath Nothing let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -256,19 +256,20 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap` - parseXml refArchive path - docPropsAppEntry <- entryFromArchive "docProps/app.xml" - themeEntry <- entryFromArchive "word/theme/theme1.xml" - fontTableEntry <- entryFromArchive "word/fontTable.xml" - settingsEntry <- entryFromArchive "word/settings.xml" - webSettingsEntry <- entryFromArchive "word/webSettings.xml" + let entryFromArchive path fallback = + (toEntry path epochtime . renderXml) `fmap` + parseXml refArchive path fallback + docPropsAppEntry <- entryFromArchive "docProps/app.xml" Nothing + themeEntry <- entryFromArchive "word/theme/theme1.xml" Nothing + fontTableEntry <- entryFromArchive "word/fontTable.xml" Nothing + settingsEntry <- entryFromArchive "word/settings.xml" Nothing + webSettingsEntry <- entryFromArchive "word/webSettings.xml" Nothing let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f , f /= "word/_rels/document.xml.rels" , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM entryFromArchive miscRels + miscRelEntries <- mapM (\f -> entryFromArchive f Nothing) miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -814,10 +815,10 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -parseXml :: Archive -> String -> IO Element -parseXml refArchive relpath = - case findEntryByPath relpath refArchive of - Just e -> case parseXMLDoc $ UTF8.toStringLazy $ fromEntry e of - Just d -> return d - Nothing -> fail $ relpath ++ " corrupt in reference docx" - Nothing -> fail $ relpath ++ " missing in reference docx" +parseXml :: Archive -> String -> Maybe String -> IO Element +parseXml refArchive relpath fallback = + case (findEntryByPath relpath refArchive + >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) `mplus` + (fallback >>= parseXMLDoc) of + Just d -> return d + Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" -- cgit v1.2.3 From 0c7e084342b2a077f83809e6613979adcefb1592 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 10:54:45 -0700 Subject: Docx writer: Fall back on distribution reference.docx. * Undid changes to parseXml in last commit. * Instead of a string fallback, we have parseXml fall back on the reference.docx that comes with pandoc if the user's reference.docx does not contain a needed file. * Closes #1185. --- src/Text/Pandoc/Writers/Docx.hs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index bb2071455..fcb73a427 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -116,6 +116,7 @@ writeDocx opts doc@(Pandoc meta _) = do case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> readDataFile datadir "reference.docx" + distArchive <- liftM (toArchive . toLazy) $ readDataFile Nothing "reference.docx" ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') defaultWriterState @@ -217,7 +218,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- styles let newstyles = styleToOpenXml $ writerHighlightStyle opts let stylepath = "word/styles.xml" - styledoc <- parseXml refArchive stylepath Nothing + styledoc <- parseXml refArchive distArchive stylepath let styledoc' = styledoc{ elContent = elContent styledoc ++ [Elem x | x <- newstyles, writerHighlight opts] } let styleEntry = toEntry stylepath epochtime $ renderXml styledoc' @@ -256,20 +257,20 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path fallback = + let entryFromArchive path = (toEntry path epochtime . renderXml) `fmap` - parseXml refArchive path fallback - docPropsAppEntry <- entryFromArchive "docProps/app.xml" Nothing - themeEntry <- entryFromArchive "word/theme/theme1.xml" Nothing - fontTableEntry <- entryFromArchive "word/fontTable.xml" Nothing - settingsEntry <- entryFromArchive "word/settings.xml" Nothing - webSettingsEntry <- entryFromArchive "word/webSettings.xml" Nothing + parseXml refArchive distArchive path + docPropsAppEntry <- entryFromArchive "docProps/app.xml" + themeEntry <- entryFromArchive "word/theme/theme1.xml" + fontTableEntry <- entryFromArchive "word/fontTable.xml" + settingsEntry <- entryFromArchive "word/settings.xml" + webSettingsEntry <- entryFromArchive "word/webSettings.xml" let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f , f /= "word/_rels/document.xml.rels" , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM (\f -> entryFromArchive f Nothing) miscRels + miscRelEntries <- mapM entryFromArchive miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -815,10 +816,10 @@ inlineToOpenXML opts (Image alt (src, tit)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [("w:type","textWrapping")] () ] -parseXml :: Archive -> String -> Maybe String -> IO Element -parseXml refArchive relpath fallback = - case (findEntryByPath relpath refArchive - >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) `mplus` - (fallback >>= parseXMLDoc) of +parseXml :: Archive -> Archive -> String -> IO Element +parseXml refArchive distArchive relpath = + case ((findEntryByPath relpath refArchive `mplus` + findEntryByPath relpath distArchive) + >>= parseXMLDoc . UTF8.toStringLazy . fromEntry) of Just d -> return d Nothing -> fail $ relpath ++ " corrupt or missing in reference docx" -- cgit v1.2.3 From 51aa3048347280db6798a84a30af4f6e1ae56b26 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 14:43:05 -0700 Subject: LaTeX writer: Fixed inconsistencies with reference escaping. - toLabel is now monadic, and it does the needed string escaping. - Closes #1130. --- src/Text/Pandoc/Writers/LaTeX.hs | 131 ++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 64 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e52220f01..c17e041b5 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -193,7 +193,7 @@ stringToLaTeX _ [] = return "" stringToLaTeX ctx (x:xs) = do opts <- gets stOptions rest <- stringToLaTeX ctx xs - let ligatures = writerTeXLigatures opts && (ctx /= CodeString) + let ligatures = writerTeXLigatures opts && ctx == TextString let isUrl = ctx == URLString when (x == '€') $ modify $ \st -> st{ stUsesEuro = True } @@ -207,7 +207,8 @@ stringToLaTeX ctx (x:xs) = do '&' -> "\\&" ++ rest '_' | not isUrl -> "\\_" ++ rest '#' -> "\\#" ++ rest - '-' -> case xs of -- prevent adjacent hyphens from forming ligatures + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures ('-':_) -> "-\\/" ++ rest _ -> '-' : rest '~' | not isUrl -> "\\textasciitilde{}" ++ rest @@ -229,12 +230,13 @@ stringToLaTeX ctx (x:xs) = do '\x2013' | ligatures -> "--" ++ rest _ -> x : rest -toLabel :: String -> String -toLabel [] = "" -toLabel (x:xs) - | (isLetter x || isDigit x) && isAscii x = x:toLabel xs - | elem x "-+=:;." = x:toLabel xs - | otherwise = "ux" ++ printf "%x" (ord x) ++ toLabel xs +toLabel :: String -> State WriterState String +toLabel z = go `fmap` stringToLaTeX URLString z + where go [] = "" + go (x:xs) + | (isLetter x || isDigit x) && isAscii x = x:go xs + | elem x "-+=:;." = x:go xs + | otherwise = "ux" ++ printf "%x" (ord x) ++ go xs -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc @@ -340,57 +342,57 @@ blockToLaTeX (BlockQuote lst) = do return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do opts <- gets stOptions + ref <- toLabel identifier + let linkAnchor = if null identifier + then empty + else "\\hyperdef{}" <> braces (text ref) <> + braces ("\\label" <> braces (text ref)) + let lhsCodeBlock = do + modify $ \s -> s{ stLHS = True } + return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ + "\\end{code}") $$ cr + let rawCodeBlock = do + st <- get + env <- if stInNote st + then modify (\s -> s{ stVerbInNote = True }) >> + return "Verbatim" + else return "verbatim" + return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ + text str $$ text ("\\end{" ++ env ++ "}")) <> cr + let listingsCodeBlock = do + st <- get + let params = if writerListings (stOptions st) + then (case getListingsLanguage classes of + Just l -> [ "language=" ++ l ] + Nothing -> []) ++ + [ "numbers=left" | "numberLines" `elem` classes + || "number" `elem` classes + || "number-lines" `elem` classes ] ++ + [ (if key == "startFrom" + then "firstnumber" + else key) ++ "=" ++ attr | + (key,attr) <- keyvalAttr ] ++ + (if identifier == "" + then [] + else [ "label=" ++ ref ]) + + else [] + printParams + | null params = empty + | otherwise = brackets $ hcat (intersperse ", " (map text params)) + return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ + "\\end{lstlisting}") $$ cr + let highlightedCodeBlock = + case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of + Nothing -> rawCodeBlock + Just h -> modify (\st -> st{ stHighlighting = True }) >> + return (flush $ linkAnchor $$ text h) case () of _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && "literate" `elem` classes -> lhsCodeBlock | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock | otherwise -> rawCodeBlock - where ref = text $ toLabel identifier - linkAnchor = if null identifier - then empty - else "\\hyperdef{}" <> braces ref <> - braces ("\\label" <> braces ref) - lhsCodeBlock = do - modify $ \s -> s{ stLHS = True } - return $ flush (linkAnchor $$ "\\begin{code}" $$ text str $$ - "\\end{code}") $$ cr - rawCodeBlock = do - st <- get - env <- if stInNote st - then modify (\s -> s{ stVerbInNote = True }) >> - return "Verbatim" - else return "verbatim" - return $ flush (linkAnchor $$ text ("\\begin{" ++ env ++ "}") $$ - text str $$ text ("\\end{" ++ env ++ "}")) <> cr - listingsCodeBlock = do - st <- get - let params = if writerListings (stOptions st) - then (case getListingsLanguage classes of - Just l -> [ "language=" ++ l ] - Nothing -> []) ++ - [ "numbers=left" | "numberLines" `elem` classes - || "number" `elem` classes - || "number-lines" `elem` classes ] ++ - [ (if key == "startFrom" - then "firstnumber" - else key) ++ "=" ++ attr | - (key,attr) <- keyvalAttr ] ++ - (if identifier == "" - then [] - else [ "label=" ++ toLabel identifier ]) - - else [] - printParams - | null params = empty - | otherwise = brackets $ hcat (intersperse ", " (map text params)) - return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ - "\\end{lstlisting}") $$ cr - highlightedCodeBlock = - case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of - Nothing -> rawCodeBlock - Just h -> modify (\st -> st{ stHighlighting = True }) >> - return (flush $ linkAnchor $$ text h) blockToLaTeX (RawBlock f x) | f == Format "latex" || f == Format "tex" = return $ text x @@ -579,6 +581,7 @@ sectionHeader :: Bool -- True for unnumbered -> State WriterState Doc sectionHeader unnumbered ref level lst = do txt <- inlineListToLaTeX lst + lab <- text `fmap` toLabel ref let noNote (Note _) = Str "" noNote x = x let lstNoNotes = walk noNote lst @@ -599,13 +602,13 @@ sectionHeader unnumbered ref level lst = do let refLabel x = (if ref `elem` internalLinks then text "\\hyperdef" <> braces empty - <> braces (text $ toLabel ref) + <> braces lab <> braces x else x) - let headerWith x y r = refLabel $ text x <> y <> - if null r + let headerWith x y = refLabel $ text x <> y <> + if null ref then empty - else text "\\label" <> braces (text $ toLabel r) + else text "\\label" <> braces lab let sectionType = case level' of 0 | writerBeamer opts -> "part" | otherwise -> "chapter" @@ -624,7 +627,7 @@ sectionHeader unnumbered ref level lst = do return $ if level' > 5 then txt else prefix $$ - headerWith ('\\':sectionType) stuffing ref + headerWith ('\\':sectionType) stuffing $$ if unnumbered then "\\addcontentsline{toc}" <> braces (text sectionType) <> @@ -659,9 +662,10 @@ inlineToLaTeX (Span (id',classes,_) ils) = do let noEmph = "csl-no-emph" `elem` classes let noStrong = "csl-no-strong" `elem` classes let noSmallCaps = "csl-no-smallcaps" `elem` classes - let label' = if (null id') - then empty - else text "\\label" <> braces (text $ toLabel id') + label' <- if null id' + then return empty + else toLabel id' >>= \x -> + return (text "\\label" <> braces (text x)) fmap (label' <>) ((if noEmph then inCmd "textup" else id) . (if noStrong then inCmd "textnormal" else id) . @@ -745,9 +749,8 @@ inlineToLaTeX (LineBreak) = return "\\\\" inlineToLaTeX Space = return space inlineToLaTeX (Link txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt - ident' <- stringToLaTeX URLString ident - return $ text "\\hyperref" <> brackets (text $ toLabel ident') <> - braces contents + lab <- toLabel ident + return $ text "\\hyperref" <> brackets (text lab) <> braces contents inlineToLaTeX (Link txt (src, _)) = case txt of [Str x] | x == src -> -- autolink -- cgit v1.2.3 From dbd6c1540f9688a3439fceec405ec4d86dc951d5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 4 May 2014 16:21:18 -0700 Subject: Fixed the fix to #1154. We need to strip off up to 4 spaces, not up to 3. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 69245cf66..aac87f363 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -746,7 +746,7 @@ listLine = try $ do many spaceChar listStart) notFollowedBy' $ htmlTag (~== TagClose "div") - nonindentSpaces + optional (() <$ indentSpaces) chunks <- manyTill ( many1 (satisfy $ \c -> c /= '\n' && c /= '<') <|> liftM snd (htmlTag isCommentTag) -- cgit v1.2.3 From 71bd4fb2b3778d3906a63938625ebcadca40b8c8 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 5 May 2014 14:39:25 +0200 Subject: Org reader: Read inline code blocks Org's inline code blocks take forms like `src_haskell(print "hi")` and are frequently used to include results from computations called from within the document. The blocks are read as inline code and marked with the special class `rundoc-block`. Proper handling and execution of these blocks is the subject of a separate library, rundoc, which is work in progress. This closes #1278. --- src/Text/Pandoc/Readers/Org.hs | 43 +++++++++++++++++++++++++++++++++++++++--- tests/Tests/Readers/Org.hs | 18 ++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index d68ef45ef..dba61dfe0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) +import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) @@ -721,7 +722,6 @@ bulletList = fmap B.bulletList . fmap compactify' . sequence <$> many1 (listItem bulletListStart) orderedList :: OrgParser (F Blocks) --- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) orderedList = fmap B.orderedList . fmap compactify' . sequence <$> many1 (listItem orderedListStart) @@ -746,11 +746,11 @@ definitionListItem :: OrgParser Int definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") - first <- anyLineNewline + line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term - contents' <- parseFromString parseBlocks $ first ++ blank ++ cont + contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' @@ -789,6 +789,7 @@ inline = , footnote , linkOrImage , anchor + , inlineCodeBlock , str , endline , emph @@ -989,6 +990,42 @@ solidify = map replaceSpecialChar | c `elem` "_.-:" = c | otherwise = '-' +-- | Parses an inline code block and marks it as an babel block. +inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock = try $ do + string "src_" + lang <- many1 orgArgWordChar + opts <- option [] $ enclosedByPair '[' ']' blockOption + inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") + let attrClasses = [translateLang lang, rundocBlockClass] + let attrKeyVal = map toRundocAttrib (("language", lang) : opts) + returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + where enclosedByPair s e p = char s *> many1Till p (char e) + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = "rundoc-block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + <* many1 spaceChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar + <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 78684f0f1..949976aba 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -207,6 +207,24 @@ tests = "<> Link here later." =?> (para $ spanWith ("anchor", [], []) mempty <> "Link" <> space <> "here" <> space <> "later.") + + , "Inline code block" =: + "src_emacs-lisp{(message \"Hello\")}" =?> + (para $ codeWith ( "" + , [ "commonlisp", "rundoc-block" ] + , [ ("rundoc-language", "emacs-lisp") ]) + "(message \"Hello\")") + + , "Inline code block with arguments" =: + "src_sh[:export both :results output]{echo 'Hello, World'}" =?> + (para $ codeWith ( "" + , [ "bash", "rundoc-block" ] + , [ ("rundoc-language", "sh") + , ("rundoc-export", "both") + , ("rundoc-results", "output") + ] + ) + "echo 'Hello, World'") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 10644607e35369ec3b19b5d02fbe9b936d0ecb85 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 22:13:59 -0700 Subject: Textile reader: Rewrote some inline parsing code for clarity. (It seems clearer to put the whitespace parsing in the grouped parser. This also uses stateLastStrPos to determine when the border is adjacent to an alphanumeric.) --- src/Text/Pandoc/Readers/Textile.hs | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index ae9c0cc8e..3c07a4d85 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -596,32 +596,28 @@ ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding ungroupedSimpleInline border construct = try $ do st <- getState pos <- getPosition - isWhitespace <- option False (whitespace >> return True) - guard $ (stateQuoteContext st /= NoQuote) - || (sourceColumn pos == 1) - || isWhitespace + let afterString = stateLastStrPos st == Just pos + guard $ not afterString border *> notFollowedBy (oneOf " \t\n\r") attr <- attributes body <- trimInlines . mconcat <$> withQuoteContext InSingleQuote (manyTill inline (try border <* notFollowedBy alphaNum)) - let result = construct $ + return $ construct $ if attr == nullAttr then body else B.spanWith attr body - return $ if isWhitespace - then B.space <> result - else result groupedSimpleInline :: Parser [Char] ParserState t -> (Inlines -> Inlines) -> Parser [Char] ParserState Inlines groupedSimpleInline border construct = try $ do char '[' - withQuoteContext InSingleQuote (simpleInline border construct) >>~ char ']' - - - + sp1 <- option mempty $ B.space <$ whitespace + result <- withQuoteContext InSingleQuote (simpleInline border construct) + sp2 <- option mempty $ B.space <$ whitespace + char ']' + return $ sp1 <> result <> sp2 -- | Create a singleton list singleton :: a -> [a] -- cgit v1.2.3 From d6a9ba1cdc4cb10d34c61593d04868da3abb5e40 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 22:28:11 -0700 Subject: Make `--trace` work with textile reader. --- src/Text/Pandoc/Readers/Textile.hs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 3c07a4d85..b67e8fbc8 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -61,9 +61,11 @@ import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper) -import Control.Monad ( guard, liftM ) +import Control.Monad ( guard, liftM, when ) +import Text.Printf import Control.Applicative ((<$>), (*>), (<*), (<$)) import Data.Monoid +import Debug.Trace (trace) -- | Parse a Textile text and return a Pandoc document. readTextile :: ReaderOptions -- ^ Reader options @@ -135,9 +137,17 @@ blockParsers = [ codeBlock endBlock :: Parser [Char] ParserState Blocks endBlock = string "\n\n" >> return mempty + -- | Any block in the order of definition of blockParsers block :: Parser [Char] ParserState Blocks -block = choice blockParsers "block" +block = do + res <- choice blockParsers "block" + pos <- getPosition + tr <- getOption readerTrace + when tr $ + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res commentBlock :: Parser [Char] ParserState Blocks commentBlock = try $ do -- cgit v1.2.3 From ea4e947bd0308861dbbbe020d21afe7943db1b98 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 23:16:47 -0700 Subject: Textile reader: Disallow blank lines in inline contexts. @hi there@ should not be a single code span. --- src/Text/Pandoc/Readers/Textile.hs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index b67e8fbc8..622a41168 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -481,7 +481,7 @@ str = do -- followed by parens, parens content is unconditionally word acronym fullStr <- option baseStr $ try $ do guard $ all isUpper baseStr - acro <- enclosed (char '(') (char ')') anyChar + acro <- enclosed (char '(') (char ')') anyChar' return $ concat [baseStr, " (", acro, ")"] updateLastStrPos return $ B.str fullStr @@ -528,8 +528,8 @@ link = try $ do image :: Parser [Char] ParserState Inlines image = try $ do char '!' >> notFollowedBy space - src <- manyTill anyChar (lookAhead $ oneOf "!(") - alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')'))) + src <- manyTill anyChar' (lookAhead $ oneOf "!(") + alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')'))) char '!' return $ B.image src alt (B.str alt) @@ -537,12 +537,14 @@ escapedInline :: Parser [Char] ParserState Inlines escapedInline = escapedEqs <|> escapedTag escapedEqs :: Parser [Char] ParserState Inlines -escapedEqs = B.str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) +escapedEqs = B.str <$> + (try $ string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw tags escapedTag :: Parser [Char] ParserState Inlines escapedTag = B.str <$> - (try $ string "" *> manyTill anyChar (try $ string "")) + (try $ string "" *> + manyTill anyChar' (try $ string "")) -- | Any special symbol defined in wordBoundaries symbol :: Parser [Char] ParserState Inlines @@ -552,13 +554,18 @@ symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) code :: Parser [Char] ParserState Inlines code = code1 <|> code2 +-- any character except a newline before a blank line +anyChar' :: Parser [Char] ParserState Char +anyChar' = + satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline) + code1 :: Parser [Char] ParserState Inlines -code1 = B.code <$> surrounded (char '@') anyChar +code1 = B.code <$> surrounded (char '@') anyChar' code2 :: Parser [Char] ParserState Inlines code2 = do htmlTag (tagOpen (=="tt") null) - B.code <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) + B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes attributes :: Parser [Char] ParserState Attr @@ -570,7 +577,7 @@ attribute = classIdAttr <|> styleAttr <|> langAttr classIdAttr :: Parser [Char] ParserState (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' - ws <- words `fmap` manyTill anyChar (char ')') + ws <- words `fmap` manyTill anyChar' (char ')') case reverse ws of [] -> return $ \(_,_,keyvals) -> ("",[],keyvals) (('#':ident'):classes') -> return $ \(_,_,keyvals) -> @@ -580,7 +587,7 @@ classIdAttr = try $ do -- (class class #id) styleAttr :: Parser [Char] ParserState (Attr -> Attr) styleAttr = do - style <- try $ enclosed (char '{') (char '}') anyChar + style <- try $ enclosed (char '{') (char '}') anyChar' return $ \(id',classes,keyvals) -> (id',classes,("style",style):keyvals) langAttr :: Parser [Char] ParserState (Attr -> Attr) @@ -592,13 +599,15 @@ langAttr = do surrounded :: Parser [Char] st t -- ^ surrounding parser -> Parser [Char] st a -- ^ content parser (to be used repeatedly) -> Parser [Char] st [a] -surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) +surrounded border = + enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) -simpleInline border construct = groupedSimpleInline border construct <|> ungroupedSimpleInline border construct +simpleInline border construct = groupedSimpleInline border construct + <|> ungroupedSimpleInline border construct ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor -- cgit v1.2.3 From 442eecc15c2b805872600e111a510e022d1920f7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 23:27:16 -0700 Subject: Textile reader: Rewrote simpleInline for clarity and efficiency. This way we only look once for the opening `[`. --- src/Text/Pandoc/Readers/Textile.hs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 622a41168..f83298d4c 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -381,6 +381,7 @@ inline = do -- | Inline parsers tried in order inlineParsers :: [Parser [Char] ParserState Inlines] inlineParsers = [ inlineMarkup + , groupedInlineMarkup , str , whitespace , endline @@ -602,17 +603,10 @@ surrounded :: Parser [Char] st t -- ^ surrounding parser surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) - simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> (Inlines -> Inlines) -- ^ Inline constructor - -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) -simpleInline border construct = groupedSimpleInline border construct - <|> ungroupedSimpleInline border construct - -ungroupedSimpleInline :: Parser [Char] ParserState t -- ^ surrounding parser - -> (Inlines -> Inlines) -- ^ Inline constructor - -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) -ungroupedSimpleInline border construct = try $ do + -> (Inlines -> Inlines) -- ^ Inline constructor + -> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly) +simpleInline border construct = try $ do st <- getState pos <- getPosition let afterString = stateLastStrPos st == Just pos @@ -627,13 +621,11 @@ ungroupedSimpleInline border construct = try $ do then body else B.spanWith attr body -groupedSimpleInline :: Parser [Char] ParserState t - -> (Inlines -> Inlines) - -> Parser [Char] ParserState Inlines -groupedSimpleInline border construct = try $ do +groupedInlineMarkup :: Parser [Char] ParserState Inlines +groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace - result <- withQuoteContext InSingleQuote (simpleInline border construct) + result <- withQuoteContext InSingleQuote inlineMarkup sp2 <- option mempty $ B.space <$ whitespace char ']' return $ sp1 <> result <> sp2 -- cgit v1.2.3 From 84f2336a7db04dbc3c36594d1b4aca2e18186617 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 6 May 2014 23:41:56 -0700 Subject: Textile reader: Rearranged inline parsers for performance. This is possible because of the rewrite of simpleInline. Also removed a redundant parser for grouped inlines. --- src/Text/Pandoc/Readers/Textile.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index f83298d4c..2f1fd30b4 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -380,17 +380,16 @@ inline = do -- | Inline parsers tried in order inlineParsers :: [Parser [Char] ParserState Inlines] -inlineParsers = [ inlineMarkup - , groupedInlineMarkup - , str +inlineParsers = [ str , whitespace , endline , code , escapedInline + , inlineMarkup + , groupedInlineMarkup , rawHtmlInline , rawLaTeXInline' , note - , try $ (char '[' *> inlineMarkup <* char ']') , link , image , mark -- cgit v1.2.3 From 0050b509052ff81ba021b98fdbc573d3475ed74c Mon Sep 17 00:00:00 2001 From: mpickering Date: Wed, 7 May 2014 13:03:45 +0100 Subject: Fix textile reader hanging. Textile reader hung on pandoc -f textile http://johnmacfarlane.net/pandoc/demo/example25.textile The reader no longer hangs. --- src/Text/Pandoc/Readers/Textile.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 2f1fd30b4..f7c87ab5a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -513,7 +513,8 @@ link = try $ do char '"' *> notFollowedBy (oneOf " \t\n\r") attr <- attributes name <- trimInlines . mconcat <$> - withQuoteContext InSingleQuote (manyTill inline (try (string "\":"))) + withQuoteContext InDoubleQuote (many1Till inline (try (char '"'))) + char ':' let stop = if bracketed then char ']' else lookAhead $ space <|> -- cgit v1.2.3 From f0f88111e6597ade7e771457fc1b81bcc9a6d974 Mon Sep 17 00:00:00 2001 From: mpickering Date: Wed, 7 May 2014 13:03:45 +0100 Subject: Small improvement to textile reader fix. Removed 'try'. --- src/Text/Pandoc/Readers/Textile.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index f7c87ab5a..9ee34caa5 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -513,7 +513,7 @@ link = try $ do char '"' *> notFollowedBy (oneOf " \t\n\r") attr <- attributes name <- trimInlines . mconcat <$> - withQuoteContext InDoubleQuote (many1Till inline (try (char '"'))) + withQuoteContext InDoubleQuote (many1Till inline (char '"')) char ':' let stop = if bracketed then char ']' -- cgit v1.2.3 From 8fdbef841d0ef77dcc2e30cfa475e92a0f3de6cf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 May 2014 21:50:20 +0200 Subject: Update copyright notices for 2014, add missing notices --- COPYRIGHT | 10 +++++----- Setup.hs | 17 +++++++++++++++++ benchmark/benchmark-pandoc.hs | 18 +++++++++++++++++- pandoc.cabal | 2 +- pandoc.hs | 6 +++--- src/Text/Pandoc.hs | 4 ++-- src/Text/Pandoc/Asciify.hs | 4 ++-- src/Text/Pandoc/Highlighting.hs | 4 ++-- src/Text/Pandoc/ImageSize.hs | 4 ++-- src/Text/Pandoc/MIME.hs | 4 ++-- src/Text/Pandoc/Options.hs | 4 ++-- src/Text/Pandoc/PDF.hs | 4 ++-- src/Text/Pandoc/Parsing.hs | 4 ++-- src/Text/Pandoc/Pretty.hs | 4 ++-- src/Text/Pandoc/Process.hs | 4 ++-- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Readers/Markdown.hs | 4 ++-- src/Text/Pandoc/Readers/MediaWiki.hs | 4 ++-- src/Text/Pandoc/Readers/Native.hs | 4 ++-- src/Text/Pandoc/Readers/RST.hs | 4 ++-- src/Text/Pandoc/Readers/TeXMath.hs | 4 ++-- src/Text/Pandoc/Readers/Textile.hs | 5 +++-- src/Text/Pandoc/SelfContained.hs | 4 ++-- src/Text/Pandoc/Shared.hs | 4 ++-- src/Text/Pandoc/Slides.hs | 4 ++-- src/Text/Pandoc/Templates.hs | 4 ++-- src/Text/Pandoc/UTF8.hs | 4 ++-- src/Text/Pandoc/UUID.hs | 4 ++-- src/Text/Pandoc/Writers/AsciiDoc.hs | 4 ++-- src/Text/Pandoc/Writers/ConTeXt.hs | 4 ++-- src/Text/Pandoc/Writers/Custom.hs | 4 ++-- src/Text/Pandoc/Writers/Docbook.hs | 4 ++-- src/Text/Pandoc/Writers/Docx.hs | 4 ++-- src/Text/Pandoc/Writers/EPUB.hs | 4 ++-- src/Text/Pandoc/Writers/HTML.hs | 4 ++-- src/Text/Pandoc/Writers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Writers/Man.hs | 4 ++-- src/Text/Pandoc/Writers/Markdown.hs | 4 ++-- src/Text/Pandoc/Writers/MediaWiki.hs | 4 ++-- src/Text/Pandoc/Writers/Native.hs | 4 ++-- src/Text/Pandoc/Writers/ODT.hs | 4 ++-- src/Text/Pandoc/Writers/OPML.hs | 4 ++-- src/Text/Pandoc/Writers/OpenDocument.hs | 6 +++--- src/Text/Pandoc/Writers/Org.hs | 5 +++-- src/Text/Pandoc/Writers/RST.hs | 4 ++-- src/Text/Pandoc/Writers/RTF.hs | 4 ++-- src/Text/Pandoc/Writers/Shared.hs | 4 ++-- src/Text/Pandoc/Writers/Texinfo.hs | 4 ++-- src/Text/Pandoc/Writers/Textile.hs | 4 ++-- src/Text/Pandoc/XML.hs | 4 ++-- 51 files changed, 138 insertions(+), 103 deletions(-) (limited to 'src/Text') diff --git a/COPYRIGHT b/COPYRIGHT index cd5adb1be..065090018 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,5 +1,5 @@ Pandoc -Copyright (C) 2006-2013 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This code is released under the [GPL], version 2 or later: @@ -33,25 +33,25 @@ licenses. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Texinfo.hs -Copyright (C) 2008-2010 John MacFarlane and Peter Wang +Copyright (C) 2008-2014 John MacFarlane and Peter Wang Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/OpenDocument.hs -Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane +Copyright (C) 2008-2014 Andrea Rossato and John MacFarlane Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Org.hs -Copyright (C) 2010 Puneeth Chaganti +Copyright (C) 2010-2014 Puneeth Chaganti and JohnMacFarlane Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Readers/Textile.hs -Copyright (C) 2010 Paul Rivier +Copyright (C) 2010-2014 Paul Rivier and John MacFarlane Released under the GNU General Public License version 2 or later. diff --git a/Setup.hs b/Setup.hs index 89d03ee7a..f5d18eee4 100644 --- a/Setup.hs +++ b/Setup.hs @@ -1,4 +1,21 @@ {-# LANGUAGE CPP #-} +{- +Copyright (C) 2006-2014 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} import Distribution.Simple import Distribution.Simple.PreProcess diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 2eaaf91a1..9238b09d7 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,3 +1,20 @@ +{- +Copyright (C) 2012-2014 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} import Text.Pandoc import Criterion.Main import Criterion.Config @@ -36,4 +53,3 @@ main = do let writers' = [(n,w) | (n, PureStringWriter w) <- writers] defaultMainWith conf (return ()) $ map (writerBench doc) writers' ++ readerBs - diff --git a/pandoc.cabal b/pandoc.cabal index ea0aa71ba..63c748a47 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -4,7 +4,7 @@ Cabal-Version: >= 1.10 Build-Type: Custom License: GPL License-File: COPYING -Copyright: (c) 2006-2013 John MacFarlane +Copyright: (c) 2006-2014 John MacFarlane Author: John MacFarlane Maintainer: John MacFarlane Bug-Reports: https://github.com/jgm/pandoc/issues diff --git a/pandoc.hs b/pandoc.hs index 959605625..5dd0e6899 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2006-2013 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Main - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -69,7 +69,7 @@ import qualified Data.Yaml as Yaml import qualified Data.Text as T copyrightMessage :: String -copyrightMessage = "\nCopyright (C) 2006-2013 John MacFarlane\n" ++ +copyrightMessage = "\nCopyright (C) 2006-2014 John MacFarlane\n" ++ "Web: http://johnmacfarlane.net/pandoc\n" ++ "This is free software; see the source for copying conditions. There is no\n" ++ "warranty, not even for merchantability or fitness for a particular purpose." diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index a37c98814..dd5bc18f6 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 1c177da90..8a5ccec5c 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 11d608db6..2e7a9f648 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008 John MacFarlane +Copyright (C) 2008-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 3c9623b3c..a6d076fa9 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} {- - Copyright (C) 2011 John MacFarlane + Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ {- | Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011 John MacFarlane +Copyright : Copyright (C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 977cb576b..6e6284b25 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011 John MacFarlane +Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011 John MacFarlane + Copyright : Copyright (C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 38220f542..611a6bb06 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index abc5c41b7..e4e06e6c9 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, CPP #-} {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4d0a677da..d1e55cbc4 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,7 +1,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances, FlexibleInstances#-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 5331587ce..d25ba725f 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,6 +1,6 @@ {-# LANGUAGE GeneralizedNewtypeDeriving, CPP #-} {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index 112c5b974..9c8853366 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index c94ee3d6b..905e55b22 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 79c66b510..bfafea1f6 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-} {- -Copyright (C) 2006-2012 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2012 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index aac87f363..d1637b701 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index feaedb7c2..e4fabc898 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,7 +1,7 @@ {-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- - Copyright (C) 2012 John MacFarlane + Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index c5d4cb98a..f4dfa62c1 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2011 John MacFarlane +Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011 John MacFarlane + Copyright : Copyright (C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 54b6fa34a..fa8438e70 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 6bd617f7e..f03eae044 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2010 John MacFarlane +Copyright (C) 2007-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.TeXMath - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 9ee34caa5..6d839ec1d 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,5 +1,6 @@ {- -Copyright (C) 2010 Paul Rivier | tr '*#' '.@' +Copyright (C) 2010-2014 Paul Rivier | tr '*#' '.@' + and John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile - Copyright : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane + Copyright : Copyright (C) 2010-2014 Paul Rivier and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Paul Rivier diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 7fc9c2966..2a2f56281 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2011 John MacFarlane +Copyright (C) 2011-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011 John MacFarlane + Copyright : Copyright (C) 2011-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6f0629ea2..31c490af6 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,7 +1,7 @@ {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, FlexibleContexts, ScopedTypeVariables #-} {- -Copyright (C) 2006-2013 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 50c46d17f..2b863c780 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 52625abf6..551db6483 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -1,7 +1,7 @@ {-# LANGUAGE TypeSynonymInstances, FlexibleInstances, CPP, OverloadedStrings, GeneralizedNewtypeDeriving #-} {- -Copyright (C) 2009-2013 John MacFarlane +Copyright (C) 2009-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2013 John MacFarlane + Copyright : Copyright (C) 2009-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 229442543..33c9ec1c5 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index 082644eea..eebfe09d2 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 15579cba2..19112d8f5 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index cec420dcf..3b321cc19 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2007-2010 John MacFarlane +Copyright (C) 2007-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 0b30287f5..88f590c43 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverlappingInstances, FlexibleInstances, OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -{- Copyright (C) 2012 John MacFarlane +{- Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 1a8e58354..ba6a92a08 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index fcb73a427..551d97855 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2012 John MacFarlane +Copyright (C) 2012-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012 John MacFarlane + Copyright : Copyright (C) 2012-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index c39a7798d..893ec3be9 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1,6 +1,6 @@ {-# LANGUAGE PatternGuards, CPP, ScopedTypeVariables #-} {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 1de4345f9..9a26cf2ac 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, CPP #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index c17e041b5..c221b318e 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 680bfef44..41eb3e5be 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-2010 John MacFarlane +Copyright (C) 2007-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2010 John MacFarlane + Copyright : Copyright (C) 2007-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 95082add6..f42a1b54c 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} {- -Copyright (C) 2006-2013 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2013 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 83fefaa29..3b987ba2b 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008-2010 John MacFarlane +Copyright (C) 2008-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2010 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 090b97433..cb821e40b 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index c3652d65d..15f7c8be8 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {- -Copyright (C) 2008-2010 John MacFarlane +Copyright (C) 2008-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2010 John MacFarlane + Copyright : Copyright (C) 2008-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index f6926c1dc..dd359f3f5 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 0029c3296..b6da2694c 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,7 +1,7 @@ {-# LANGUAGE PatternGuards, OverloadedStrings #-} {- -Copyright (C) 2008-2010 Andrea Rossato -and John MacFarlane. +Copyright (C) 2008-2014 Andrea Rossato + and John MacFarlane. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane + Copyright : Copyright (C) 2008-2014 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 58a5729e7..87046537c 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 Puneeth Chaganti +Copyright (C) 2010-2014 Puneeth Chaganti + and John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org - Copyright : Copyright (C) 2010 Puneeth Chaganti + Copyright : Copyright (C) 2010-2014 Puneeth Chaganti and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Puneeth Chaganti diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 1e7596b21..31c97349b 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 3e0bd9976..e0428aaa8 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 604aac1c9..800e741a4 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2013 John MacFarlane +Copyright (C) 2013-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013 John MacFarlane + Copyright : Copyright (C) 2013-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index bf3df8035..8ac717bab 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2008-2010 John MacFarlane and Peter Wang +Copyright (C) 2008-2014 John MacFarlane and Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2010 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2014 John MacFarlane and Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 95aedf780..3a6982a01 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2010 John MacFarlane +Copyright (C) 2010-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010 John MacFarlane + Copyright : Copyright (C) 2010-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index c11af9a19..8000368aa 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane +Copyright (C) 2006-2014 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2010 John MacFarlane + Copyright : Copyright (C) 2006-2014 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane -- cgit v1.2.3 From 7760504bb26f215e7d0c57da843f1f1dcc8c1186 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 May 2014 17:01:58 +0200 Subject: Org reader: refactor #+BEGIN..#+END block parsing code --- src/Text/Pandoc/Readers/Org.hs | 122 +++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 42 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index dba61dfe0..9df8ce0b3 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -50,7 +50,7 @@ import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M -import Data.Maybe (listToMaybe, fromMaybe, isJust) +import Data.Maybe (fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) @@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s -> s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] -surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState +surroundingEmphasisChar = + take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> @@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } newlinesCountWithinLimits :: OrgParser Bool newlinesCountWithinLimits = do @@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do -- Org Blocks (#+BEGIN_... / #+END_...) -- +type BlockProperties = (Int, String) -- (Indentation, Block-Type) + orgBlock :: OrgParser (F Blocks) orgBlock = try $ do - (indent, blockType, args) <- blockHeader - content <- rawBlockContent indent blockType - contentBlocks <- parseFromString parseBlocks (content ++ "\n") - let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] - case blockType of - "comment" -> return mempty - "html" -> returnF $ B.rawBlock "html" content - "latex" -> returnF $ B.rawBlock "latex" content - "ascii" -> returnF $ B.rawBlock "ascii" content - "example" -> returnF $ exampleCode content - "quote" -> return $ B.blockQuote <$> contentBlocks - "verse" -> parseVerse content - "src" -> codeBlockWithAttr classArgs content - _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + blockProp@(_, blkType) <- blockHeaderStart + ($ blockProp) $ + case blkType of + "comment" -> withRaw' (const mempty) + "html" -> withRaw' (return . (B.rawBlock blkType)) + "latex" -> withRaw' (return . (B.rawBlock blkType)) + "ascii" -> withRaw' (return . (B.rawBlock blkType)) + "example" -> withRaw' (return . exampleCode) + "quote" -> withParsed (fmap B.blockQuote) + "verse" -> verseBlock + "src" -> codeBlock + _ -> withParsed (fmap $ divWithClass blkType) + +blockHeaderStart :: OrgParser (Int, String) +blockHeaderStart = try $ (,) <$> indent <*> blockType where - parseVerse :: String -> OrgParser (F Blocks) - parseVerse cs = - fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (lines cs) - -blockHeader :: OrgParser (Int, String, [String]) -blockHeader = (,,) <$> blockIndent - <*> blockType - <*> (skipSpaces *> blockArgs) - where blockIndent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) - blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline - -codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) -codeBlockWithAttr classArgs content = do - identifier <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" - let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + indent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + +withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) + +withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) + +ignHeaders :: OrgParser () +ignHeaders = (() <$ newline) <|> (() <$ anyLine) + +divWithClass :: String -> Blocks -> Blocks +divWithClass cls = B.divWith ("", [cls], []) + +verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock blkProp = try $ do + ignHeaders + content <- rawBlockContent blkProp + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines content) + +codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock blkProp = do + skipSpaces + language <- optionMaybe orgArgWord + (classes, kv) <- codeHeaderArgs + id' <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + content <- rawBlockContent blkProp + let attr = ( id' + , maybe id (\l -> (l:)) language $ classes + , kv ) + let codeBlck = B.codeBlockWith attr content return $ maybe (pure codeBlck) (labelDiv codeBlck) caption where labelDiv blk value = @@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do <*> pure blk) labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -rawBlockContent :: Int -> String -> OrgParser String -rawBlockContent indent blockType = +rawBlockContent :: BlockProperties -> OrgParser String +rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ + choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ + indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent blkProps = try $ do + raw <- rawBlockContent blkProps + parseFromString parseBlocks (raw ++ "\n") -- indent by specified number of spaces (or equiv. tabs) indentWith :: Int -> OrgParser String @@ -356,6 +383,13 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +orgArgWord :: OrgParser String +orgArgWord = many1 orgArgWordChar + +codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs = + (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline + translateLang :: String -> String translateLang "C" = "c" translateLang "C++" = "cpp" @@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + -- | The class-name used to mark rundoc blocks. rundocBlockClass :: String -rundocBlockClass = "rundoc-block" +rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) blockOption = try $ (,) <$> orgArgKey <*> orgArgValue -- cgit v1.2.3 From 757c4f68f3f3cab99db9499936e3ae4775ebbddf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:07:37 +0200 Subject: Org reader: Support arguments for code blocks The general form of source block headers (`#+BEGIN_SRC
`) was not recognized by the reader. This patch adds support for the above form, adds header arguments to the block's key-value pairs and marks the block as a rundoc block if header arguments are present. This closes #1286. --- src/Text/Pandoc/Readers/Org.hs | 98 ++++++++++++++++++++++++------------------ tests/Tests/Readers/Org.hs | 14 ++++++ 2 files changed, 70 insertions(+), 42 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 9df8ce0b3..c05ac92d0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -276,7 +276,7 @@ parseBlockAttributes = do where attribute :: OrgParser (String, String) attribute = try $ do - key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + key <- metaLineStart *> many1Till nonspaceChar (char ':') val <- skipSpaces *> anyLine return (map toLower key, val) @@ -342,16 +342,11 @@ verseBlock blkProp = try $ do codeBlock :: BlockProperties -> OrgParser (F Blocks) codeBlock blkProp = do skipSpaces - language <- optionMaybe orgArgWord - (classes, kv) <- codeHeaderArgs + (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) id' <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" content <- rawBlockContent blkProp - let attr = ( id' - , maybe id (\l -> (l:)) language $ classes - , kv ) - let codeBlck = B.codeBlockWith attr content - return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + let codeBlck = B.codeBlockWith ( id', classes, kv ) content + maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption" where labelDiv blk value = B.divWith nullAttr <$> (mappend <$> labelledBlock value @@ -383,12 +378,33 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +type SwitchOption = (Char, Maybe String) + orgArgWord :: OrgParser String orgArgWord = many1 orgArgWordChar +-- | Parse code block arguments +-- TODO: We currently don't handle switches. codeHeaderArgs :: OrgParser ([String], [(String, String)]) -codeHeaderArgs = - (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline +codeHeaderArgs = try $ do + language <- skipSpaces *> orgArgWord + _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + parameters <- manyTill blockOption newline + let pandocLang = translateLang language + return $ + if hasRundocParameters parameters + then ( [ pandocLang, rundocBlockClass ] + , map toRundocAttrib (("language", language) : parameters) + ) + else ([ pandocLang ], parameters) + where hasRundocParameters = not . null + +switch :: OrgParser SwitchOption +switch = try $ simpleSwitch <|> lineNumbersSwitch + where + simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) + lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> + (string "-l \"" *> many1Till nonspaceChar (char '"')) translateLang :: String -> String translateLang "C" = "c" @@ -401,6 +417,32 @@ translateLang "sh" = "bash" translateLang "sqlite" = "sql" translateLang cs = cs +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + commaEscaped :: String -> String commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs @@ -425,7 +467,7 @@ drawer = try $ do drawerStart :: OrgParser String drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline + skipSpaces *> drawerName <* skipSpaces <* P.newline where drawerName = try $ char ':' *> validDrawerName <* char ':' validDrawerName = stringAnyCase "PROPERTIES" <|> stringAnyCase "LOGBOOK" @@ -435,7 +477,7 @@ drawerLine = try anyLine drawerEnd :: OrgParser String drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline -- @@ -446,7 +488,7 @@ drawerEnd = try $ figure :: OrgParser (F Blocks) figure = try $ do (cap, nam) <- nameAndCaption - src <- skipSpaces *> selfTarget <* skipSpaces <* newline + src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) return $ do cap' <- cap @@ -1036,34 +1078,6 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue - -orgArgKey :: OrgParser String -orgArgKey = try $ - skipSpaces *> char ':' - *> many1 orgArgWordChar - <* many1 spaceChar - -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar - <* skipSpaces - -orgArgWordChar :: OrgParser Char -orgArgWordChar = alphaNum <|> oneOf "-_" - -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first ("rundoc-" ++) - emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 949976aba..a78e8861f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -822,6 +822,20 @@ tests = in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] , codeBlockWith attr' code' ] + , "Source block with rundoc/babel arguments" =: + unlines [ "#+BEGIN_SRC emacs-lisp :exports both" + , "(progn (message \"Hello, World!\")" + , " (+ 23 42))" + , "#+END_SRC" ] =?> + let classes = [ "commonlisp" -- as kate doesn't know emacs-lisp syntax + , "rundoc-block" + ] + params = [ ("rundoc-language", "emacs-lisp") + , ("rundoc-exports", "both") + ] + code' = unlines [ "(progn (message \"Hello, World!\")" + , " (+ 23 42))" ] + in codeBlockWith ("", classes, params) code' , "Example block" =: unlines [ "#+begin_example" -- cgit v1.2.3 From 07694b30184bcf2ed0e2998016df394f47a1996f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:23:23 +0200 Subject: Org reader: Fix parsing of blank lines within blocks Blank lines were parsed as two newlines instead of just one. Thanks to Xiao Hanyu (@xiaohanyu) for pointing this out. --- src/Text/Pandoc/Readers/Org.hs | 8 ++------ tests/Tests/Readers/Org.hs | 9 +++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c05ac92d0..0f218d43f 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -357,12 +357,8 @@ rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ - choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ - indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) parsedBlockContent blkProps = try $ do diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index a78e8861f..87b0d0c90 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -920,5 +920,14 @@ tests = (unlines [ "fmap id = id" , "fmap (p . q) = (fmap p) . (fmap q)" ]))) + + , "Convert blank lines in blocks to single newlines" =: + unlines [ "#+begin_html" + , "" + , "boring" + , "" + , "#+end_html" + ] =?> + rawBlock "html" "\nboring\n\n" ] ] -- cgit v1.2.3 From c5fd631b550a3b05b60de1684c80387bc46a88cc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 10 May 2014 11:25:20 +0200 Subject: Org reader: Fix block parameter reader, relax constraints The reader produced wrong results for block containing non-letter chars in their parameter arguments. This patch relaxes constraints in that it allows block header arguments to contain any non-space character (except for ']' for inline blocks). Thanks to Xiao Hanyu for noticing this. --- src/Text/Pandoc/Readers/Org.hs | 19 +++++++++++++------ tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0f218d43f..2e4a29beb 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -318,7 +318,7 @@ blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType where indent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) @@ -422,16 +422,23 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue +blockOption = try $ (,) <$> orgArgKey <*> orgParamValue + +inlineBlockOption :: OrgParser (String, String) +inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue orgArgKey :: OrgParser String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar <* skipSpaces +orgParamValue :: OrgParser String +orgParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + +orgInlineParamValue :: OrgParser String +orgInlineParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -1067,7 +1074,7 @@ inlineCodeBlock :: OrgParser (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' blockOption + opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 87b0d0c90..4ef7a7731 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -929,5 +929,17 @@ tests = , "#+end_html" ] =?> rawBlock "html" "\nboring\n\n" + + , "Non-letter chars in source block parameters" =: + unlines [ "#+BEGIN_SRC C :tangle xxxx.c :city Zürich" + , "code body" + , "#+END_SRC" + ] =?> + let classes = [ "c", "rundoc-block" ] + params = [ ("rundoc-language", "C") + , ("rundoc-tangle", "xxxx.c") + , ("rundoc-city", "Zürich") + ] + in codeBlockWith ( "", classes, params) "code body\n" ] ] -- cgit v1.2.3 From 009260647612238b5af964afcbeb452001f2ab0c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 May 2014 23:26:32 -0700 Subject: LaTeX reader: Don't error on "%foo" with no newline. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index bfafea1f6..8476c8636 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -125,7 +125,7 @@ comment :: LP () comment = do char '%' skipMany (satisfy (/='\n')) - newline + optional newline return () bgroup :: LP () -- cgit v1.2.3 From 113a32daa8a628a1f6f166c6b43d07ac005b8d42 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 11 May 2014 15:02:48 +0200 Subject: Process: Fix minor typo in pipeProcess' docs Replace fullstop with comma, adjust capitalisation. --- src/Text/Pandoc/Process.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index 9c8853366..19872b405 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -47,7 +47,7 @@ terminates, and then returns the 'ExitCode' of the process, the standard output, and the standard error. If an asynchronous exception is thrown to the thread executing -@readProcessWithExitCode@. The forked process will be terminated and +@readProcessWithExitCode@, the forked process will be terminated and @readProcessWithExitCode@ will wait (block) until the process has been terminated. -} @@ -102,4 +102,3 @@ forkWait a = do res <- newEmptyMVar _ <- mask $ \restore -> forkIO $ try (restore a) >>= putMVar res return (takeMVar res >>= either (\ex -> throwIO (ex :: SomeException)) return) - -- cgit v1.2.3 From a8319d133908f3c39834984e5e11991b166c37b7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 May 2014 22:52:29 -0700 Subject: LaTeX reader: set `bibliography` in metadata from `\bibliography` cmd. --- src/Text/Pandoc/Readers/LaTeX.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 8476c8636..4b9e424d9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -307,6 +307,8 @@ blockCommands = M.fromList $ , ("caption", tok >>= setCaption) , ("PandocStartInclude", startInclude) , ("PandocEndInclude", endInclude) + , ("bibliography", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs)) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks @@ -314,7 +316,7 @@ blockCommands = M.fromList $ -- newcommand, etc. should be parsed by macro, but we need this -- here so these aren't parsed as inline commands to ignore , "special", "pdfannot", "pdfstringdef" - , "bibliography", "bibliographystyle" + , "bibliographystyle" , "maketitle", "makeindex", "makeglossary" , "addcontentsline", "addtocontents", "addtocounter" -- \ignore{} is used conventionally in literate haskell for definitions @@ -329,6 +331,9 @@ addMeta :: ToMetaValue a => String -> a -> LP () addMeta field val = updateState $ \st -> st{ stateMeta = addMetaField field val $ stateMeta st } +splitBibs :: String -> [Inlines] +splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') + setCaption :: Inlines -> LP Blocks setCaption ils = do updateState $ \st -> st{ stateCaption = Just ils } -- cgit v1.2.3 From 2348f07b11700e685cd194c93c5891d2c288dbc0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 13:05:42 -0700 Subject: Shared addMetaField: if old and new values both lists, concatenate. --- src/Text/Pandoc/Shared.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 31c490af6..4f506b5a6 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -564,8 +564,10 @@ addMetaField :: ToMetaValue a -> Meta addMetaField key val (Meta meta) = Meta $ M.insertWith combine key (toMetaValue val) meta - where combine newval (MetaList xs) = MetaList (xs ++ [newval]) + where combine newval (MetaList xs) = MetaList (xs ++ tolist newval) combine newval x = MetaList [x, newval] + tolist (MetaList ys) = ys + tolist y = [y] -- | Create 'Meta' from old-style title, authors, date. This is -- provided to ease the transition from the old API. -- cgit v1.2.3 From aa019448d6eef010922ea778597a2b7a0f3bd58f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 12 May 2014 13:06:06 -0700 Subject: LaTeX reader: Support `\addbibresource`. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 4b9e424d9..6f870318f 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -309,6 +309,8 @@ blockCommands = M.fromList $ , ("PandocEndInclude", endInclude) , ("bibliography", mempty <$ (skipopts *> braced >>= addMeta "bibliography" . splitBibs)) + , ("addbibresource", mempty <$ (skipopts *> braced >>= + addMeta "bibliography" . splitBibs)) ] ++ map ignoreBlocks -- these commands will be ignored unless --parse-raw is specified, -- in which case they will appear as raw latex blocks -- cgit v1.2.3 From 9df589b9c5a4f2dcb19445239dfae41b54625330 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:45:37 +0200 Subject: Introduce class HasLastStrPosition, generalize functions Both `ParserState` and `OrgParserState` keep track of the parser position at which the last string ended. This patch introduces a new class `HasLastStrPosition` and makes the above types instances of that class. This enables the generalization of functions updating the state or checking if one is right after a string. --- src/Text/Pandoc/Parsing.hs | 32 +++++++++++++++++++++++--------- src/Text/Pandoc/Readers/Markdown.hs | 11 +++-------- src/Text/Pandoc/Readers/Org.hs | 11 ++++------- 3 files changed, 30 insertions(+), 24 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d1e55cbc4..344f6c7ba 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~), withRaw, escaped, characterReference, - updateLastStrPos, anyOrderedListMarker, orderedListMarker, charRef, @@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~), testStringWith, guardEnabled, guardDisabled, + updateLastStrPos, + notAfterString, ParserState (..), HasReaderOptions (..), HasHeaderMap (..), HasIdentifierList (..), HasMacros (..), + HasLastStrPosition (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -904,6 +906,14 @@ instance HasMacros ParserState where extractMacros = stateMacros updateMacros f st = st{ stateMacros = f $ stateMacros st } +class HasLastStrPosition st where + setLastStrPos :: SourcePos -> st -> st + getLastStrPos :: st -> Maybe SourcePos + +instance HasLastStrPosition ParserState where + setLastStrPos pos st = st{ stateLastStrPos = Just pos } + getLastStrPos st = stateLastStrPos st + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -938,6 +948,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext guardDisabled :: HasReaderOptions st => Extension -> Parser s st () guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext +-- | Update the position on which the last string ended. +updateLastStrPos :: HasLastStrPosition st => Parser s st () +updateLastStrPos = getPosition >>= updateState . setLastStrPos + +-- | Whether we are right after the end of a string. +notAfterString :: HasLastStrPosition st => Parser s st Bool +notAfterString = do + pos <- getPosition + st <- getState + return $ getLastStrPos st /= Just pos + data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below @@ -1049,17 +1070,11 @@ charOrRef cs = guard (c `elem` cs) return c) -updateLastStrPos :: Parser [Char] ParserState () -updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ stateLastStrPos = Just p } - singleQuoteStart :: Parser [Char] ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote - pos <- getPosition - st <- getState -- single quote start can't be right after str - guard $ stateLastStrPos st /= Just pos + guard =<< notAfterString () <$ charOrRef "'\8216\145" singleQuoteEnd :: Parser [Char] st () @@ -1156,4 +1171,3 @@ applyMacros' target = do then do macros <- extractMacros `fmap` getState return $ applyMacros macros target else return target - diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d1637b701..1ac98e94c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_') where checkIntraword = do exts <- getOption readerExtensions when (Ext_intraword_underscores `Set.member` exts) $ do - pos <- getPosition - lastStrPos <- stateLastStrPos <$> getState - guard $ lastStrPos /= Just pos + guard =<< notAfterString -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) @@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n') str :: MarkdownParser (F Inlines) str = do result <- many1 alphaNum - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + updateLastStrPos let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) isSmart <- getOption readerSmart if isSmart @@ -1821,9 +1818,7 @@ citeKey :: MarkdownParser (Bool, String) citeKey = try $ do -- make sure we're not right after an alphanumeric, -- since foo@bar.baz is probably an email address - lastStrPos <- stateLastStrPos <$> getState - pos <- getPosition - guard $ lastStrPos /= Just pos + guard =<< notAfterString suppress_author <- option False (char '-' >> return True) char '@' first <- letter <|> char '_' diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2e4a29beb..5dbcaee98 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -105,6 +105,10 @@ instance HasMeta OrgParserState where deleteMeta field st = st{ orgStateMeta = deleteMeta field $ orgStateMeta st } +instance HasLastStrPosition OrgParserState where + getLastStrPos = orgStateLastStrPos + setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } + instance Default OrgParserState where def = defaultOrgParserState @@ -1274,13 +1278,6 @@ afterEmphasisPreChar = do lastPrePos <- orgStateLastPreCharPos <$> getState return . fromMaybe True $ (== pos) <$> lastPrePos --- | Whether we are right after the end of a string -notAfterString :: OrgParser Bool -notAfterString = do - pos <- getPosition - lastStrPos <- orgStateLastStrPos <$> getState - return $ lastStrPos /= Just pos - -- | Whether the parser is right after a forbidden border char notAfterForbiddenBorderChar :: OrgParser Bool notAfterForbiddenBorderChar = do -- cgit v1.2.3 From 2423f9e6b180bc6b04d222a4b574de995d296f80 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:58:05 +0200 Subject: Move `citeKey` from Readers.Markdown to Parsing The function can be used by other readers, so it is made accessible for all parsers. --- src/Text/Pandoc/Parsing.hs | 13 +++++++++++++ src/Text/Pandoc/Readers/Markdown.hs | 14 -------------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 344f6c7ba..4cd6591c0 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -94,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~), apostrophe, dash, nested, + citeKey, macro, applyMacros', Parser, @@ -1144,6 +1145,18 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res +citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String) +citeKey = try $ do + guard =<< notAfterString + suppress_author <- option False (char '-' *> return True) + char '@' + firstChar <- letter <|> char '_' + let regchar = satisfy (\c -> isAlphaNum c || c == '_') + let internal p = try $ p <* lookAhead regchar + rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") + let key = firstChar:rest + return (suppress_author, key) + -- -- Macros -- diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1ac98e94c..5129bc2e3 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1814,20 +1814,6 @@ normalCite = try $ do char ']' return citations -citeKey :: MarkdownParser (Bool, String) -citeKey = try $ do - -- make sure we're not right after an alphanumeric, - -- since foo@bar.baz is probably an email address - guard =<< notAfterString - suppress_author <- option False (char '-' >> return True) - char '@' - first <- letter <|> char '_' - let regchar = satisfy (\c -> isAlphaNum c || c == '_') - let internal p = try $ p >>~ lookAhead regchar - rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") - let key = first:rest - return (suppress_author, key) - suffix :: MarkdownParser (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) -- cgit v1.2.3 From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:49:30 +0200 Subject: Org reader: support Pandocs citation extension Citations are defined via the "normal citation" syntax used in markdown, with the sole difference that newlines are not allowed between "[...]". This is for consistency, as org-mode generally disallows newlines between square brackets. The extension is turned on by default and can be turned off via the default syntax-extension mechanism, i.e. by specifying "org-citation" as the input format. Move `citeKey` from Readers.Markdown into Parsing The function can be used by other readers, so it is made accessible for all parsers. --- src/Text/Pandoc.hs | 2 +- src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 22 +++++++++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index dd5bc18f6..130338f0e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = pandocExtensions +getDefaultExtensions "org" = Set.fromList [Ext_citations] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] @@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode - diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5dbcaee98..86dda2732 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak + , cite , footnote , linkOrImage , anchor @@ -933,6 +934,51 @@ endline = try $ do updateLastPreCharPos return . return $ B.space +cite :: OrgParser (F Inlines) +cite = try $ do + guardEnabled Ext_citations + (cs, raw) <- withRaw normalCite + return $ (flip B.cite (B.text raw)) <$> cs + +normalCite :: OrgParser (F [Citation]) +normalCite = try $ char '[' + *> skipSpaces + *> citeList + <* skipSpaces + <* char ']' + +citeList :: OrgParser (F [Citation]) +citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) + +citation :: OrgParser (F Citation) +citation = try $ do + pref <- prefix + (suppress_author, key) <- citeKey + suff <- suffix + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + where + prefix = trimInlinesF . mconcat <$> + manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) + suffix = try $ do + hasSpace <- option False (notFollowedBy nonspaceChar >> return True) + skipSpaces + rest <- trimInlinesF . mconcat <$> + many (notFollowedBy (oneOf ";]") *> inline) + return $ if hasSpace + then (B.space <>) <$> rest + else rest + footnote :: OrgParser (F Inlines) footnote = try $ inlineNote <|> referencedNote @@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") applyCustomLinkFormat :: String -> OrgParser (F String) applyCustomLinkFormat link = do @@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode - where enclosedByPair s e p = char s *> many1Till p (char e) + +enclosedByPair :: Char -- ^ opening char + -> Char -- ^ closing char + -> OrgParser a -- ^ parser + -> OrgParser [a] +enclosedByPair s e p = char s *> many1Till p (char e) emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ef7a7731..ca97ba348 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -225,6 +225,28 @@ tests = ] ) "echo 'Hello, World'") + + , "Citation" =: + "[@nonexistent]" =?> + let citation = Citation + { citationId = "nonexistent" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[@nonexistent]") + + , "Citation containing text" =: + "[see @item1 p. 34-35]" =?> + let citation = Citation + { citationId = "item1" + , citationPrefix = [Str "see"] + , citationSuffix = [Space ,Str "p.",Space,Str "34-35"] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[see @item1 p. 34-35]") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From c5c9b0d2890699ee8fcdbb660662957f8efad319 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 15 May 2014 10:11:48 -0700 Subject: EPUB writer: Fixed regression on cover image. In 1.12.4 and 1.12.4.2, the cover image would not appear properly, because the metadata id was not correct. This was introduced by the fix to #1254. Now we derive the id from the actual cover image filename, which we preserve rather than using "cover-image." --- src/Text/Pandoc/Writers/EPUB.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 893ec3be9..4d2a39846 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -333,7 +333,7 @@ writeEPUB opts doc@(Pandoc meta _) = do case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = "cover-image" ++ takeExtension img + let coverImage = "media/" ++ takeFileName img let cpContent = renderHtml $ writeHtml opts' (Pandoc meta [RawBlock (Format "html") $ "
\n\"cover\n
"]) imgContent <- B.readFile img @@ -561,8 +561,8 @@ writeEPUB opts doc@(Pandoc meta _) = do ,("content", "0")] $ () ] ++ case epubCoverImage metadata of Nothing -> [] - Just _ -> [unode "meta" ! [("name","cover"), - ("content","cover-image")] $ ()] + Just img -> [unode "meta" ! [("name","cover"), + ("content", toId img)] $ ()] , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1 @@ -657,8 +657,8 @@ metadataElement version md currentTime = coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md rightsNodes = maybe [] (dcTag' "rights") $ epubRights md coverImageNodes = maybe [] - (const $ [unode "meta" ! [("name","cover"), - ("content","cover-image")] $ ()]) + (\img -> [unode "meta" ! [("name","cover"), + ("content",toId img)] $ ()]) $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ (showDateTimeISO8601 currentTime) | version == EPUB3 ] -- cgit v1.2.3 From ee8c8da8ccfc7e3eb33679fd8a3a465766f9d5f7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 May 2014 22:04:39 -0700 Subject: Removed dependency on conduit. * http-conduit flag is now https. * Instead of http-conduit, we depend on http-client and http-client-tls. --- INSTALL | 5 ++--- pandoc.cabal | 11 ++++++----- src/Text/Pandoc/Shared.hs | 14 ++++++++------ 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'src/Text') diff --git a/INSTALL b/INSTALL index 9442adbf0..f3366e103 100644 --- a/INSTALL +++ b/INSTALL @@ -116,9 +116,8 @@ assume that the pandoc source directory is your working directory. cabal install hsb2hs - - `http-conduit`: use the `http-conduit` library to fetch external - resources (default yes -- without this, pandoc cannot make SSL - connections) + - `https`: enable support for downloading resources over https + (using the `http-client` and `http-client-tls` libraries). 3. Build: diff --git a/pandoc.cabal b/pandoc.cabal index 6f12ec375..1741c59f6 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -196,8 +196,8 @@ Flag embed_data_files Description: Embed data files in binary for relocatable executable. Default: False -Flag http-conduit - Description: Enable downloading of resources over https. +Flag https + Description: Enable support for downloading of resources over https. Default: True Library @@ -239,10 +239,11 @@ Library hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8 Build-Tools: alex, happy - if flag(http-conduit) - Build-Depends: http-conduit >= 1.9 && < 2.2, + if flag(https) + Build-Depends: http-client >= 0.3.2 && < 0.4, + http-client-tls >= 0.2 && < 0.3, http-types >= 0.8 && < 0.9 - cpp-options: -DHTTP_CONDUIT + cpp-options: -DHTTP_CLIENT if flag(embed_data_files) cpp-options: -DEMBED_DATA_FILES -- Build-Tools: hsb2hs -- not yet recognized by cabal diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 4f506b5a6..d8cbe46d9 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -118,11 +118,13 @@ import System.FilePath ( joinPath, splitDirectories ) #else import Paths_pandoc (getDataFileName) #endif -#ifdef HTTP_CONDUIT +#ifdef HTTP_CLIENT import Data.ByteString.Lazy (toChunks) -import Network.HTTP.Conduit (httpLbs, parseUrl, withManager, - responseBody, responseHeaders, addProxy, - Request(port,host)) +import Network.HTTP.Client (httpLbs, parseUrl, withManager, + responseBody, responseHeaders, + Request(port,host)) +import Network.HTTP.Client.Internal (addProxy) +import Network.HTTP.Client.TLS (tlsManagerSettings) import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType) import Network (withSocketsDo) @@ -665,7 +667,7 @@ openURL u let mime = takeWhile (/=',') $ drop 5 u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u in return $ Right (decodeLenient contents, Just mime) -#ifdef HTTP_CONDUIT +#ifdef HTTP_CLIENT | otherwise = withSocketsDo $ E.try $ do req <- parseUrl u (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy" @@ -674,7 +676,7 @@ openURL u Right pr -> case parseUrl pr of Just r -> addProxy (host r) (port r) req Nothing -> req - resp <- withManager $ httpLbs req' + resp <- withManager tlsManagerSettings $ httpLbs req' return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else -- cgit v1.2.3 From 8d04c821aaa8a96803126140b58b8aef02d85906 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 May 2014 09:45:00 -0700 Subject: Avoid `import Prelude hiding (catch)`. See #1309. --- src/Text/Pandoc/UTF8.hs | 3 +-- src/Text/Pandoc/Writers/EPUB.hs | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 33c9ec1c5..cf25de85b 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -51,8 +51,7 @@ import System.IO hiding (readFile, writeFile, getContents, #if MIN_VERSION_base(4,6,0) import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) #else -import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn, - catch) +import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) #endif import qualified System.IO as IO import qualified Data.ByteString.Char8 as B diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 4d2a39846..9514e87c9 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -59,11 +59,7 @@ import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower, isDigit, isAlphaNum ) import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (getMimeType) -#if MIN_VERSION_base(4,6,0) -#else -import Prelude hiding (catch) -#endif -import Control.Exception (catch, SomeException) +import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup @@ -153,10 +149,10 @@ getEPUBMetadata opts meta = do then case lookup "lang" (writerVariables opts) of Just x -> return m{ epubLanguage = x } Nothing -> do - localeLang <- catch (liftM + localeLang <- E.catch (liftM (map (\c -> if c == '_' then '-' else c) . takeWhile (/='.')) $ getEnv "LANG") - (\e -> let _ = (e :: SomeException) in return "en-US") + (\e -> let _ = (e :: E.SomeException) in return "en-US") return m{ epubLanguage = localeLang } else return m let fixDate m = -- cgit v1.2.3 From 3c77ab98bf9a055237a69be48001f5c6ef1d64ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 May 2014 12:52:25 -0700 Subject: EPUB writer: Handle multiple dates with OPF `event` attributes. Note: in EPUB3 we can have only one dc:date, so only the first one is used. --- src/Text/Pandoc/Writers/EPUB.hs | 47 +++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 9514e87c9..b6687c330 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -72,7 +72,7 @@ data Chapter = Chapter (Maybe [Int]) [Block] data EPUBMetadata = EPUBMetadata{ epubIdentifier :: [Identifier] , epubTitle :: [Title] - , epubDate :: String + , epubDate :: [Date] , epubLanguage :: String , epubCreator :: [Creator] , epubContributor :: [Creator] @@ -93,6 +93,11 @@ data Stylesheet = StylesheetPath FilePath | StylesheetContents String deriving Show +data Date = Date{ + dateText :: String + , dateEvent :: Maybe String + } deriving Show + data Creator = Creator{ creatorText :: String , creatorRole :: Maybe String @@ -159,7 +164,9 @@ getEPUBMetadata opts meta = do if null (epubDate m) then do currentTime <- getCurrentTime - return $ m{ epubDate = showDateTimeISO8601 currentTime } + return $ m{ epubDate = [ Date{ + dateText = showDateTimeISO8601 currentTime + , dateEvent = Nothing } ] } else return m let addAuthor m = if any (\c -> creatorRole c == Just "aut") $ epubCreator m @@ -183,8 +190,10 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md , titleFileAs = getAttr "file-as" , titleType = getAttr "type" } : epubTitle md } - | name == "date" = md{ epubDate = fromMaybe "" $ normalizeDate' - $ strContent e } + | name == "date" = md{ epubDate = + Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e + , dateEvent = getAttr "event" + } : epubDate md } | name == "language" = md{ epubLanguage = strContent e } | name == "creator" = md{ epubCreator = Creator{ creatorText = strContent e @@ -249,6 +258,16 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing +getDate :: String -> Meta -> [Date] +getDate s meta = getList s meta handleMetaValue + where handleMetaValue (MetaMap m) = + Date{ dateText = maybe "" id $ + M.lookup "text" m >>= normalizeDate' . metaValueToString + , dateEvent = metaValueToString <$> M.lookup "event" m } + handleMetaValue mv = Date { dateText = maybe "" + id $ normalizeDate' $ metaValueToString mv + , dateEvent = Nothing } + simpleList :: String -> Meta -> [String] simpleList s meta = case lookupMeta s meta of @@ -278,8 +297,7 @@ metadataFromMeta opts meta = EPUBMetadata{ } where identifiers = getIdentifier meta titles = getTitle meta - date = fromMaybe "" $ - (metaValueToString <$> lookupMeta "date" meta) >>= normalizeDate' + date = getDate "date" meta language = maybe "" metaValueToString $ lookupMeta "language" meta `mplus` lookupMeta "lang" meta creators = getCreator "creator" meta @@ -637,7 +655,14 @@ metadataElement version md currentTime = identifierNodes = withIds "epub-id" toIdentifierNode $ epubIdentifier md titleNodes = withIds "epub-title" toTitleNode $ epubTitle md - dateNodes = dcTag' "date" $ epubDate md + dateNodes = if version == EPUB2 + then withIds "epub-date" toDateNode $ epubDate md + else -- epub3 allows only one dc:date + -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate + case epubDate md of + [] -> [] + (x:_) -> [dcNode "date" ! [("id","epub-date")] + $ dateText x] languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ epubCreator md @@ -671,7 +696,7 @@ metadataElement version md currentTime = (schemeToOnix `fmap` scheme) toCreatorNode s id' creator | version == EPUB2 = [dcNode s ! - ([("id",id')] ++ + (("id",id') : maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ maybe [] (\x -> [("opf:role",x)]) (creatorRole creator >>= toRelator)) $ creatorText creator] @@ -685,7 +710,7 @@ metadataElement version md currentTime = (creatorRole creator >>= toRelator) toTitleNode id' title | version == EPUB2 = [dcNode "title" ! - ([("id",id')] ++ + (("id",id') : maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title) ++ maybe [] (\x -> [("opf:title-type",x)]) (titleType title)) $ titleText title] @@ -697,6 +722,10 @@ metadataElement version md currentTime = maybe [] (\x -> [unode "meta" ! [("refines",'#':id'),("property","title-type")] $ x]) (titleType title) + toDateNode id' date = [dcNode "date" ! + (("id",id') : + maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ + dateText date] schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03" schemeToOnix "UPC" = "04" -- cgit v1.2.3 From 3238a2f9191b83864abd682261634a603ec89056 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 20 May 2014 22:29:21 +0200 Subject: Org reader: support for inline LaTeX Inline LaTeX is now accepted and parsed by the org-mode reader. Both, math symbols (like \tau) and LaTeX commands (like \cite{Coffee}), can be used without any further escaping. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + src/Text/Pandoc/Readers/Org.hs | 32 +++++++++++++++++++++++++++++++- tests/Tests/Readers/Org.hs | 27 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6f870318f..7fc587882 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -31,6 +31,7 @@ Conversion of LaTeX to 'Pandoc' document. module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, rawLaTeXBlock, + inlineCommand, handleIncludes ) where diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 86dda2732..c3ea8d7c2 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,12 +39,15 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString , updateLastStrPos ) +import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) +import Text.Parsec.Pos (updatePosString) +import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) +import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -886,6 +889,7 @@ inline = , verbatim , subscript , superscript + , inlineLaTeX , symbol ] <* (guard =<< newlinesCountWithinLimits) "inline" @@ -1351,3 +1355,29 @@ simpleSubOrSuperString = try $ , mappend <$> option [] ((:[]) <$> oneOf "+-") <*> many1 alphaNum ] + +inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX = try $ do + cmd <- inlineLaTeXCommand + maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd + where + parseAsMath :: String -> Maybe Inlines + parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs + + parseAsInlineLaTeX :: String -> Maybe Inlines + parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + + state :: ParserState + state = def{ stateOptions = def{ readerParseRaw = True }} + +maybeRight :: Either a b -> Maybe b +maybeRight = either (const Nothing) Just + +inlineLaTeXCommand :: OrgParser String +inlineLaTeXCommand = try $ do + rest <- getInput + pos <- getPosition + case runParser rawLaTeXInline def "source" rest of + Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) + <* (setPosition $ updatePosString pos cs) + _ -> mzero diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ca97ba348..4ed77887f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -247,6 +247,33 @@ tests = , citationNoteNum = 0 , citationHash = 0} in (para $ cite [citation] "[see @item1 p. 34-35]") + + , "Inline LaTeX symbol" =: + "\\dots" =?> + para "…" + + , "Inline LaTeX command" =: + "\\textit{Emphasised}" =?> + para (emph "Emphasised") + + , "Inline LaTeX math symbol" =: + "\\tau" =?> + para (emph "τ") + + , "Unknown inline LaTeX command" =: + "\\notacommand{foo}" =?> + para (rawInline "latex" "\\notacommand{foo}") + + , "LaTeX citation" =: + "\\cite{Coffee}" =?> + let citation = Citation + { citationId = "Coffee" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0} + in (para . cite [citation] $ rawInline "latex" "\\cite{Coffee}") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 2e80613451651ec8f1945daa7540168a427f0507 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 11:59:28 -0700 Subject: Markdown reader: inline math must have nonspace before final `$`. Closes #1313. --- src/Text/Pandoc/Parsing.hs | 10 ++++++---- tests/markdown-reader-more.native | 2 ++ tests/markdown-reader-more.txt | 4 ++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4cd6591c0..8bc042e28 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -464,11 +464,13 @@ mathInlineWith :: String -> String -> Parser [Char] st String mathInlineWith op cl = try $ do string op notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") + words' <- many1Till (count 1 (noneOf " \t\n\\") <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) + <|> do (blankline <* notFollowedBy' blankline) <|> + (oneOf " \t" <* skipMany (oneOf " \t")) + notFollowedBy (char '$') + return " " + ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 return $ concat words' diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native index 0d74c233d..b4713bc93 100644 --- a/tests/markdown-reader-more.native +++ b/tests/markdown-reader-more.native @@ -16,6 +16,8 @@ ,Header 3 ("my-header",[],[]) [Str "my",Space,Str "header"] ,Header 2 ("in-math",[],[]) [Str "$",Space,Str "in",Space,Str "math"] ,Para [Math InlineMath "\\$2 + \\$3"] +,Para [Str "This",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "math:"] +,Para [Str "$PATH",Space,Str "90",Space,Str "$PATH"] ,Header 2 ("commented-out-list-item",[],[]) [Str "Commented-out",Space,Str "list",Space,Str "item"] ,BulletList [[Plain [Str "one",Space,RawInline (Format "html") ""]] diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt index 739543bfd..4cd69c9d8 100644 --- a/tests/markdown-reader-more.txt +++ b/tests/markdown-reader-more.txt @@ -58,6 +58,10 @@ $\$2 + \$3$ +This should not be math: + +$PATH 90 $PATH + ## Commented-out list item - one -- cgit v1.2.3 From e3ddc371dee9630dae71d61a69088b06cea8e909 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 27 May 2014 12:44:39 -0700 Subject: Markdown reader: Handle `c++` and `objective-c` as language identifiers in github-style fenced blocks. Closes #1318. Note: This is special-case handling of these two cases. It would be good to do something more systematic. --- src/Text/Pandoc/Readers/Markdown.hs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5129bc2e3..caa938ed6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -618,12 +618,19 @@ codeBlockFenced = try $ do skipMany spaceChar attr <- option ([],[],[]) $ try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[x],[])) <$> identifier) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar) blankline contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines return $ return $ B.codeBlockWith attr $ intercalate "\n" contents +-- correctly handle github language identifiers +toLanguageId :: String -> String +toLanguageId = map toLower . go + where go "c++" = "cpp" + go "objective-c" = "objectivec" + go x = x + codeBlockIndented :: MarkdownParser (F Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> -- cgit v1.2.3 From 9cf5f74e8fad9c7b898553724a37035bfc46f268 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 28 May 2014 10:40:50 -0700 Subject: PDF writer: Fixed treatment of data uris for images. Closes #1062. --- pandoc.cabal | 3 ++- src/Text/Pandoc/PDF.hs | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index a2138d6bf..c834319a2 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -237,7 +237,8 @@ Library scientific >= 0.2 && < 0.4, vector >= 0.10 && < 0.11, hslua >= 0.3 && < 0.4, - binary >= 0.5 && < 0.8 + binary >= 0.5 && < 0.8, + SHA >= 1.6 && < 1.7 Build-Tools: alex, happy if flag(https) Build-Depends: http-client >= 0.3.2 && < 0.4, diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index e4e06e6c9..bd55c565f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -38,11 +38,11 @@ import qualified Data.ByteString as BS import System.Exit (ExitCode (..)) import System.FilePath import System.Directory +import Data.Digest.Pure.SHA (showDigest, sha1) import System.Environment import Control.Monad (unless) import Data.List (isInfixOf) import Data.Maybe (fromMaybe) -import qualified Data.ByteString.Base64 as B64 import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Definition import Text.Pandoc.Walk (walkM) @@ -98,7 +98,7 @@ handleImage' baseURL tmpdir (Image ils (src,tit)) = do Right (contents, Just mime) -> do let ext = fromMaybe (takeExtension src) $ extensionFromMimeType mime - let basename = UTF8.toString $ B64.encode $ UTF8.fromString src + let basename = showDigest $ sha1 $ BL.fromChunks [contents] let fname = tmpdir basename <.> ext BS.writeFile fname contents return $ Image ils (fname,tit) -- cgit v1.2.3 From 23a9b800a35d3c17d29a278b6bb218f05642d282 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 31 May 2014 22:02:33 -0700 Subject: Docx writer: Take over document formatting from reference.docx. This includes margins, page size, page orientation. --- src/Text/Pandoc/Writers/Docx.hs | 47 ++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 551d97855..6fd76c9c7 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -204,11 +204,35 @@ writeDocx opts doc@(Pandoc meta _) = do let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img let imageEntries = map toImageEntry imgs + -- adjust contents to add sectPr from reference.docx + let docpath = "word/document.xml" + parsedDoc <- parseXml refArchive distArchive docpath + let sectprs = filterElementsName (\qn -> qPrefix qn == Just "w" && + qName qn == "sectPr") + parsedDoc + + let stdAttributes = + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + + let contents' = contents ++ sectprs + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] $ contents' + -- word/document.xml - let contentEntry = toEntry "word/document.xml" epochtime $ renderXml contents + let contentEntry = toEntry "word/document.xml" epochtime + $ renderXml docContents -- footnotes - let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml footnotes + let notes = mknode "w:footnotes" stdAttributes footnotes + let footnotesEntry = toEntry "word/footnotes.xml" epochtime $ renderXml notes -- footnote rels let footnoteRelEntry = toEntry "word/_rels/footnotes.xml.rels" epochtime @@ -392,8 +416,9 @@ mkLvl marker lvl = getNumId :: WS Int getNumId = length `fmap` gets stLists --- | Convert Pandoc document to two OpenXML elements (the main document and footnotes). -writeOpenXML :: WriterOptions -> Pandoc -> WS (Element, Element) +-- | Convert Pandoc document to two lists of +-- OpenXML elements (the main document and footnotes). +writeOpenXML :: WriterOptions -> Pandoc -> WS ([Element], [Element]) writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta ++ case lookupMeta "subtitle" meta of Just (MetaBlocks [Plain xs]) -> LineBreak : xs @@ -411,19 +436,7 @@ writeOpenXML opts (Pandoc meta blocks) = do doc' <- blocksToOpenXML opts blocks' notes' <- reverse `fmap` gets stFootnotes let meta' = title ++ authors ++ date - let stdAttributes = - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") - ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") - ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") - ,("xmlns:o","urn:schemas-microsoft-com:office:office") - ,("xmlns:v","urn:schemas-microsoft-com:vml") - ,("xmlns:w10","urn:schemas-microsoft-com:office:word") - ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") - ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") - ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let doc = mknode "w:document" stdAttributes $ mknode "w:body" [] (meta' ++ doc') - let notes = mknode "w:footnotes" stdAttributes notes' - return (doc, notes) + return (meta' ++ doc', notes') -- | Convert a list of Pandoc blocks to OpenXML. blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] -- cgit v1.2.3 From 6327ccf523bb5d550d85dd7782079b8f070fe5d1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 15:29:27 -0700 Subject: Minor code reformat. --- src/Text/Pandoc/Writers/Docx.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 6fd76c9c7..026cfcb41 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -60,6 +60,11 @@ import qualified Control.Exception as E import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) import Control.Applicative ((<|>)) +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -73,11 +78,6 @@ data WriterState = WriterState{ , stLists :: [ListMarker] } -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - defaultWriterState :: WriterState defaultWriterState = WriterState{ stTextProperties = [] -- cgit v1.2.3 From 6848f642e82322c0894c62d3215e98325ab7fd8c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 21:15:03 -0700 Subject: Docx writer: Header and footer are now carried over from reference.docx. --- data/reference.docx | Bin 9797 -> 9360 bytes src/Text/Pandoc/Writers/Docx.hs | 54 ++++++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'src/Text') diff --git a/data/reference.docx b/data/reference.docx index a9c268b9f..789237dd8 100644 Binary files a/data/reference.docx and b/data/reference.docx differ diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 026cfcb41..584662be8 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -155,8 +155,11 @@ writeDocx opts doc@(Pandoc meta _) = do ,("/word/document.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") ,("/word/footnotes.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") - ] ++ map mkImageOverride imgs + "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"), + ("/word/header1.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"), + ("/word/footer1.xml", + "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml") ] ++ map mkImageOverride imgs let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" @@ -191,7 +194,14 @@ writeDocx opts doc@(Pandoc meta _) = do "theme/theme1.xml") ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", - "footnotes.xml")] + "footnotes.xml") + ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/header", + "rId8", + "header1.xml") + ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer", + "rId9", + "footer1.xml") + ] let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -207,9 +217,16 @@ writeDocx opts doc@(Pandoc meta _) = do -- adjust contents to add sectPr from reference.docx let docpath = "word/document.xml" parsedDoc <- parseXml refArchive distArchive docpath - let sectprs = filterElementsName (\qn -> qPrefix qn == Just "w" && - qName qn == "sectPr") - parsedDoc + let mbsectpr = filterElementName (\qn -> qPrefix qn == Just "w" && + qName qn == "sectPr") parsedDoc + let sectPrProps = case mbsectpr of + Nothing -> [] + Just e -> filterElementsName (\qn -> + qPrefix qn == Just "w" && + qName qn `notElem` ["headerReference","footerReference","sectPr"]) e + let headerPr = mknode "w:headerReference" [("w:type","default"),("r:id","rId8")] $ () + let footerPr = mknode "w:footerReference" [("w:type","default"),("r:id","rId9")] $ () + let sectpr = mknode "w:sectPr" [] $ [headerPr, footerPr] ++ sectPrProps let stdAttributes = [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") @@ -222,7 +239,7 @@ writeDocx opts doc@(Pandoc meta _) = do ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - let contents' = contents ++ sectprs + let contents' = contents ++ [sectpr] let docContents = mknode "w:document" stdAttributes $ mknode "w:body" [] $ contents' @@ -281,20 +298,25 @@ writeDocx opts doc@(Pandoc meta _) = do ] let relsEntry = toEntry relsPath epochtime $ renderXml rels - let entryFromArchive path = + let entryFromArchive arch path = (toEntry path epochtime . renderXml) `fmap` - parseXml refArchive distArchive path - docPropsAppEntry <- entryFromArchive "docProps/app.xml" - themeEntry <- entryFromArchive "word/theme/theme1.xml" - fontTableEntry <- entryFromArchive "word/fontTable.xml" - settingsEntry <- entryFromArchive "word/settings.xml" - webSettingsEntry <- entryFromArchive "word/webSettings.xml" + parseXml arch distArchive path + docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" + themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" + fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" + -- we take settings.xml from dist archive because the ref archive + -- sometimes references special footnotes and endnotes that may + -- not be defined in footnotes.xml or endnotes.xml. + settingsEntry <- entryFromArchive distArchive "word/settings.xml" + webSettingsEntry <- entryFromArchive distArchive "word/webSettings.xml" + headerEntry <- entryFromArchive refArchive "word/header1.xml" + footerEntry <- entryFromArchive refArchive "word/footer1.xml" let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f , f /= "word/_rels/document.xml.rels" , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM entryFromArchive miscRels + miscRelEntries <- mapM (entryFromArchive refArchive) miscRels -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -302,7 +324,7 @@ writeDocx opts doc@(Pandoc meta _) = do footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : - imageEntries ++ miscRelEntries + headerEntry : footerEntry : imageEntries ++ miscRelEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From 7242165bed21a63b49e1ce7d639400f095800204 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 22:29:13 -0700 Subject: Docx writer: Improved handling of headers/footers. --- src/Text/Pandoc/Writers/Docx.hs | 105 ++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 52 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 584662be8..098da119b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -59,6 +59,7 @@ import Text.Printf (printf) import qualified Control.Exception as E import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) import Control.Applicative ((<|>)) +import Data.Maybe (mapMaybe) data ListMarker = NoMarker | BulletMarker @@ -123,6 +124,40 @@ writeDocx opts doc@(Pandoc meta _) = do epochtime <- floor `fmap` getPOSIXTime let imgs = M.elems $ stImages st + -- create entries for images in word/media/... + let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img + let imageEntries = map toImageEntry imgs + + -- adjust contents to add sectPr from reference.docx + parsedDoc <- parseXml refArchive distArchive "word/document.xml" + let wname f qn = qPrefix qn == Just "w" && f (qName qn) + let mbsectpr = filterElementName (wname (=="sectPr")) parsedDoc + + let sectpr = maybe (mknode "w:sectPr" [] $ ()) id mbsectpr + + let stdAttributes = + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + + let contents' = contents ++ [sectpr] + let docContents = mknode "w:document" stdAttributes + $ mknode "w:body" [] $ contents' + + parsedRels <- parseXml refArchive distArchive "word/_rels/document.xml.rels" + let isHeaderNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" + let isFooterNode e = findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer" + let headers = filterElements isHeaderNode parsedRels + let footers = filterElements isFooterNode parsedRels + + let extractTarget e = findAttr (QName "Target" Nothing Nothing) e + -- we create [Content_Types].xml and word/_rels/document.xml.rels -- from scratch rather than reading from reference.docx, -- because Word sometimes changes these files when a reference.docx is modified, @@ -135,7 +170,7 @@ writeDocx opts doc@(Pandoc meta _) = do let mkImageOverride (_, imgpath, mbMimeType, _, _) = mkOverrideNode ("/word/" ++ imgpath, fromMaybe "application/octet-stream" mbMimeType) - let overrides = map mkOverrideNode + let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") ,("/word/numbering.xml", @@ -155,11 +190,13 @@ writeDocx opts doc@(Pandoc meta _) = do ,("/word/document.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") ,("/word/footnotes.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"), - ("/word/header1.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"), - ("/word/footer1.xml", - "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml") ] ++ map mkImageOverride imgs + "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") + ] ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ + map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ + map mkImageOverride imgs let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" @@ -195,13 +232,8 @@ writeDocx opts doc@(Pandoc meta _) = do ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", "rId7", "footnotes.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/header", - "rId8", - "header1.xml") - ,("http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer", - "rId9", - "footer1.xml") - ] + ] ++ + headers ++ footers let toImgRel (ident,path,_,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () @@ -210,38 +242,6 @@ writeDocx opts doc@(Pandoc meta _) = do let relEntry = toEntry "word/_rels/document.xml.rels" epochtime $ renderXml reldoc - -- create entries for images in word/media/... - let toImageEntry (_,path,_,_,img) = toEntry ("word/" ++ path) epochtime $ toLazy img - let imageEntries = map toImageEntry imgs - - -- adjust contents to add sectPr from reference.docx - let docpath = "word/document.xml" - parsedDoc <- parseXml refArchive distArchive docpath - let mbsectpr = filterElementName (\qn -> qPrefix qn == Just "w" && - qName qn == "sectPr") parsedDoc - let sectPrProps = case mbsectpr of - Nothing -> [] - Just e -> filterElementsName (\qn -> - qPrefix qn == Just "w" && - qName qn `notElem` ["headerReference","footerReference","sectPr"]) e - let headerPr = mknode "w:headerReference" [("w:type","default"),("r:id","rId8")] $ () - let footerPr = mknode "w:footerReference" [("w:type","default"),("r:id","rId9")] $ () - let sectpr = mknode "w:sectPr" [] $ [headerPr, footerPr] ++ sectPrProps - - let stdAttributes = - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") - ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") - ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") - ,("xmlns:o","urn:schemas-microsoft-com:office:office") - ,("xmlns:v","urn:schemas-microsoft-com:vml") - ,("xmlns:w10","urn:schemas-microsoft-com:office:word") - ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") - ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") - ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] - - let contents' = contents ++ [sectpr] - let docContents = mknode "w:document" stdAttributes - $ mknode "w:body" [] $ contents' -- word/document.xml let contentEntry = toEntry "word/document.xml" epochtime @@ -304,13 +304,13 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" - -- we take settings.xml from dist archive because the ref archive - -- sometimes references special footnotes and endnotes that may - -- not be defined in footnotes.xml or endnotes.xml. + -- we use dist archive for settings.xml, because Word sometimes + -- adds references to footnotes or endnotes we don't have... settingsEntry <- entryFromArchive distArchive "word/settings.xml" - webSettingsEntry <- entryFromArchive distArchive "word/webSettings.xml" - headerEntry <- entryFromArchive refArchive "word/header1.xml" - footerEntry <- entryFromArchive refArchive "word/footer1.xml" + webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" + headerFooterEntries <- mapM (entryFromArchive refArchive) $ + mapMaybe (\e -> fmap ("word/" ++) $ extractTarget e) + (headers ++ footers) let miscRels = [ f | f <- filesInArchive refArchive , "word/_rels/" `isPrefixOf` f , ".xml.rels" `isSuffixOf` f @@ -324,7 +324,8 @@ writeDocx opts doc@(Pandoc meta _) = do footnoteRelEntry : numEntry : styleEntry : footnotesEntry : docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : - headerEntry : footerEntry : imageEntries ++ miscRelEntries + imageEntries ++ headerFooterEntries ++ + miscRelEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From e1cf47efa0029a385c39053c9f441ade29d7a991 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 1 Jun 2014 23:45:05 -0700 Subject: Templates: Fail informatively on template syntax errors. With the move from parsec to attoparsec, we lost good error reporting. In fact, since we weren't testing for end of input, malformed templates would fail silently. Here we revert back to Parsec for better error messages. --- pandoc.cabal | 1 - src/Text/Pandoc/Templates.hs | 70 ++++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 33 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index c834319a2..01b3c401a 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -232,7 +232,6 @@ Library temporary >= 1.1 && < 1.3, blaze-html >= 0.5 && < 0.8, blaze-markup >= 0.5.1 && < 0.7, - attoparsec >= 0.10 && < 0.12, yaml >= 0.8.8.2 && < 0.9, scientific >= 0.2 && < 0.4, vector >= 0.10 && < 0.11, diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 551db6483..89856a9ee 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -96,8 +96,8 @@ module Text.Pandoc.Templates ( renderTemplate import Data.Char (isAlphaNum) import Control.Monad (guard, when) import Data.Aeson (ToJSON(..), Value(..)) -import qualified Data.Attoparsec.Text as A -import Data.Attoparsec.Text (Parser) +import qualified Text.Parsec as P +import Text.Parsec.Text (Parser) import Control.Applicative import qualified Data.Text as T import Data.Text (Text) @@ -172,7 +172,10 @@ renderTemplate :: (ToJSON a, TemplateTarget b) => Template -> a -> b renderTemplate (Template f) context = toTarget $ f $ toJSON context compileTemplate :: Text -> Either String Template -compileTemplate template = A.parseOnly pTemplate template +compileTemplate template = + case P.parse (pTemplate <* P.eof) "template" template of + Left e -> Left (show e) + Right x -> Right x -- | Like 'renderTemplate', but compiles the template first, -- raising an error if compilation fails. @@ -230,7 +233,7 @@ replaceVar _ _ old = old pTemplate :: Parser Template pTemplate = do - sp <- A.option mempty pInitialSpace + sp <- P.option mempty pInitialSpace rest <- mconcat <$> many (pConditional <|> pFor <|> pNewline <|> @@ -239,40 +242,43 @@ pTemplate = do pEscapedDollar) return $ sp <> rest +takeWhile1 :: (Char -> Bool) -> Parser Text +takeWhile1 f = T.pack <$> P.many1 (P.satisfy f) + pLit :: Parser Template -pLit = lit <$> A.takeWhile1 (\x -> x /='$' && x /= '\n') +pLit = lit <$> takeWhile1 (\x -> x /='$' && x /= '\n') pNewline :: Parser Template pNewline = do - A.char '\n' - sp <- A.option mempty pInitialSpace + P.char '\n' + sp <- P.option mempty pInitialSpace return $ lit "\n" <> sp pInitialSpace :: Parser Template pInitialSpace = do - sps <- A.takeWhile1 (==' ') + sps <- takeWhile1 (==' ') let indentVar = if T.null sps then id else indent (T.length sps) - v <- A.option mempty $ indentVar <$> pVar + v <- P.option mempty $ indentVar <$> pVar return $ lit sps <> v pEscapedDollar :: Parser Template -pEscapedDollar = lit "$" <$ A.string "$$" +pEscapedDollar = lit "$" <$ P.try (P.string "$$") pVar :: Parser Template -pVar = var <$> (A.char '$' *> pIdent <* A.char '$') +pVar = var <$> (P.try $ P.char '$' *> pIdent <* P.char '$') pIdent :: Parser [Text] pIdent = do first <- pIdentPart - rest <- many (A.char '.' *> pIdentPart) + rest <- many (P.char '.' *> pIdentPart) return (first:rest) pIdentPart :: Parser Text -pIdentPart = do - first <- A.letter - rest <- A.takeWhile (\c -> isAlphaNum c || c == '_' || c == '-') +pIdentPart = P.try $ do + first <- P.letter + rest <- T.pack <$> P.many (P.satisfy (\c -> isAlphaNum c || c == '_' || c == '-')) let id' = T.singleton first <> rest guard $ id' `notElem` reservedWords return id' @@ -281,38 +287,38 @@ reservedWords :: [Text] reservedWords = ["else","endif","for","endfor","sep"] skipEndline :: Parser () -skipEndline = A.skipWhile (`elem` " \t") >> A.char '\n' >> return () +skipEndline = P.try $ P.skipMany (P.satisfy (`elem` " \t")) >> P.char '\n' >> return () pConditional :: Parser Template pConditional = do - A.string "$if(" + P.try $ P.string "$if(" id' <- pIdent - A.string ")$" + P.string ")$" -- if newline after the "if", then a newline after "endif" will be swallowed - multiline <- A.option False (True <$ skipEndline) + multiline <- P.option False (True <$ skipEndline) ifContents <- pTemplate - elseContents <- A.option mempty $ - do A.string "$else$" - when multiline $ A.option () skipEndline + elseContents <- P.option mempty $ P.try $ + do P.string "$else$" + when multiline $ P.option () skipEndline pTemplate - A.string "$endif$" - when multiline $ A.option () skipEndline + P.string "$endif$" + when multiline $ P.option () skipEndline return $ cond id' ifContents elseContents pFor :: Parser Template pFor = do - A.string "$for(" + P.try $ P.string "$for(" id' <- pIdent - A.string ")$" + P.string ")$" -- if newline after the "for", then a newline after "endfor" will be swallowed - multiline <- A.option False $ skipEndline >> return True + multiline <- P.option False $ skipEndline >> return True contents <- pTemplate - sep <- A.option mempty $ - do A.string "$sep$" - when multiline $ A.option () skipEndline + sep <- P.option mempty $ + do P.try $ P.string "$sep$" + when multiline $ P.option () skipEndline pTemplate - A.string "$endfor$" - when multiline $ A.option () skipEndline + P.string "$endfor$" + when multiline $ P.option () skipEndline return $ iter id' contents sep indent :: Int -> Template -> Template -- cgit v1.2.3 From bf915da6cd0dc97a231100b784450e334c715969 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Jun 2014 20:07:41 -0700 Subject: Docx writer: Make images work in reference.docx headers/footers. * All media from reference.docx are copied into result. * Added defaults for common image types to [Content Types]. * Avoided redundant XML parse + write for entries taken over from reference.docx, for better performance. --- src/Text/Pandoc/Writers/Docx.hs | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 098da119b..8aaf3c1b8 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -197,10 +197,21 @@ writeDocx opts doc@(Pandoc meta _) = do map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ map mkImageOverride imgs + let imageDefaults = map (\(x,y) -> mknode "Default" + [("Extension",x),("ContentType",y)] ()) + [("jpg","image/jpeg") + ,("jpeg","image/jpeg") + ,("png","image/png") + ,("svg","image/svg+xml") + ,("tif","image/tiff") + ,("tiff","image/tiff") + ,("bmp","image/x-ms-bmp") + ,("gif","image/gif") + ] let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" - [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] + [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] ++ imageDefaults let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides let contentTypesEntry = toEntry "[Content_Types].xml" epochtime $ renderXml contentTypesDoc @@ -311,12 +322,13 @@ writeDocx opts doc@(Pandoc meta _) = do headerFooterEntries <- mapM (entryFromArchive refArchive) $ mapMaybe (\e -> fmap ("word/" ++) $ extractTarget e) (headers ++ footers) - let miscRels = [ f | f <- filesInArchive refArchive - , "word/_rels/" `isPrefixOf` f - , ".xml.rels" `isSuffixOf` f - , f /= "word/_rels/document.xml.rels" - , f /= "word/_rels/footnotes.xml.rels" ] - miscRelEntries <- mapM (entryFromArchive refArchive) miscRels + let miscRelEntries = [ e | e <- zEntries refArchive + , "word/_rels/" `isPrefixOf` (eRelativePath e) + , ".xml.rels" `isSuffixOf` (eRelativePath e) + , eRelativePath e /= "word/_rels/document.xml.rels" + , eRelativePath e /= "word/_rels/footnotes.xml.rels" ] + let otherMediaEntries = [ e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] -- Create archive let archive = foldr addEntryToArchive emptyArchive $ @@ -325,7 +337,7 @@ writeDocx opts doc@(Pandoc meta _) = do docPropsEntry : docPropsAppEntry : themeEntry : fontTableEntry : settingsEntry : webSettingsEntry : imageEntries ++ headerFooterEntries ++ - miscRelEntries + miscRelEntries ++ otherMediaEntries return $ fromArchive archive styleToOpenXml :: Style -> [Element] -- cgit v1.2.3 From 326d7fa8f89f9a4b74042bf4cbb04931e26c8d8d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Jun 2014 20:20:16 -0700 Subject: Docx writer: Improved entryFromArchive to avoid parse. No need to parse the XML if we're just going to render it right away! --- src/Text/Pandoc/Writers/Docx.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 8aaf3c1b8..1e37b5515 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -310,8 +310,9 @@ writeDocx opts doc@(Pandoc meta _) = do let relsEntry = toEntry relsPath epochtime $ renderXml rels let entryFromArchive arch path = - (toEntry path epochtime . renderXml) `fmap` - parseXml arch distArchive path + maybe (fail $ path ++ " corrupt or missing in reference docx") + return + (findEntryByPath path arch `mplus` findEntryByPath path distArchive) docPropsAppEntry <- entryFromArchive refArchive "docProps/app.xml" themeEntry <- entryFromArchive refArchive "word/theme/theme1.xml" fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" -- cgit v1.2.3 From cbfde5cb50a461995a8b60d148615b5a72159f3d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 2 Jun 2014 20:39:27 -0700 Subject: Docx writer: Create overrides per-image for media/ in ref docx. This should be somewhat more robust and cover more types of images. --- src/Text/Pandoc/Writers/Docx.hs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1e37b5515..e630c5094 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -170,6 +170,9 @@ writeDocx opts doc@(Pandoc meta _) = do let mkImageOverride (_, imgpath, mbMimeType, _, _) = mkOverrideNode ("/word/" ++ imgpath, fromMaybe "application/octet-stream" mbMimeType) + let mkMediaOverride imgpath = mkOverrideNode ('/':imgpath, + fromMaybe "application/octet-stream" + $ getMimeType imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -196,22 +199,14 @@ writeDocx opts doc@(Pandoc meta _) = do "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ - map mkImageOverride imgs - let imageDefaults = map (\(x,y) -> mknode "Default" - [("Extension",x),("ContentType",y)] ()) - [("jpg","image/jpeg") - ,("jpeg","image/jpeg") - ,("png","image/png") - ,("svg","image/svg+xml") - ,("tif","image/tiff") - ,("tiff","image/tiff") - ,("bmp","image/x-ms-bmp") - ,("gif","image/gif") - ] + map mkImageOverride imgs ++ + map mkMediaOverride [ eRelativePath e | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] + let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), mknode "Default" - [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] ++ imageDefaults + [("Extension","rels"),("ContentType","application/vnd.openxmlformats-package.relationships+xml")] ()] let contentTypesDoc = mknode "Types" [("xmlns","http://schemas.openxmlformats.org/package/2006/content-types")] $ defaultnodes ++ overrides let contentTypesEntry = toEntry "[Content_Types].xml" epochtime $ renderXml contentTypesDoc -- cgit v1.2.3 From 2a627f85fe27d4351e9c612454d18ae701a466a3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:00:54 -0700 Subject: Shared: Added ordNub. API change (adds export). --- src/Text/Pandoc/Shared.hs | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d8cbe46d9..b0adf55f5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -35,6 +35,7 @@ module Text.Pandoc.Shared ( splitByIndices, splitStringByIndices, substitute, + ordNub, -- * Text processing backslashEscapes, escapeStringUsing, @@ -94,6 +95,7 @@ import Data.List ( find, isPrefixOf, intercalate ) import qualified Data.Map as M import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, unEscapeString, parseURIReference ) +import qualified Data.Set as Set import System.Directory import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (), takeExtension, dropExtension ) @@ -174,6 +176,13 @@ substitute target replacement lst@(x:xs) = then replacement ++ substitute target replacement (drop (length target) lst) else x : substitute target replacement xs +ordNub :: (Ord a) => [a] -> [a] +ordNub l = go Set.empty l + where + go _ [] = [] + go s (x:xs) = if x `Set.member` s then go s xs + else x : go (Set.insert x s) xs + -- -- Text processing -- -- cgit v1.2.3 From 9b4e772718a868392a51727ff8cc5eba2ce35bcd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:01:23 -0700 Subject: Templates: use ordNum instead of ord. Closes #1022. --- src/Text/Pandoc/Templates.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 89856a9ee..4ae6a6d8a 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -103,7 +103,7 @@ import qualified Data.Text as T import Data.Text (Text) import Data.Text.Encoding (encodeUtf8) import Text.Pandoc.Compat.Monoid ((<>), Monoid(..)) -import Data.List (intersperse, nub) +import Data.List (intersperse) import System.FilePath ((), (<.>)) import qualified Data.Map as M import qualified Data.HashMap.Strict as H @@ -116,7 +116,7 @@ import Text.Blaze.Internal (preEscapedText) import Text.Blaze (preEscapedText, Html) #endif import Data.ByteString.Lazy (ByteString, fromChunks) -import Text.Pandoc.Shared (readDataFileUTF8) +import Text.Pandoc.Shared (readDataFileUTF8, ordNub) import Data.Vector ((!?)) -- | Get default template for the specified writer. @@ -163,7 +163,7 @@ varListToJSON assoc = toJSON $ M.fromList assoc' where assoc' = [(T.pack k, toVal [T.pack z | (y,z) <- assoc, not (null z), y == k]) - | k <- nub $ map fst assoc ] + | k <- ordNub $ map fst assoc ] toVal [x] = toJSON x toVal [] = Null toVal xs = toJSON xs -- cgit v1.2.3 From 05355ac57b7ccadfa4462f3304a7f6364147c8eb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:03:40 -0700 Subject: Docx writer: Simplified abstractNumId numbering. Instead of sequential numbering, we assign numbers based on the list marker styles. This simplifies some of the code and should make it easier to modify numbering in the future. --- src/Text/Pandoc/Writers/Docx.hs | 49 +++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e630c5094..572823871 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -66,6 +66,25 @@ data ListMarker = NoMarker | NumberMarker ListNumberStyle ListNumberDelim Int deriving (Show, Read, Eq, Ord) +listMarkerToId :: ListMarker -> String +listMarkerToId NoMarker = "0" +listMarkerToId BulletMarker = "1" +listMarkerToId (NumberMarker sty delim n) = + styNum : delimNum : show n + where styNum = case sty of + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' + delimNum = case delim of + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' + data WriterState = WriterState{ stTextProperties :: [Element] , stParaProperties :: [Element] @@ -75,7 +94,6 @@ data WriterState = WriterState{ , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString) , stListLevel :: Int , stListNumId :: Int - , stNumStyles :: M.Map ListMarker Int , stLists :: [ListMarker] } @@ -89,7 +107,6 @@ defaultWriterState = WriterState{ , stImages = M.empty , stListLevel = -1 , stListNumId = 1 - , stNumStyles = M.fromList [(NoMarker, 0)] , stLists = [NoMarker] } @@ -273,7 +290,7 @@ writeDocx opts doc@(Pandoc meta _) = do -- construct word/numbering.xml let numpath = "word/numbering.xml" numEntry <- (toEntry numpath epochtime . renderXml) - `fmap` mkNumbering (stNumStyles st) (stLists st) + `fmap` mkNumbering (stLists st) let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -371,29 +388,28 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes $ backgroundColor style ) ] -mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> IO Element -mkNumbering markers lists = do - elts <- mapM mkAbstractNum (M.toList markers) +mkNumbering :: [ListMarker] -> IO Element +mkNumbering lists = do + elts <- mapM mkAbstractNum (ordNub lists) return $ mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith (mkNum markers) lists [1..(length lists)] + $ elts ++ zipWith mkNum lists [1..(length lists)] -mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element -mkNum markers marker numid = +mkNum :: ListMarker -> Int -> Element +mkNum marker numid = mknode "w:num" [("w:numId",show numid)] - $ mknode "w:abstractNumId" [("w:val",show absnumid)] () + $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] - where absnumid = fromMaybe 0 $ M.lookup marker markers -mkAbstractNum :: (ListMarker,Int) -> IO Element -mkAbstractNum (marker,numid) = do +mkAbstractNum :: ListMarker -> IO Element +mkAbstractNum marker = do nsid <- randomRIO (0x10000000 :: Integer, 0xFFFFFFFF :: Integer) - return $ mknode "w:abstractNum" [("w:abstractNumId",show numid)] + return $ mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..6] @@ -594,11 +610,6 @@ addList :: ListMarker -> WS () addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } - numStyles <- gets stNumStyles - case M.lookup marker numStyles of - Just _ -> return () - Nothing -> modify $ \st -> - st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles } listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] listItemToOpenXML _ _ [] = return [] -- cgit v1.2.3 From 2842ad5a978de758d70801b5279f75b9ba679406 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 11:33:09 -0700 Subject: Docx writer: Changed abstractNumId numbering scheme. Now the minimum id used by pandoc is 990. All ids start with "99". This gives some room for a reference.docx to define numbering styles. Note: this is not yet possible, since pandoc generates numbering.xml entirely on its own. --- src/Text/Pandoc/Writers/Docx.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 572823871..ca0892547 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -67,10 +67,10 @@ data ListMarker = NoMarker deriving (Show, Read, Eq, Ord) listMarkerToId :: ListMarker -> String -listMarkerToId NoMarker = "0" -listMarkerToId BulletMarker = "1" +listMarkerToId NoMarker = "990" +listMarkerToId BulletMarker = "991" listMarkerToId (NumberMarker sty delim n) = - styNum : delimNum : show n + '9' : '9' : styNum : delimNum : show n where styNum = case sty of DefaultStyle -> '2' Example -> '3' -- cgit v1.2.3 From ec047aaa8c1c1e9d69b0029a2e4512785fbc15a8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 12:13:31 -0700 Subject: Docx writer: pandoc uses only numIds >= 1000 for lists. This opens up the possiblity (with further code changes) of preserving some numbering from the reference.docx (e.g. header numbering.) See #1305. --- src/Text/Pandoc/Writers/Docx.hs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index ca0892547..3d2f5d4b5 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -388,12 +388,16 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes $ backgroundColor style ) ] +-- this is the lowest number used for a list numId +baseListId :: Int +baseListId = 1000 + mkNumbering :: [ListMarker] -> IO Element mkNumbering lists = do elts <- mapM mkAbstractNum (ordNub lists) return $ mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith mkNum lists [1..(length lists)] + $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] mkNum :: ListMarker -> Int -> Element mkNum marker numid = @@ -461,7 +465,7 @@ mkLvl marker lvl = patternFor _ s = s ++ "." getNumId :: WS Int -getNumId = length `fmap` gets stLists +getNumId = ((999 +) . length) `fmap` gets stLists -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). @@ -615,7 +619,8 @@ listItemToOpenXML :: WriterOptions -> Int -> [Block] -> WS [Element] listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do first' <- withNumId numid $ blockToOpenXML opts first - rest' <- withNumId 1 $ blocksToOpenXML opts rest + -- baseListId is the code for no list marker: + rest' <- withNumId baseListId $ blocksToOpenXML opts rest return $ first' ++ rest' alignmentToString :: Alignment -> [Char] -- cgit v1.2.3 From 0ddb4cd2e8883c226ca7ab8a92737dc29f07dfda Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 13:14:32 -0700 Subject: Docx writer: Combine reference.docx numbering with pandoc's. This should have fixed #1305, allowing the reference.docx to define section numbering, but it doesn't. Now the headings appear with proper indentation, but the numbers don't appear. Unclear why. styles.xml and numbering.xml basically match the docx which has the expected result. --- src/Text/Pandoc/Writers/Docx.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 3d2f5d4b5..785238d6f 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -289,8 +289,10 @@ writeDocx opts doc@(Pandoc meta _) = do -- construct word/numbering.xml let numpath = "word/numbering.xml" - numEntry <- (toEntry numpath epochtime . renderXml) - `fmap` mkNumbering (stLists st) + numbering <- parseXml refArchive distArchive numpath + newNumElts <- mkNumbering (stLists st) + let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent = + elContent numbering ++ map Elem newNumElts } let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") @@ -392,12 +394,10 @@ styleToOpenXml style = parStyle : map toStyle alltoktypes baseListId :: Int baseListId = 1000 -mkNumbering :: [ListMarker] -> IO Element +mkNumbering :: [ListMarker] -> IO [Element] mkNumbering lists = do elts <- mapM mkAbstractNum (ordNub lists) - return $ mknode "w:numbering" - [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] - $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] + return $ elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] mkNum :: ListMarker -> Int -> Element mkNum marker numid = -- cgit v1.2.3 From 45f3851611007f18530b52c9fcc5f0106fbc6816 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 16:46:55 -0700 Subject: Docx writer: Section numbering carries over from reference.docx. Closes #1305. --- src/Text/Pandoc/Writers/Docx.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 785238d6f..4e64a79df 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -291,8 +291,13 @@ writeDocx opts doc@(Pandoc meta _) = do let numpath = "word/numbering.xml" numbering <- parseXml refArchive distArchive numpath newNumElts <- mkNumbering (stLists st) + let allElts = onlyElems (elContent numbering) ++ newNumElts let numEntry = toEntry numpath epochtime $ renderXml numbering{ elContent = - elContent numbering ++ map Elem newNumElts } + -- we want all the abstractNums first, then the nums, + -- otherwise things break: + [Elem e | e <- allElts + , qName (elName e) == "abstractNum" ] ++ + [Elem e | e <- allElts, qName (elName e) == "num" ] } let docPropsPath = "docProps/core.xml" let docProps = mknode "cp:coreProperties" [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") -- cgit v1.2.3 From ab5dda7a601ad93b97ee02b3d216cc6d5321a462 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 23:17:03 -0700 Subject: Markdown writer: Prettier pipe tables. Columns are now aligned. Closes #1323. --- src/Text/Pandoc/Writers/Markdown.hs | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index f42a1b54c..a67271a5d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -478,16 +478,24 @@ addMarkdownAttribute s = pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc pipeTable headless aligns rawHeaders rawRows = do + let sp = text " " + let blockFor AlignLeft x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignCenter x y = cblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor AlignRight x y = rblock (x + 2) (sp <> y) <> lblock 0 empty + blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty + let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) let torow cs = nowrap $ text "|" <> - hcat (intersperse (text "|") $ map chomp cs) <> text "|" - let toborder (a, h) = let wid = max (offset h) 3 - in text $ case a of - AlignLeft -> ':':replicate (wid - 1) '-' - AlignCenter -> ':':replicate (wid - 2) '-' ++ ":" - AlignRight -> replicate (wid - 1) '-' ++ ":" - AlignDefault -> replicate wid '-' + hcat (intersperse (text "|") $ + zipWith3 blockFor aligns widths (map chomp cs)) + <> text "|" + let toborder (a, w) = text $ case a of + AlignLeft -> ':':replicate (w + 1) '-' + AlignCenter -> ':':replicate w '-' ++ ":" + AlignRight -> replicate (w + 1) '-' ++ ":" + AlignDefault -> replicate (w + 2) '-' let header = if headless then empty else torow rawHeaders - let border = torow $ map toborder $ zip aligns rawHeaders + let border = nowrap $ text "|" <> hcat (intersperse (text "|") $ + map toborder $ zip aligns widths) <> text "|" let body = vcat $ map torow rawRows return $ header $$ border $$ body -- cgit v1.2.3 From 96815746611d63fae7be718cce643a52effd6525 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 3 Jun 2014 23:17:42 -0700 Subject: LaTeX reader: Handle comments at the end of tables. This resolves the issue illustrated in http://stackoverflow.com/questions/24009489/comments-in-latex-break-pandoc-table. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7fc587882..c3c0ba423 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1269,6 +1269,7 @@ simpTable = try $ do header' <- option [] $ try (parseTableRow cols <* lbreak <* hline) rows <- sepEndBy (parseTableRow cols) (lbreak <* optional hline) spaces + skipMany (comment *> spaces) let header'' = if null header' then replicate cols mempty else header' -- cgit v1.2.3 From abbf33ae7d6ee3358fd74e434ccd897c774bc3d0 Mon Sep 17 00:00:00 2001 From: James Aspnes Date: Thu, 12 Jun 2014 21:19:00 -0400 Subject: allow (and discard) optional argument for \caption --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index c3c0ba423..6b5958920 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -305,7 +305,7 @@ blockCommands = M.fromList $ , ("item", skipopts *> loose_item) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) - , ("caption", tok >>= setCaption) + , ("caption", skipopts *> tok >>= setCaption) , ("PandocStartInclude", startInclude) , ("PandocEndInclude", endInclude) , ("bibliography", mempty <$ (skipopts *> braced >>= -- cgit v1.2.3 From 293e4cfdc3028218089115acd6f091b9ea3aa7d6 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 14 Jun 2014 10:02:35 -0400 Subject: Add DocX files to tree. This introduces Text.Pandoc.DocX, and its exported `readDocX` function. --- src/Text/Pandoc/Readers/DocX.hs | 479 +++++++++++++++++++++++++++ src/Text/Pandoc/Readers/DocX/Lists.hs | 208 ++++++++++++ src/Text/Pandoc/Readers/DocX/Parse.hs | 604 ++++++++++++++++++++++++++++++++++ 3 files changed, 1291 insertions(+) create mode 100644 src/Text/Pandoc/Readers/DocX.hs create mode 100644 src/Text/Pandoc/Readers/DocX/Lists.hs create mode 100644 src/Text/Pandoc/Readers/DocX/Parse.hs (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs new file mode 100644 index 000000000..976e2e271 --- /dev/null +++ b/src/Text/Pandoc/Readers/DocX.hs @@ -0,0 +1,479 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DocX + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse) +to 'Pandoc' document. -} + +{- +Current state of implementation of DocX entities ([x] means +implemented, [-] means partially implemented): + +* Blocks + + - [X] Para + - [X] CodeBlock (styled with `SourceCode`) + - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, + indented) + - [X] OrderedList + - [X] BulletList + - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) + - [X] Header (styled with `Heading#`) + - [ ] HorizontalRule + - [-] Table (column widths and alignments not yet implemented) + +* Inlines + + - [X] Str + - [X] Emph (From italics. `underline` currently read as span. In + future, it might optionally be emph as well) + - [X] Strong + - [X] Strikeout + - [X] Superscript + - [X] Subscript + - [X] SmallCaps + - [ ] Quoted + - [ ] Cite + - [X] Code (styled with `VerbatimChar`) + - [X] Space + - [X] LineBreak (these are invisible in Word: entered with Shift-Return) + - [ ] Math + - [X] Link (links to an arbitrary bookmark create a span with the target as + id and "anchor" class) + - [-] Image (Links to path in archive. Future option for + data-encoded URI likely.) + - [X] Note (Footnotes and Endnotes are silently combined.) +-} + +module Text.Pandoc.Readers.DocX + ( readDocX + ) where + +import Codec.Archive.Zip +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Builder (text, toList) +import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Readers.DocX.Parse +import Text.Pandoc.Readers.DocX.Lists +import Data.Maybe (mapMaybe, isJust, fromJust) +import Data.List (delete, isPrefixOf, (\\), intersect) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Base64 (encode) +import System.FilePath (combine) + +readDocX :: ReaderOptions + -> B.ByteString + -> Pandoc +readDocX opts bytes = + case archiveToDocX (toArchive bytes) of + Just docx -> Pandoc nullMeta (docxToBlocks opts docx) + Nothing -> error $ "couldn't parse docx file" + +runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) +runStyleToSpanAttr rPr = ("", + mapMaybe id [ + if isBold rPr then (Just "strong") else Nothing, + if isItalic rPr then (Just "emph") else Nothing, + if isSmallCaps rPr then (Just "smallcaps") else Nothing, + if isStrike rPr then (Just "strike") else Nothing, + if isSuperScript rPr then (Just "superscript") else Nothing, + if isSubScript rPr then (Just "subscript") else Nothing, + rStyle rPr], + case underline rPr of + Just fmt -> [("underline", fmt)] + _ -> [] + ) + +parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) +parStyleToDivAttr pPr = ("", + pStyle pPr, + case indent pPr of + Just n -> [("indent", (show n))] + Nothing -> [] + ) + +strToInlines :: String -> [Inline] +strToInlines = toList . text + +codeSpans :: [String] +codeSpans = ["VerbatimChar"] + +blockQuoteDivs :: [String] +blockQuoteDivs = ["Quote", "BlockQuote"] + +codeDivs :: [String] +codeDivs = ["SourceCode"] + +runElemToInlines :: RunElem -> [Inline] +runElemToInlines (TextRun s) = strToInlines s +runElemToInlines (LnBrk) = [LineBreak] + +runElemToString :: RunElem -> String +runElemToString (TextRun s) = s +runElemToString (LnBrk) = ['\n'] + +runElemsToString :: [RunElem] -> String +runElemsToString = concatMap runElemToString + +strNormalize :: [Inline] -> [Inline] +strNormalize [] = [] +strNormalize (Str "" : ils) = strNormalize ils +strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) +strNormalize (il:ils) = il : (strNormalize ils) + +runToInlines :: ReaderOptions -> DocX -> Run -> [Inline] +runToInlines _ _ (Run rs runElems) + | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = + case runStyleToSpanAttr rs == ("", [], []) of + True -> [Str (runElemsToString runElems)] + False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] + | otherwise = case runStyleToSpanAttr rs == ("", [], []) of + True -> concatMap runElemToInlines runElems + False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] +runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) = + case (getFootNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["footnote"], []) []]] +runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) = + case (getEndNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["endnote"], []) []]] + +parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline] +parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines _ _ (BookMark _ anchor) = + [Span (anchor, ["anchor"], []) []] +parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) = + case lookupRelationship relid rels of + Just target -> [Image [] (combine "word" target, "")] + Nothing -> [Image [] ("", "")] +parPartToInlines opts docx (InternalHyperLink anchor runs) = + [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] +parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) = + case lookupRelationship relid rels of + Just target -> + [Link (concatMap (runToInlines opts docx) runs) (target, "")] + Nothing -> + [Link (concatMap (runToInlines opts docx) runs) ("", "")] + +isAnchorSpan :: Inline -> Bool +isAnchorSpan (Span (ident, classes, kvs) ils) = + (not . null) ident && + classes == ["anchor"] && + null kvs && + null ils +isAnchorSpan _ = False + +dummyAnchors :: [String] +dummyAnchors = ["_GoBack"] + +makeHeaderAnchors :: Block -> Block +makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = + case filter isAnchorSpan ils of + [] -> h + (x@(Span (ident, _, _) _) : xs) -> + case ident `elem` dummyAnchors of + True -> h + False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) + _ -> h +makeHeaderAnchors blk = blk + + +parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline] +parPartsToInlines opts docx parparts = + -- + -- We're going to skip data-uri's for now. It should be an option, + -- not mandatory. + -- + --bottomUp (makeImagesSelfContained docx) $ + bottomUp spanCorrect $ + bottomUp spanTrim $ + bottomUp spanReduce $ + concatMap (parPartToInlines opts docx) parparts + +cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block] +cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps + +rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]] +rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells + +bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block +bodyPartToBlock opts docx (Paragraph pPr parparts) = + Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] +bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) = + let + kvs = case lookupLevel numId lvl numbering of + Just (_, fmt, txt, Just start) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", (show start)) + ] + + Just (_, fmt, txt, Nothing) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + ] + Nothing -> [] + in + Div + ("", ["list-item"], kvs) + [bodyPartToBlock opts docx (Paragraph pPr parparts)] +bodyPartToBlock _ _ (Tbl _ _ _ []) = + Para [] +bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + let caption = strToInlines cap + (hdr, rows) = case firstRowFormatting look of + True -> (Just r, rs) + False -> (Nothing, r:rs) + hdrCells = case hdr of + Just r' -> rowToBlocksList opts docx r' + Nothing -> [] + cells = map (rowToBlocksList opts docx) rows + + size = case null hdrCells of + True -> length $ head cells + False -> length $ hdrCells + -- + -- The two following variables (horizontal column alignment and + -- relative column widths) go to the default at the + -- moment. Width information is in the TblGrid field of the Tbl, + -- so should be possible. Alignment might be more difficult, + -- since there doesn't seem to be a column entity in docx. + alignments = take size (repeat AlignDefault) + widths = take size (repeat 0) :: [Double] + in + Table caption alignments widths hdrCells cells + +makeImagesSelfContained :: DocX -> Inline -> Inline +makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) = + case lookup uri media of + Just bs -> case getMimeType uri of + Just mime -> let data_uri = + "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) + in + Image alt (data_uri, title) + Nothing -> i + Nothing -> i +makeImagesSelfContained _ inline = inline + +bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block] +bodyToBlocks opts docx (Body bps) = + bottomUp removeEmptyPars $ + bottomUp strNormalize $ + bottomUp spanRemove $ + bottomUp divRemove $ + map (makeHeaderAnchors) $ + bottomUp divCorrect $ + bottomUp divReduce $ + bottomUp divCorrectPreReduce $ + bottomUp blocksToDefinitions $ + blocksToBullets $ + map (bodyPartToBlock opts docx) bps + +docxToBlocks :: ReaderOptions -> DocX -> [Block] +docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body + +spanReduce :: [Inline] -> [Inline] +spanReduce [] = [] +spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) + | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) +spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : + s2@(Span (id2, classes2, kvs2) ils2) : + ils) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> s1 : (spanReduce (s2 : ils)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : + ils) +spanReduce (il:ils) = il : (spanReduce ils) + +ilToCode :: Inline -> String +ilToCode (Str s) = s +ilToCode _ = "" + +spanRemove' :: Inline -> [Inline] +spanRemove' s@(Span (ident, classes, _) []) + -- "_GoBack" is automatically inserted. We don't want to keep it. + | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] +spanRemove' (Span (_, _, kvs) ils) = + case lookup "underline" kvs of + Just val -> [Span ("", [], [("underline", val)]) ils] + Nothing -> ils +spanRemove' il = [il] + +spanRemove :: [Inline] -> [Inline] +spanRemove = concatMap spanRemove' + +spanTrim' :: Inline -> [Inline] +spanTrim' il@(Span _ []) = [il] +spanTrim' il@(Span attr (il':[])) + | il' == Space = [Span attr [], Space] + | otherwise = [il] +spanTrim' (Span attr ils) + | head ils == Space && last ils == Space = + [Space, Span attr (init $ tail ils), Space] + | head ils == Space = [Space, Span attr (tail ils)] + | last ils == Space = [Span attr (init ils), Space] +spanTrim' il = [il] + +spanTrim :: [Inline] -> [Inline] +spanTrim = concatMap spanTrim' + +spanCorrect' :: Inline -> [Inline] +spanCorrect' (Span ("", [], []) ils) = ils +spanCorrect' (Span (ident, classes, kvs) ils) + | "emph" `elem` classes = + [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] + | "strong" `elem` classes = + [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] + | "smallcaps" `elem` classes = + [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] + | "strike" `elem` classes = + [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] + | "superscript" `elem` classes = + [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] + | "subscript" `elem` classes = + [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] + | (not . null) (codeSpans `intersect` classes) = + [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] + | otherwise = + [Span (ident, classes, kvs) ils] +spanCorrect' il = [il] + +spanCorrect :: [Inline] -> [Inline] +spanCorrect = concatMap spanCorrect' + +removeEmptyPars :: [Block] -> [Block] +removeEmptyPars blks = filter (\b -> b /= (Para [])) blks + +divReduce :: [Block] -> [Block] +divReduce [] = [] +divReduce ((Div (id1, classes1, kvs1) blks1) : blks) + | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) +divReduce (d1@(Div (id1, classes1, kvs1) blks1) : + d2@(Div (id2, classes2, kvs2) blks2) : + blks) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> d1 : (divReduce (d2 : blks)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : + blks) +divReduce (blk:blks) = blk : (divReduce blks) + +isHeaderClass :: String -> Maybe Int +isHeaderClass s | "Heading" `isPrefixOf` s = + case reads (drop (length "Heading") s) :: [(Int, String)] of + [] -> Nothing + ((n, "") : []) -> Just n + _ -> Nothing +isHeaderClass _ = Nothing + +findHeaderClass :: [String] -> Maybe Int +findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of + [] -> Nothing + n : _ -> Just n + +blksToInlines :: [Block] -> [Inline] +blksToInlines (Para ils : _) = ils +blksToInlines (Plain ils : _) = ils +blksToInlines _ = [] + +divCorrectPreReduce' :: Block -> [Block] +divCorrectPreReduce' (Div (ident, classes, kvs) blks) + | isJust $ findHeaderClass classes = + let n = fromJust $ findHeaderClass classes + in + [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] + | otherwise = [Div (ident, classes, kvs) blks] +divCorrectPreReduce' blk = [blk] + +divCorrectPreReduce :: [Block] -> [Block] +divCorrectPreReduce = concatMap divCorrectPreReduce' + +blkToCode :: Block -> String +blkToCode (Para []) = "" +blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) +blkToCode (Para ((Span (_, classes, _) ils'): ils)) + | (not . null) (codeSpans `intersect` classes) = + (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) +blkToCode _ = "" + +divRemove' :: Block -> [Block] +divRemove' (Div (_, _, kvs) blks) = + case lookup "indent" kvs of + Just val -> [Div ("", [], [("indent", val)]) blks] + Nothing -> blks +divRemove' blk = [blk] + +divRemove :: [Block] -> [Block] +divRemove = concatMap divRemove' + +divCorrect' :: Block -> [Block] +divCorrect' b@(Div (ident, classes, kvs) blks) + | (not . null) (blockQuoteDivs `intersect` classes) = + [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] + | (not . null) (codeDivs `intersect` classes) = + [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] + | otherwise = + case lookup "indent" kvs of + Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] + Just _ -> + [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] + Nothing -> [b] +divCorrect' blk = [blk] + +divCorrect :: [Block] -> [Block] +divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs new file mode 100644 index 000000000..b20679261 --- /dev/null +++ b/src/Text/Pandoc/Readers/DocX/Lists.hs @@ -0,0 +1,208 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DocX.Lists + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Functions for converting flat DocX paragraphs into nested lists. +-} + +module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets + , blocksToDefinitions) where + +import Text.Pandoc.JSON +import Text.Pandoc.Shared (trim) +import Control.Monad +import Data.List +import Data.Maybe + +isListItem :: Block -> Bool +isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True +isListItem _ = False + +getLevel :: Block -> Maybe Integer +getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs +getLevel _ = Nothing + +getLevelN :: Block -> Integer +getLevelN b = case getLevel b of + Just n -> n + Nothing -> -1 + +getNumId :: Block -> Maybe Integer +getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs +getNumId _ = Nothing + +getNumIdN :: Block -> Integer +getNumIdN b = case getNumId b of + Just n -> n + Nothing -> -1 + +getText :: Block -> Maybe String +getText (Div (_, _, kvs) _) = lookup "text" kvs +getText _ = Nothing + +data ListType = Itemized | Enumerated ListAttributes + +listStyleMap :: [(String, ListNumberStyle)] +listStyleMap = [("upperLetter", UpperAlpha), + ("lowerLetter", LowerAlpha), + ("upperRoman", UpperRoman), + ("lowerRoman", LowerRoman), + ("decimal", Decimal)] + +listDelimMap :: [(String, ListNumberDelim)] +listDelimMap = [("%1)", OneParen), + ("(%1)", TwoParens), + ("%1.", Period)] + +getListType :: Block -> Maybe ListType +getListType b@(Div (_, _, kvs) _) | isListItem b = + let + start = lookup "start" kvs + frmt = lookup "format" kvs + txt = lookup "text" kvs + in + case frmt of + Just "bullet" -> Just Itemized + Just f -> + case txt of + Just t -> Just $ Enumerated ( + read (fromMaybe "1" start) :: Int, + fromMaybe DefaultStyle (lookup f listStyleMap), + fromMaybe DefaultDelim (lookup t listDelimMap)) + Nothing -> Nothing + _ -> Nothing +getListType _ = Nothing + +listParagraphDivs :: [String] +listParagraphDivs = ["ListParagraph"] + +-- This is a first stab at going through and attaching meaning to list +-- paragraphs, without an item marker, following a list item. We +-- assume that these are paragraphs in the same item. + +handleListParagraphs :: [Block] -> [Block] +handleListParagraphs [] = [] +handleListParagraphs ( + (Div attr1@(_, classes1, _) blks1) : + (Div (ident2, classes2, kvs2) blks2) : + blks + ) | "list-item" `elem` classes1 && + not ("list-item" `elem` classes2) && + (not . null) (listParagraphDivs `intersect` classes2) = + -- We don't want to keep this indent. + let newDiv2 = + (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) + in + handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) +handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) + +separateBlocks' :: Block -> [[Block]] -> [[Block]] +separateBlocks' blk ([] : []) = [[blk]] +separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] +-- The following is for the invisible bullet lists. This is how +-- pandoc-generated ooxml does multiparagraph item lists. +separateBlocks' b acc | liftM trim (getText b) == Just "" = + (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b acc = acc ++ [[b]] + +separateBlocks :: [Block] -> [[Block]] +separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) + +flatToBullets' :: Integer -> [Block] -> [Block] +flatToBullets' _ [] = [] +flatToBullets' num xs@(b : elems) + | getLevelN b == num = b : (flatToBullets' num elems) + | otherwise = + let bNumId = getNumIdN b + bLevel = getLevelN b + (children, remaining) = + span + (\b' -> + ((getLevelN b') > bLevel || + ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) + xs + in + case getListType b of + Just (Enumerated attr) -> + (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + _ -> + (BulletList (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + +flatToBullets :: [Block] -> [Block] +flatToBullets elems = flatToBullets' (-1) elems + +blocksToBullets :: [Block] -> [Block] +blocksToBullets blks = + -- bottomUp removeListItemDivs $ + flatToBullets $ (handleListParagraphs blks) + + +plainParaInlines :: Block -> [Inline] +plainParaInlines (Plain ils) = ils +plainParaInlines (Para ils) = ils +plainParaInlines _ = [] + +blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] +blocksToDefinitions' [] acc [] = reverse acc +blocksToDefinitions' defAcc acc [] = + reverse $ (DefinitionList (reverse defAcc)) : acc +blocksToDefinitions' defAcc acc + ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) + | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + pair = case remainingAttr2 == ("", [], []) of + True -> (concatMap plainParaInlines blks1, [blks2]) + False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) + in + blocksToDefinitions' (pair : defAcc) acc blks +blocksToDefinitions' defAcc acc + ((Div (ident2, classes2, kvs2) blks2) : blks) + | (not . null) defAcc && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + defItems2 = case remainingAttr2 == ("", [], []) of + True -> blks2 + False -> [Div remainingAttr2 blks2] + ((defTerm, defItems):defs) = defAcc + defAcc' = case null defItems of + True -> (defTerm, [defItems2]) : defs + False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs + in + blocksToDefinitions' defAcc' acc blks +blocksToDefinitions' [] acc (b:blks) = + blocksToDefinitions' [] (b:acc) blks +blocksToDefinitions' defAcc acc (b:blks) = + blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks + + +blocksToDefinitions :: [Block] -> [Block] +blocksToDefinitions = blocksToDefinitions' [] [] + + + + diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs new file mode 100644 index 000000000..d7033d9e8 --- /dev/null +++ b/src/Text/Pandoc/Readers/DocX/Parse.hs @@ -0,0 +1,604 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.DocX.Parse + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of DocX archive into DocX haskell type +-} + + +module Text.Pandoc.Readers.DocX.Parse ( DocX(..) + , Document(..) + , Body(..) + , BodyPart(..) + , TblLook(..) + , ParPart(..) + , Run(..) + , RunElem(..) + , Notes + , Numbering + , Relationship + , Media + , RunStyle(..) + , ParagraphStyle(..) + , Row(..) + , Cell(..) + , getFootNote + , getEndNote + , lookupLevel + , lookupRelationship + , archiveToDocX + ) where +import Codec.Archive.Zip +import Text.XML.Light +import Data.Maybe +import Data.List +import System.FilePath +import Data.Bits ((.|.)) +import qualified Data.ByteString.Lazy as B +import qualified Text.Pandoc.UTF8 as UTF8 + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + + +type NameSpaces = [(String, String)] + +data DocX = DocX Document Notes Numbering [Relationship] Media + deriving Show + +archiveToDocX :: Archive -> Maybe DocX +archiveToDocX archive = do + let notes = archiveToNotes archive + rels = archiveToRelationships archive + media = archiveToMedia archive + doc <- archiveToDocument archive + numbering <- archiveToNumbering archive + return $ DocX doc notes numbering rels media + +data Document = Document NameSpaces Body + deriving Show + +archiveToDocument :: Archive -> Maybe Document +archiveToDocument zf = do + entry <- findEntryByPath "word/document.xml" zf + docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem + body <- elemToBody namespaces bodyElem + return $ Document namespaces body + +type Media = [(FilePath, B.ByteString)] + +filePathIsMedia :: FilePath -> Bool +filePathIsMedia fp = + let (dir, _) = splitFileName fp + in + (dir == "word/media/") + +getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) +getMediaPair zf fp = + case findEntryByPath fp zf of + Just e -> Just (fp, fromEntry e) + Nothing -> Nothing + +archiveToMedia :: Archive -> Media +archiveToMedia zf = + mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) + +data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] + deriving Show + +data Numb = Numb String String -- right now, only a key to an abstract num + deriving Show + +data AbstractNumb = AbstractNumb String [Level] + deriving Show + +-- (ilvl, format, string, start) +type Level = (String, String, String, Maybe Integer) + +lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do + absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs + lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs + lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls + return lvl + +numElemToNum :: NameSpaces -> Element -> Maybe Numb +numElemToNum ns element | + qName (elName element) == "num" && + qURI (elName element) == (lookup "w" ns) = do + numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element + absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + return $ Numb numId absNumId +numElemToNum _ _ = Nothing + +absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb +absNumElemToAbsNum ns element | + qName (elName element) == "abstractNum" && + qURI (elName element) == (lookup "w" ns) = do + absNumId <- findAttr + (QName "abstractNumId" (lookup "w" ns) (Just "w")) + element + let levelElems = findChildren + (QName "lvl" (lookup "w" ns) (Just "w")) + element + levels = mapMaybe id $ map (levelElemToLevel ns) levelElems + return $ AbstractNumb absNumId levels +absNumElemToAbsNum _ _ = Nothing + +levelElemToLevel :: NameSpaces -> Element -> Maybe Level +levelElemToLevel ns element | + qName (elName element) == "lvl" && + qURI (elName element) == (lookup "w" ns) = do + ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element + fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + return (ilvl, fmt, txt, start) +levelElemToLevel _ _ = Nothing + +archiveToNumbering :: Archive -> Maybe Numbering +archiveToNumbering zf = + case findEntryByPath "word/numbering.xml" zf of + Nothing -> Just $ Numbering [] [] [] + Just entry -> do + numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + numElems = findChildren + (QName "num" (lookup "w" namespaces) (Just "w")) + numberingElem + absNumElems = findChildren + (QName "abstractNum" (lookup "w" namespaces) (Just "w")) + numberingElem + nums = mapMaybe id $ map (numElemToNum namespaces) numElems + absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems + return $ Numbering namespaces nums absNums + +data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) + deriving Show + +noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) +noteElemToNote ns element + | qName (elName element) `elem` ["endnote", "footnote"] && + qURI (elName element) == (lookup "w" ns) = + do + noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + let bps = map fromJust + $ filter isJust + $ map (elemToBodyPart ns) + $ filterChildrenName (isParOrTbl ns) element + return $ (noteId, bps) +noteElemToNote _ _ = Nothing + +getFootNote :: String -> Notes -> Maybe [BodyPart] +getFootNote s (Notes _ fns _) = fns >>= (lookup s) + +getEndNote :: String -> Notes -> Maybe [BodyPart] +getEndNote s (Notes _ _ ens) = ens >>= (lookup s) + +elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] +elemToNotes ns notetype element + | qName (elName element) == (notetype ++ "s") && + qURI (elName element) == (lookup "w" ns) = + Just $ map fromJust + $ filter isJust + $ map (noteElemToNote ns) + $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element +elemToNotes _ _ _ = Nothing + +archiveToNotes :: Archive -> Notes +archiveToNotes zf = + let fnElem = findEntryByPath "word/footnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + enElem = findEntryByPath "word/endnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + fn_namespaces = case fnElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + en_namespaces = case enElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn = fnElem >>= (elemToNotes ns "footnote") + en = enElem >>= (elemToNotes ns "endnote") + in + Notes ns fn en + + +data Relationship = Relationship (RelId, Target) + deriving Show + +lookupRelationship :: RelId -> [Relationship] -> Maybe Target +lookupRelationship relid rels = + lookup relid (map (\(Relationship pair) -> pair) rels) + +filePathIsRel :: FilePath -> Bool +filePathIsRel fp = + let (dir, name) = splitFileName fp + in + (dir == "word/_rels/") && ((takeExtension name) == ".rels") + +relElemToRelationship :: Element -> Maybe Relationship +relElemToRelationship element | qName (elName element) == "Relationship" = + do + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship (relId, target) +relElemToRelationship _ = Nothing + + +archiveToRelationships :: Archive -> [Relationship] +archiveToRelationships archive = + let relPaths = filter filePathIsRel (filesInArchive archive) + entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths + relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries + rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems + in + rels + +data Body = Body [BodyPart] + deriving Show + +isParOrTbl :: NameSpaces -> QName -> Bool +isParOrTbl ns q = qName q `elem` ["p", "tbl"] && + qURI q == (lookup "w" ns) + +elemToBody :: NameSpaces -> Element -> Maybe Body +elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = + Just $ Body + $ map fromJust + $ filter isJust + $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element +elemToBody _ _ = Nothing + +isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool +isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && + qURI q == (lookup "w" ns) + +elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) +elemToNumInfo ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + do + pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element + numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr + lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + return (numId, lvl) +elemToNumInfo _ _ = Nothing + +-- isBookMarkTag :: NameSpaces -> QName -> Bool +-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && +-- qURI q == (lookup "w" ns) + +-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark +-- parChildrenToBookmark ns (bms : bme : _) +-- | qName (elName bms) == "bookmarkStart" && +-- qURI (elName bms) == (lookup "w" ns) && +-- qName (elName bme) == "bookmarkEnd" && +-- qURI (elName bme) == (lookup "w" ns) = do +-- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms +-- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms +-- return $ (bmId, bmName) +-- parChildrenToBookmark _ _ = Nothing + +elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart +elemToBodyPart ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + let parstyle = elemToParagraphStyle ns element + parparts = mapMaybe id + $ map (elemToParPart ns) + $ filterChildrenName (isRunOrLinkOrBookmark ns) element + in + case elemToNumInfo ns element of + Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts + Nothing -> Just $ Paragraph parstyle parparts + | qName (elName element) == "tbl" && + qURI (elName element) == (lookup "w" ns) = + let + caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + grid = case + findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element + of + Just g -> elemToTblGrid ns g + Nothing -> [] + tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) + >>= elemToTblLook ns + in + Just $ Tbl + (fromMaybe "" caption) + grid + (fromMaybe defaultTblLook tblLook) + (mapMaybe (elemToRow ns) (elChildren element)) + | otherwise = Nothing + +elemToTblLook :: NameSpaces -> Element -> Maybe TblLook +elemToTblLook ns element + | qName (elName element) == "tblLook" && + qURI (elName element) == (lookup "w" ns) = + let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element + val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element + firstRowFmt = + case firstRow of + Just "1" -> True + Just _ -> False + Nothing -> case val of + Just bitMask -> testBitMask bitMask 0x020 + Nothing -> False + in + Just $ TblLook{firstRowFormatting = firstRowFmt} +elemToTblLook _ _ = Nothing + +testBitMask :: String -> Int -> Bool +testBitMask bitMaskS n = + case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + [] -> False + ((n', _) : _) -> ((n' .|. n) /= 0) + +data ParagraphStyle = ParagraphStyle { pStyle :: [String] + , indent :: Maybe Integer + } + deriving Show + +defaultParagraphStyle :: ParagraphStyle +defaultParagraphStyle = ParagraphStyle { pStyle = [] + , indent = Nothing + } + +elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle +elemToParagraphStyle ns element = + case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of + Just pPr -> + ParagraphStyle + {pStyle = + mapMaybe id $ + map + (findAttr (QName "val" (lookup "w" ns) (Just "w"))) + (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) + , indent = + findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= + findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= + stringToInteger + } + Nothing -> defaultParagraphStyle + + +data BodyPart = Paragraph ParagraphStyle [ParPart] + | ListItem ParagraphStyle String String [ParPart] + | Tbl String TblGrid TblLook [Row] + + deriving Show + +type TblGrid = [Integer] + +data TblLook = TblLook {firstRowFormatting::Bool} + deriving Show + +defaultTblLook :: TblLook +defaultTblLook = TblLook{firstRowFormatting = False} + +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) + +elemToTblGrid :: NameSpaces -> Element -> TblGrid +elemToTblGrid ns element + | qName (elName element) == "tblGrid" && + qURI (elName element) == (lookup "w" ns) = + let + cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element + in + mapMaybe (\e -> + findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e + >>= stringToInteger + ) + cols +elemToTblGrid _ _ = [] + +data Row = Row [Cell] + deriving Show + + +elemToRow :: NameSpaces -> Element -> Maybe Row +elemToRow ns element + | qName (elName element) == "tr" && + qURI (elName element) == (lookup "w" ns) = + let + cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element + in + Just $ Row (mapMaybe (elemToCell ns) cells) +elemToRow _ _ = Nothing + +data Cell = Cell [BodyPart] + deriving Show + +elemToCell :: NameSpaces -> Element -> Maybe Cell +elemToCell ns element + | qName (elName element) == "tc" && + qURI (elName element) == (lookup "w" ns) = + Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) +elemToCell _ _ = Nothing + +data ParPart = PlainRun Run + | BookMark BookMarkId Anchor + | InternalHyperLink Anchor [Run] + | ExternalHyperLink RelId [Run] + | Drawing String + deriving Show + +data Run = Run RunStyle [RunElem] + | Footnote String + | Endnote String + deriving Show + +data RunElem = TextRun String | LnBrk + deriving Show + +data RunStyle = RunStyle { isBold :: Bool + , isItalic :: Bool + , isSmallCaps :: Bool + , isStrike :: Bool + , isSuperScript :: Bool + , isSubScript :: Bool + , underline :: Maybe String + , rStyle :: Maybe String } + deriving Show + +defaultRunStyle :: RunStyle +defaultRunStyle = RunStyle { isBold = False + , isItalic = False + , isSmallCaps = False + , isStrike = False + , isSuperScript = False + , isSubScript = False + , underline = Nothing + , rStyle = Nothing + } + +elemToRunStyle :: NameSpaces -> Element -> RunStyle +elemToRunStyle ns element = + case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of + Just rPr -> + RunStyle + { + isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr + , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr + , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr + , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr + , isSuperScript = + (Just "superscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , isSubScript = + (Just "subscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , underline = + findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + , rStyle = + findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + } + Nothing -> defaultRunStyle + +elemToRun :: NameSpaces -> Element -> Maybe Run +elemToRun ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case + findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Footnote s + Nothing -> + case + findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Endnote s + Nothing -> Just $ + Run (elemToRunStyle ns element) + (elemToRunElems ns element) +elemToRun _ _ = Nothing + +elemToRunElem :: NameSpaces -> Element -> Maybe RunElem +elemToRunElem ns element + | qName (elName element) == "t" && + qURI (elName element) == (lookup "w" ns) = + Just $ TextRun (strContent element) + | qName (elName element) == "br" && + qURI (elName element) == (lookup "w" ns) = + Just $ LnBrk + | otherwise = Nothing + + +elemToRunElems :: NameSpaces -> Element -> [RunElem] +elemToRunElems ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + mapMaybe (elemToRunElem ns) (elChildren element) + | otherwise = [] + +elemToDrawing :: NameSpaces -> Element -> Maybe ParPart +elemToDrawing ns element + | qName (elName element) == "drawing" && + qURI (elName element) == (lookup "w" ns) = + let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + in + findElement (QName "blip" (Just a_ns) (Just "a")) element + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= (\s -> Just $ Drawing s) +elemToDrawing _ _ = Nothing + + +elemToParPart :: NameSpaces -> Element -> Maybe ParPart +elemToParPart ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of + Just drawingElem -> elemToDrawing ns drawingElem + Nothing -> do + r <- elemToRun ns element + return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "bookmarkStart" && + qURI (elName element) == (lookup "w" ns) = do + bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element + return $ BookMark bmId bmName +elemToParPart ns element + | qName (elName element) == "hyperlink" && + qURI (elName element) == (lookup "w" ns) = + let runs = map fromJust $ filter isJust $ map (elemToRun ns) + $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element + in + case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of + Just anchor -> + Just $ InternalHyperLink anchor runs + Nothing -> + case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of + Just relId -> Just $ ExternalHyperLink relId runs + Nothing -> Nothing +elemToParPart _ _ = Nothing + +type Target = String +type Anchor = String +type BookMarkId = String +type RelId = String + -- cgit v1.2.3 From 3bc818d2d3079f1d31dbb409f839585a32f26f6e Mon Sep 17 00:00:00 2001 From: mpickering Date: Sun, 15 Jun 2014 13:38:16 +0100 Subject: Integrated the docx reader into the main pandoc program. Changes also include generalising the types of reader allowed. The mechanism now mimics the more general output mechanism. --- pandoc.hs | 13 ++++++++++--- src/Text/Pandoc.hs | 56 +++++++++++++++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 23 deletions(-) (limited to 'src/Text') diff --git a/pandoc.hs b/pandoc.hs index 5dd0e6899..0a8070d7c 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -858,6 +858,7 @@ defaultReaderName fallback (x:xs) = ".textile" -> "textile" ".native" -> "native" ".json" -> "json" + ".docx" -> "docx" _ -> defaultReaderName fallback xs -- Returns True if extension of first source is .lhs @@ -1158,15 +1159,21 @@ main = do Left e -> throwIO e Right (bs,_) -> return $ UTF8.toString bs + let readFiles [] = error "Cannot read archive from stdin" + readFiles (x:_) = B.readFile x + let convertTabs = tabFilter (if preserveTabs then 0 else tabStop) let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs" then handleIncludes else return - doc <- readSources sources >>= - handleIncludes' . convertTabs . intercalate "\n" >>= - reader readerOpts + doc <- case reader of + StringReader r-> + readSources sources >>= + handleIncludes' . convertTabs . intercalate "\n" >>= + r readerOpts + ByteStringReader r -> readFiles sources >>= r readerOpts let doc0 = M.foldWithKey setMeta doc metadata diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 130338f0e..aff471a3c 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -62,6 +62,8 @@ module Text.Pandoc , readers , writers -- * Readers: converting /to/ Pandoc format + , Reader (..) + , readDocX , readMarkdown , readMediaWiki , readRST @@ -125,6 +127,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock +import Text.Pandoc.Readers.DocX import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST @@ -192,24 +195,34 @@ markdown o s = do mapM_ warn warnings return doc +data Reader = StringReader (ReaderOptions -> String -> IO Pandoc) + | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc) + +mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader +mkStringReader r = StringReader (\o s -> return $ r o s) + +mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader +mkBSReader r = ByteStringReader (\o s -> return $ r o s) + -- | Association list of formats and readers. -readers :: [(String, ReaderOptions -> String -> IO Pandoc)] -readers = [ ("native" , \_ s -> return $ readNative s) - ,("json" , \o s -> return $ readJSON o s) - ,("markdown" , markdown) - ,("markdown_strict" , markdown) - ,("markdown_phpextra" , markdown) - ,("markdown_github" , markdown) - ,("markdown_mmd", markdown) - ,("rst" , \o s -> return $ readRST o s) - ,("mediawiki" , \o s -> return $ readMediaWiki o s) - ,("docbook" , \o s -> return $ readDocBook o s) - ,("opml" , \o s -> return $ readOPML o s) - ,("org" , \o s -> return $ readOrg o s) - ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs - ,("html" , \o s -> return $ readHtml o s) - ,("latex" , \o s -> return $ readLaTeX o s) - ,("haddock" , \o s -> return $ readHaddock o s) +readers :: [(String, Reader)] +readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) + ,("json" , mkStringReader readJSON ) + ,("markdown" , StringReader markdown) + ,("markdown_strict" , StringReader markdown) + ,("markdown_phpextra" , StringReader markdown) + ,("markdown_github" , StringReader markdown) + ,("markdown_mmd", StringReader markdown) + ,("rst" , mkStringReader readRST ) + ,("mediawiki" , mkStringReader readMediaWiki) + ,("docbook" , mkStringReader readDocBook) + ,("opml" , mkStringReader readOPML) + ,("org" , mkStringReader readOrg) + ,("textile" , mkStringReader readTextile) -- TODO : textile+lhs + ,("html" , mkStringReader readHtml) + ,("latex" , mkStringReader readLaTeX) + ,("haddock" , mkStringReader readHaddock) + ,("docx" , mkBSReader readDocX) ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) @@ -280,14 +293,17 @@ getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] -- | Retrieve reader based on formatSpec (format+extensions). -getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc) +getReader :: String -> Either String Reader getReader s = case parseFormatSpec s of Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] - Right (readerName, setExts) -> + Right (readerName, setExts) -> case lookup readerName readers of Nothing -> Left $ "Unknown reader: " ++ readerName - Just r -> Right $ \o -> + Just (StringReader r) -> Right $ StringReader $ \o -> + r o{ readerExtensions = setExts $ + getDefaultExtensions readerName } + Just (ByteStringReader r) -> Right $ ByteStringReader $ \o -> r o{ readerExtensions = setExts $ getDefaultExtensions readerName } -- cgit v1.2.3 From 7807564d4493af1462d61138c63d8ec365abc792 Mon Sep 17 00:00:00 2001 From: mpickering Date: Mon, 16 Jun 2014 20:45:54 +0100 Subject: Moved extractSpaces to Shared.hs Generalised and move the extractSpaces function from `HTML.hs` to `Shared.hs` so that the docx reader can also use it. --- src/Text/Pandoc/Readers/HTML.hs | 17 ++++------------- src/Text/Pandoc/Shared.hs | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 13 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 905e55b22..9cdc5a567 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,7 +50,6 @@ import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) import Data.Monoid -import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl) isSpace :: Char -> Bool isSpace ' ' = True @@ -369,9 +368,9 @@ pQ = do then InSingleQuote else InDoubleQuote let constructor = case quoteType of - SingleQuote -> B.singleQuoted + SingleQuote -> B.singleQuoted DoubleQuote -> B.doubleQuoted - withQuoteContext innerQuoteContext $ + withQuoteContext innerQuoteContext $ pInlinesInTags "q" constructor pEmph :: TagParser Inlines @@ -406,7 +405,7 @@ pLink = try $ do let url = fromAttrib "href" tag let title = fromAttrib "title" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ B.link (escapeURI url) title lab + return $ B.link (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -439,15 +438,7 @@ pRawHtmlInline = do pInlinesInTags :: String -> (Inlines -> Inlines) -> TagParser Inlines -pInlinesInTags tagtype f = do - contents <- B.unMany <$> pInTags tagtype inline - let left = case viewl contents of - (Space :< _) -> B.space - _ -> mempty - let right = case viewr contents of - (_ :> Space) -> B.space - _ -> mempty - return (left <> f (trimInlines . B.Many $ contents) <> right) +pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline pInTags :: (Monoid a) => String -> TagParser a -> TagParser a diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index b0adf55f5..5b0d9b6b4 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -53,6 +53,7 @@ module Text.Pandoc.Shared ( -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, + extractSpaces, normalize, stringify, compactify, @@ -113,6 +114,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) +import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -331,6 +333,20 @@ isSpaceOrEmpty Space = True isSpaceOrEmpty (Str "") = True isSpaceOrEmpty _ = False +-- | Extract the leading and trailing spaces from inside an inline element +-- and place them outside the element. + +extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines +extractSpaces f is = + let contents = B.unMany is + left = case viewl contents of + (Space :< _) -> B.space + _ -> mempty + right = case viewr contents of + (_ :> Space) -> B.space + _ -> mempty in + (left <> f (B.trimInlines . B.Many $ contents) <> right) + -- | Normalize @Pandoc@ document, consolidating doubled 'Space's, -- combining adjacent 'Str's and 'Emph's, remove 'Null's and -- empty elements, etc. -- cgit v1.2.3 From 01ef573ac2f6620e9f70ae8965e5ccc664e3aec4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 14:18:06 -0700 Subject: Org reader: fixed #1342. This change rewrites `inlineLaTeXCommand` so that parsec will know when input is being consumed. Previously a run-time error would be produced with some input involving raw latex. (I believe this does not affect the last release, as the inline latex reading was added recently.) --- src/Text/Pandoc/Readers/Org.hs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c3ea8d7c2..0e872abf0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -38,10 +38,9 @@ import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString - , updateLastStrPos ) + ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) -import Text.Parsec.Pos (updatePosString) import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure @@ -148,10 +147,6 @@ resetBlockAttributes :: OrgParser () resetBlockAttributes = updateState $ \s -> s{ orgStateBlockAttributes = orgStateBlockAttributes def } -updateLastStrPos :: OrgParser () -updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastStrPos = Just p } - updateLastForbiddenCharPos :: OrgParser () updateLastForbiddenCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} @@ -1376,8 +1371,9 @@ maybeRight = either (const Nothing) Just inlineLaTeXCommand :: OrgParser String inlineLaTeXCommand = try $ do rest <- getInput - pos <- getPosition case runParser rawLaTeXInline def "source" rest of - Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) - <* (setPosition $ updatePosString pos cs) + Right (RawInline _ cs) -> do + let len = length cs + count len anyChar + return cs _ -> mzero -- cgit v1.2.3 From 31fd843133ee9482b6af353a7d793cae18929425 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 15:41:23 -0700 Subject: HTML reader: Fixed major parsing problem with HTML tables. Table cells were being combined into one cell. Closes #1341. --- src/Text/Pandoc/Readers/HTML.hs | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 9cdc5a567..d27afc543 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -238,30 +238,26 @@ pTable = try $ do caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol - head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th") + head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") skipMany pBlank rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank TagClose _ <- pSatisfy (~== TagClose "table") - let isSinglePlain [] = True - isSinglePlain [Plain _] = True - isSinglePlain _ = False - let lHead = B.toList head' - let lRows = map B.toList rows - let isSimple = all isSinglePlain (lHead:lRows) - let cols = length $ if null lHead - then head lRows - else lHead + let isSinglePlain x = case B.toList x of + [Plain _] -> True + _ -> False + let isSimple = all isSinglePlain $ concat (head':rows) + let cols = length $ if null head' then head rows else head' -- fail if there are colspans or rowspans - guard $ all (\r -> length r == cols) lRows - let aligns = replicate cols AlignLeft + guard $ all (\r -> length r == cols) rows + let aligns = replicate cols AlignDefault let widths = if null widths' then if isSimple then replicate cols 0 else replicate cols (1.0 / fromIntegral cols) else widths' - return $ B.table caption (zip aligns widths) [head'] [rows] + return $ B.table caption (zip aligns widths) head' rows pCol :: TagParser Double pCol = try $ do @@ -279,12 +275,12 @@ pColgroup = try $ do skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -pCell :: String -> TagParser Blocks +pCell :: String -> TagParser [Blocks] pCell celltype = try $ do skipMany pBlank res <- pInTags celltype block skipMany pBlank - return res + return [res] pBlockQuote :: TagParser Blocks pBlockQuote = do -- cgit v1.2.3 From 459805de4c4e7129dc624086a654febb161f99ad Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 17:43:56 -0700 Subject: LaTeX reader: don't assume preamble doesn't contain environments. Closes #1338. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6b5958920..9e7a38b8f 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1134,7 +1134,7 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}" + where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" preambleBlock = (void comment) <|> (void sp) <|> (void blanklines) -- cgit v1.2.3 From 87c08be58f54ae60cfd57be6c17a256cbdb105d6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 19:18:33 -0700 Subject: LaTeX reader: handle leading/trailing spaces in emph better. `\emph{ hi }` gets parsed as `[Space, Emph [Str "hi"], Space]` so that we don't get things like `* hi *` in markdown output. Also applies to textbf and some other constructions. Closes #1146. (`--normalize` isn't touched by this, but normalization should not generally be necessary with the changes to the readers.) --- src/Text/Pandoc/Readers/LaTeX.hs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9e7a38b8f..3c4d4ee52 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -397,18 +397,18 @@ isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands inlineCommands :: M.Map String (LP Inlines) inlineCommands = M.fromList $ - [ ("emph", emph <$> tok) - , ("textit", emph <$> tok) - , ("textsl", emph <$> tok) - , ("textsc", smallcaps <$> tok) - , ("sout", strikeout <$> tok) - , ("textsuperscript", superscript <$> tok) - , ("textsubscript", subscript <$> tok) + [ ("emph", extractSpaces emph <$> tok) + , ("textit", extractSpaces emph <$> tok) + , ("textsl", extractSpaces emph <$> tok) + , ("textsc", extractSpaces smallcaps <$> tok) + , ("sout", extractSpaces strikeout <$> tok) + , ("textsuperscript", extractSpaces superscript <$> tok) + , ("textsubscript", extractSpaces subscript <$> tok) , ("textbackslash", lit "\\") , ("backslash", lit "\\") , ("slash", lit "/") - , ("textbf", strong <$> tok) - , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) + , ("textbf", extractSpaces strong <$> tok) + , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) , ("ldots", lit "…") , ("dots", lit "…") , ("mdots", lit "…") @@ -428,15 +428,15 @@ inlineCommands = M.fromList $ , ("{", lit "{") , ("}", lit "}") -- old TeX commands - , ("em", emph <$> inlines) - , ("it", emph <$> inlines) - , ("sl", emph <$> inlines) - , ("bf", strong <$> inlines) + , ("em", extractSpaces emph <$> inlines) + , ("it", extractSpaces emph <$> inlines) + , ("sl", extractSpaces emph <$> inlines) + , ("bf", extractSpaces strong <$> inlines) , ("rm", inlines) - , ("itshape", emph <$> inlines) - , ("slshape", emph <$> inlines) - , ("scshape", smallcaps <$> inlines) - , ("bfseries", strong <$> inlines) + , ("itshape", extractSpaces emph <$> inlines) + , ("slshape", extractSpaces emph <$> inlines) + , ("scshape", extractSpaces smallcaps <$> inlines) + , ("bfseries", extractSpaces strong <$> inlines) , ("/", pure mempty) -- italic correction , ("aa", lit "å") , ("AA", lit "Å") -- cgit v1.2.3 From 9da5d8955ecc090d1582a9d007a0ecace654d229 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 20:48:55 -0700 Subject: Markdown reader: fixed #1333 (table parsing bug). --- src/Text/Pandoc/Readers/Markdown.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index caa938ed6..c20a2a1fc 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1117,13 +1117,14 @@ multilineTable headless = multilineTableHeader :: Bool -- ^ Headerless table -> MarkdownParser (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do - if headless - then return '\n' - else tableSep >>~ notFollowedBy blankline + unless headless $ + tableSep >> notFollowedBy blankline rawContent <- if headless then return $ repeat "" - else many1 - (notFollowedBy tableSep >> many1Till anyChar newline) + else many1 $ do + notFollowedBy blankline + notFollowedBy tableSep + anyLine initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline -- cgit v1.2.3 From f9b97e6bfb1d26bd328cdbb1ca83c4558e7f4a0c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 21:26:50 -0700 Subject: Small improvement to fix to #1333. This allows blank lines at end of multiline headers. --- src/Text/Pandoc/Readers/Markdown.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c20a2a1fc..a6720beba 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1121,10 +1121,7 @@ multilineTableHeader headless = try $ do tableSep >> notFollowedBy blankline rawContent <- if headless then return $ repeat "" - else many1 $ do - notFollowedBy blankline - notFollowedBy tableSep - anyLine + else many1 $ notFollowedBy tableSep >> anyLine initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline -- cgit v1.2.3 From 78ee2416d105bd25337819a49835623a8a296224 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:03:26 -0700 Subject: Org reader: make tildes create inline code. Closes #1345. Also relabeled 'code' and 'verbatim' parsers to accord with the org-mode manual. I'm not sure what the distinction between code and verbatim is supposed to be, but I'm pretty sure both should be represented as Code inlines in pandoc. The previous behavior resulted in the text not appearing in any output format. --- src/Text/Pandoc/Readers/Org.hs | 8 ++++---- tests/Tests/Readers/Org.hs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e872abf0..7a35e2ca0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1148,11 +1148,11 @@ strikeout = fmap B.strikeout <$> emphasisBetween '+' underline :: OrgParser (F Inlines) underline = fmap B.strong <$> emphasisBetween '_' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '=' - verbatim :: OrgParser (F Inlines) -verbatim = return . B.rawInline "" <$> verbatimBetween '~' +verbatim = return . B.code <$> verbatimBetween '=' + +code :: OrgParser (F Inlines) +code = return . B.code <$> verbatimBetween '~' subscript :: OrgParser (F Inlines) subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ed77887f..f8240ca3d 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -50,13 +50,13 @@ tests = "+Kill Bill+" =?> para (strikeout . spcSep $ [ "Kill", "Bill" ]) - , "Code" =: + , "Verbatim" =: "=Robot.rock()=" =?> para (code "Robot.rock()") - , "Verbatim" =: + , "Code" =: "~word for word~" =?> - para (rawInline "" "word for word") + para (code "word for word") , "Math $..$" =: "$E=mc^2$" =?> -- cgit v1.2.3 From bbe99003f8d25dc65ab12851907ecd5d9aad746c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:44:40 -0700 Subject: Naming: Use Docx instead of DocX. For consistency with the existing writer. --- pandoc.cabal | 8 +- src/Text/Pandoc.hs | 6 +- src/Text/Pandoc/Readers/DocX.hs | 479 --------------------------- src/Text/Pandoc/Readers/DocX/Lists.hs | 208 ------------ src/Text/Pandoc/Readers/DocX/Parse.hs | 604 ---------------------------------- src/Text/Pandoc/Readers/Docx.hs | 479 +++++++++++++++++++++++++++ src/Text/Pandoc/Readers/Docx/Lists.hs | 208 ++++++++++++ src/Text/Pandoc/Readers/Docx/Parse.hs | 604 ++++++++++++++++++++++++++++++++++ tests/Tests/Readers/DocX.hs | 68 ---- tests/Tests/Readers/Docx.hs | 68 ++++ tests/test-pandoc.hs | 4 +- 11 files changed, 1368 insertions(+), 1368 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/DocX.hs delete mode 100644 src/Text/Pandoc/Readers/DocX/Lists.hs delete mode 100644 src/Text/Pandoc/Readers/DocX/Parse.hs create mode 100644 src/Text/Pandoc/Readers/Docx.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Lists.hs create mode 100644 src/Text/Pandoc/Readers/Docx/Parse.hs delete mode 100644 tests/Tests/Readers/DocX.hs create mode 100644 tests/Tests/Readers/Docx.hs (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index a6126a331..5898af5ad 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -293,7 +293,7 @@ Library Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Native, Text.Pandoc.Readers.Haddock, - Text.Pandoc.Readers.DocX, + Text.Pandoc.Readers.Docx, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.OPML, @@ -324,8 +324,8 @@ Library Text.Pandoc.Process Other-Modules: Text.Pandoc.Readers.Haddock.Lex, Text.Pandoc.Readers.Haddock.Parse, - Text.Pandoc.Readers.DocX.Lists, - Text.Pandoc.Readers.DocX.Parse, + Text.Pandoc.Readers.Docx.Lists, + Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, @@ -411,7 +411,7 @@ Test-Suite test-pandoc Tests.Readers.Markdown Tests.Readers.Org Tests.Readers.RST - Tests.Readers.DocX + Tests.Readers.Docx Tests.Writers.Native Tests.Writers.ConTeXt Tests.Writers.HTML diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index aff471a3c..45c2f453b 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -63,7 +63,7 @@ module Text.Pandoc , writers -- * Readers: converting /to/ Pandoc format , Reader (..) - , readDocX + , readDocx , readMarkdown , readMediaWiki , readRST @@ -127,7 +127,7 @@ import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock -import Text.Pandoc.Readers.DocX +import Text.Pandoc.Readers.Docx import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST @@ -222,7 +222,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("html" , mkStringReader readHtml) ,("latex" , mkStringReader readLaTeX) ,("haddock" , mkStringReader readHaddock) - ,("docx" , mkBSReader readDocX) + ,("docx" , mkBSReader readDocx) ] data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) diff --git a/src/Text/Pandoc/Readers/DocX.hs b/src/Text/Pandoc/Readers/DocX.hs deleted file mode 100644 index 976e2e271..000000000 --- a/src/Text/Pandoc/Readers/DocX.hs +++ /dev/null @@ -1,479 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Conversion of DocX type (defined in Text.Pandoc.Readers.DocX.Parse) -to 'Pandoc' document. -} - -{- -Current state of implementation of DocX entities ([x] means -implemented, [-] means partially implemented): - -* Blocks - - - [X] Para - - [X] CodeBlock (styled with `SourceCode`) - - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, - indented) - - [X] OrderedList - - [X] BulletList - - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) - - [X] Header (styled with `Heading#`) - - [ ] HorizontalRule - - [-] Table (column widths and alignments not yet implemented) - -* Inlines - - - [X] Str - - [X] Emph (From italics. `underline` currently read as span. In - future, it might optionally be emph as well) - - [X] Strong - - [X] Strikeout - - [X] Superscript - - [X] Subscript - - [X] SmallCaps - - [ ] Quoted - - [ ] Cite - - [X] Code (styled with `VerbatimChar`) - - [X] Space - - [X] LineBreak (these are invisible in Word: entered with Shift-Return) - - [ ] Math - - [X] Link (links to an arbitrary bookmark create a span with the target as - id and "anchor" class) - - [-] Image (Links to path in archive. Future option for - data-encoded URI likely.) - - [X] Note (Footnotes and Endnotes are silently combined.) --} - -module Text.Pandoc.Readers.DocX - ( readDocX - ) where - -import Codec.Archive.Zip -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Builder (text, toList) -import Text.Pandoc.Generic (bottomUp) -import Text.Pandoc.MIME (getMimeType) -import Text.Pandoc.UTF8 (toString) -import Text.Pandoc.Readers.DocX.Parse -import Text.Pandoc.Readers.DocX.Lists -import Data.Maybe (mapMaybe, isJust, fromJust) -import Data.List (delete, isPrefixOf, (\\), intersect) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Base64 (encode) -import System.FilePath (combine) - -readDocX :: ReaderOptions - -> B.ByteString - -> Pandoc -readDocX opts bytes = - case archiveToDocX (toArchive bytes) of - Just docx -> Pandoc nullMeta (docxToBlocks opts docx) - Nothing -> error $ "couldn't parse docx file" - -runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) -runStyleToSpanAttr rPr = ("", - mapMaybe id [ - if isBold rPr then (Just "strong") else Nothing, - if isItalic rPr then (Just "emph") else Nothing, - if isSmallCaps rPr then (Just "smallcaps") else Nothing, - if isStrike rPr then (Just "strike") else Nothing, - if isSuperScript rPr then (Just "superscript") else Nothing, - if isSubScript rPr then (Just "subscript") else Nothing, - rStyle rPr], - case underline rPr of - Just fmt -> [("underline", fmt)] - _ -> [] - ) - -parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) -parStyleToDivAttr pPr = ("", - pStyle pPr, - case indent pPr of - Just n -> [("indent", (show n))] - Nothing -> [] - ) - -strToInlines :: String -> [Inline] -strToInlines = toList . text - -codeSpans :: [String] -codeSpans = ["VerbatimChar"] - -blockQuoteDivs :: [String] -blockQuoteDivs = ["Quote", "BlockQuote"] - -codeDivs :: [String] -codeDivs = ["SourceCode"] - -runElemToInlines :: RunElem -> [Inline] -runElemToInlines (TextRun s) = strToInlines s -runElemToInlines (LnBrk) = [LineBreak] - -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString (LnBrk) = ['\n'] - -runElemsToString :: [RunElem] -> String -runElemsToString = concatMap runElemToString - -strNormalize :: [Inline] -> [Inline] -strNormalize [] = [] -strNormalize (Str "" : ils) = strNormalize ils -strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) -strNormalize (il:ils) = il : (strNormalize ils) - -runToInlines :: ReaderOptions -> DocX -> Run -> [Inline] -runToInlines _ _ (Run rs runElems) - | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = - case runStyleToSpanAttr rs == ("", [], []) of - True -> [Str (runElemsToString runElems)] - False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] - | otherwise = case runStyleToSpanAttr rs == ("", [], []) of - True -> concatMap runElemToInlines runElems - False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] -runToInlines opts docx@(DocX _ notes _ _ _ ) (Footnote fnId) = - case (getFootNote fnId notes) of - Just bodyParts -> - [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] - Nothing -> - [Note [Div ("", ["footnote"], []) []]] -runToInlines opts docx@(DocX _ notes _ _ _) (Endnote fnId) = - case (getEndNote fnId notes) of - Just bodyParts -> - [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] - Nothing -> - [Note [Div ("", ["endnote"], []) []]] - -parPartToInlines :: ReaderOptions -> DocX -> ParPart -> [Inline] -parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines _ _ (BookMark _ anchor) = - [Span (anchor, ["anchor"], []) []] -parPartToInlines _ (DocX _ _ _ rels _) (Drawing relid) = - case lookupRelationship relid rels of - Just target -> [Image [] (combine "word" target, "")] - Nothing -> [Image [] ("", "")] -parPartToInlines opts docx (InternalHyperLink anchor runs) = - [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] -parPartToInlines opts docx@(DocX _ _ _ rels _) (ExternalHyperLink relid runs) = - case lookupRelationship relid rels of - Just target -> - [Link (concatMap (runToInlines opts docx) runs) (target, "")] - Nothing -> - [Link (concatMap (runToInlines opts docx) runs) ("", "")] - -isAnchorSpan :: Inline -> Bool -isAnchorSpan (Span (ident, classes, kvs) ils) = - (not . null) ident && - classes == ["anchor"] && - null kvs && - null ils -isAnchorSpan _ = False - -dummyAnchors :: [String] -dummyAnchors = ["_GoBack"] - -makeHeaderAnchors :: Block -> Block -makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = - case filter isAnchorSpan ils of - [] -> h - (x@(Span (ident, _, _) _) : xs) -> - case ident `elem` dummyAnchors of - True -> h - False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) - _ -> h -makeHeaderAnchors blk = blk - - -parPartsToInlines :: ReaderOptions -> DocX -> [ParPart] -> [Inline] -parPartsToInlines opts docx parparts = - -- - -- We're going to skip data-uri's for now. It should be an option, - -- not mandatory. - -- - --bottomUp (makeImagesSelfContained docx) $ - bottomUp spanCorrect $ - bottomUp spanTrim $ - bottomUp spanReduce $ - concatMap (parPartToInlines opts docx) parparts - -cellToBlocks :: ReaderOptions -> DocX -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps - -rowToBlocksList :: ReaderOptions -> DocX -> Row -> [[Block]] -rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells - -bodyPartToBlock :: ReaderOptions -> DocX -> BodyPart -> Block -bodyPartToBlock opts docx (Paragraph pPr parparts) = - Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] -bodyPartToBlock opts docx@(DocX _ _ numbering _ _) (ListItem pPr numId lvl parparts) = - let - kvs = case lookupLevel numId lvl numbering of - Just (_, fmt, txt, Just start) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - , ("start", (show start)) - ] - - Just (_, fmt, txt, Nothing) -> [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - ] - Nothing -> [] - in - Div - ("", ["list-item"], kvs) - [bodyPartToBlock opts docx (Paragraph pPr parparts)] -bodyPartToBlock _ _ (Tbl _ _ _ []) = - Para [] -bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = - let caption = strToInlines cap - (hdr, rows) = case firstRowFormatting look of - True -> (Just r, rs) - False -> (Nothing, r:rs) - hdrCells = case hdr of - Just r' -> rowToBlocksList opts docx r' - Nothing -> [] - cells = map (rowToBlocksList opts docx) rows - - size = case null hdrCells of - True -> length $ head cells - False -> length $ hdrCells - -- - -- The two following variables (horizontal column alignment and - -- relative column widths) go to the default at the - -- moment. Width information is in the TblGrid field of the Tbl, - -- so should be possible. Alignment might be more difficult, - -- since there doesn't seem to be a column entity in docx. - alignments = take size (repeat AlignDefault) - widths = take size (repeat 0) :: [Double] - in - Table caption alignments widths hdrCells cells - -makeImagesSelfContained :: DocX -> Inline -> Inline -makeImagesSelfContained (DocX _ _ _ _ media) i@(Image alt (uri, title)) = - case lookup uri media of - Just bs -> case getMimeType uri of - Just mime -> let data_uri = - "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) - in - Image alt (data_uri, title) - Nothing -> i - Nothing -> i -makeImagesSelfContained _ inline = inline - -bodyToBlocks :: ReaderOptions -> DocX -> Body -> [Block] -bodyToBlocks opts docx (Body bps) = - bottomUp removeEmptyPars $ - bottomUp strNormalize $ - bottomUp spanRemove $ - bottomUp divRemove $ - map (makeHeaderAnchors) $ - bottomUp divCorrect $ - bottomUp divReduce $ - bottomUp divCorrectPreReduce $ - bottomUp blocksToDefinitions $ - blocksToBullets $ - map (bodyPartToBlock opts docx) bps - -docxToBlocks :: ReaderOptions -> DocX -> [Block] -docxToBlocks opts d@(DocX (Document _ body) _ _ _ _) = bodyToBlocks opts d body - -spanReduce :: [Inline] -> [Inline] -spanReduce [] = [] -spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) - | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) -spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : - s2@(Span (id2, classes2, kvs2) ils2) : - ils) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> s1 : (spanReduce (s2 : ils)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : - ils) -spanReduce (il:ils) = il : (spanReduce ils) - -ilToCode :: Inline -> String -ilToCode (Str s) = s -ilToCode _ = "" - -spanRemove' :: Inline -> [Inline] -spanRemove' s@(Span (ident, classes, _) []) - -- "_GoBack" is automatically inserted. We don't want to keep it. - | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] -spanRemove' (Span (_, _, kvs) ils) = - case lookup "underline" kvs of - Just val -> [Span ("", [], [("underline", val)]) ils] - Nothing -> ils -spanRemove' il = [il] - -spanRemove :: [Inline] -> [Inline] -spanRemove = concatMap spanRemove' - -spanTrim' :: Inline -> [Inline] -spanTrim' il@(Span _ []) = [il] -spanTrim' il@(Span attr (il':[])) - | il' == Space = [Span attr [], Space] - | otherwise = [il] -spanTrim' (Span attr ils) - | head ils == Space && last ils == Space = - [Space, Span attr (init $ tail ils), Space] - | head ils == Space = [Space, Span attr (tail ils)] - | last ils == Space = [Span attr (init ils), Space] -spanTrim' il = [il] - -spanTrim :: [Inline] -> [Inline] -spanTrim = concatMap spanTrim' - -spanCorrect' :: Inline -> [Inline] -spanCorrect' (Span ("", [], []) ils) = ils -spanCorrect' (Span (ident, classes, kvs) ils) - | "emph" `elem` classes = - [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] - | "strong" `elem` classes = - [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] - | "smallcaps" `elem` classes = - [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] - | "strike" `elem` classes = - [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] - | "superscript" `elem` classes = - [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] - | "subscript" `elem` classes = - [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] - | (not . null) (codeSpans `intersect` classes) = - [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] - | otherwise = - [Span (ident, classes, kvs) ils] -spanCorrect' il = [il] - -spanCorrect :: [Inline] -> [Inline] -spanCorrect = concatMap spanCorrect' - -removeEmptyPars :: [Block] -> [Block] -removeEmptyPars blks = filter (\b -> b /= (Para [])) blks - -divReduce :: [Block] -> [Block] -divReduce [] = [] -divReduce ((Div (id1, classes1, kvs1) blks1) : blks) - | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) -divReduce (d1@(Div (id1, classes1, kvs1) blks1) : - d2@(Div (id2, classes2, kvs2) blks2) : - blks) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> d1 : (divReduce (d2 : blks)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : - blks) -divReduce (blk:blks) = blk : (divReduce blks) - -isHeaderClass :: String -> Maybe Int -isHeaderClass s | "Heading" `isPrefixOf` s = - case reads (drop (length "Heading") s) :: [(Int, String)] of - [] -> Nothing - ((n, "") : []) -> Just n - _ -> Nothing -isHeaderClass _ = Nothing - -findHeaderClass :: [String] -> Maybe Int -findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of - [] -> Nothing - n : _ -> Just n - -blksToInlines :: [Block] -> [Inline] -blksToInlines (Para ils : _) = ils -blksToInlines (Plain ils : _) = ils -blksToInlines _ = [] - -divCorrectPreReduce' :: Block -> [Block] -divCorrectPreReduce' (Div (ident, classes, kvs) blks) - | isJust $ findHeaderClass classes = - let n = fromJust $ findHeaderClass classes - in - [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] - | otherwise = [Div (ident, classes, kvs) blks] -divCorrectPreReduce' blk = [blk] - -divCorrectPreReduce :: [Block] -> [Block] -divCorrectPreReduce = concatMap divCorrectPreReduce' - -blkToCode :: Block -> String -blkToCode (Para []) = "" -blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) -blkToCode (Para ((Span (_, classes, _) ils'): ils)) - | (not . null) (codeSpans `intersect` classes) = - (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) -blkToCode _ = "" - -divRemove' :: Block -> [Block] -divRemove' (Div (_, _, kvs) blks) = - case lookup "indent" kvs of - Just val -> [Div ("", [], [("indent", val)]) blks] - Nothing -> blks -divRemove' blk = [blk] - -divRemove :: [Block] -> [Block] -divRemove = concatMap divRemove' - -divCorrect' :: Block -> [Block] -divCorrect' b@(Div (ident, classes, kvs) blks) - | (not . null) (blockQuoteDivs `intersect` classes) = - [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] - | (not . null) (codeDivs `intersect` classes) = - [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] - | otherwise = - case lookup "indent" kvs of - Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] - Just _ -> - [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] - Nothing -> [b] -divCorrect' blk = [blk] - -divCorrect :: [Block] -> [Block] -divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/DocX/Lists.hs b/src/Text/Pandoc/Readers/DocX/Lists.hs deleted file mode 100644 index b20679261..000000000 --- a/src/Text/Pandoc/Readers/DocX/Lists.hs +++ /dev/null @@ -1,208 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX.Lists - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Functions for converting flat DocX paragraphs into nested lists. --} - -module Text.Pandoc.Readers.DocX.Lists ( blocksToBullets - , blocksToDefinitions) where - -import Text.Pandoc.JSON -import Text.Pandoc.Shared (trim) -import Control.Monad -import Data.List -import Data.Maybe - -isListItem :: Block -> Bool -isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True -isListItem _ = False - -getLevel :: Block -> Maybe Integer -getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs -getLevel _ = Nothing - -getLevelN :: Block -> Integer -getLevelN b = case getLevel b of - Just n -> n - Nothing -> -1 - -getNumId :: Block -> Maybe Integer -getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs -getNumId _ = Nothing - -getNumIdN :: Block -> Integer -getNumIdN b = case getNumId b of - Just n -> n - Nothing -> -1 - -getText :: Block -> Maybe String -getText (Div (_, _, kvs) _) = lookup "text" kvs -getText _ = Nothing - -data ListType = Itemized | Enumerated ListAttributes - -listStyleMap :: [(String, ListNumberStyle)] -listStyleMap = [("upperLetter", UpperAlpha), - ("lowerLetter", LowerAlpha), - ("upperRoman", UpperRoman), - ("lowerRoman", LowerRoman), - ("decimal", Decimal)] - -listDelimMap :: [(String, ListNumberDelim)] -listDelimMap = [("%1)", OneParen), - ("(%1)", TwoParens), - ("%1.", Period)] - -getListType :: Block -> Maybe ListType -getListType b@(Div (_, _, kvs) _) | isListItem b = - let - start = lookup "start" kvs - frmt = lookup "format" kvs - txt = lookup "text" kvs - in - case frmt of - Just "bullet" -> Just Itemized - Just f -> - case txt of - Just t -> Just $ Enumerated ( - read (fromMaybe "1" start) :: Int, - fromMaybe DefaultStyle (lookup f listStyleMap), - fromMaybe DefaultDelim (lookup t listDelimMap)) - Nothing -> Nothing - _ -> Nothing -getListType _ = Nothing - -listParagraphDivs :: [String] -listParagraphDivs = ["ListParagraph"] - --- This is a first stab at going through and attaching meaning to list --- paragraphs, without an item marker, following a list item. We --- assume that these are paragraphs in the same item. - -handleListParagraphs :: [Block] -> [Block] -handleListParagraphs [] = [] -handleListParagraphs ( - (Div attr1@(_, classes1, _) blks1) : - (Div (ident2, classes2, kvs2) blks2) : - blks - ) | "list-item" `elem` classes1 && - not ("list-item" `elem` classes2) && - (not . null) (listParagraphDivs `intersect` classes2) = - -- We don't want to keep this indent. - let newDiv2 = - (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) - in - handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) -handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) - -separateBlocks' :: Block -> [[Block]] -> [[Block]] -separateBlocks' blk ([] : []) = [[blk]] -separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] --- The following is for the invisible bullet lists. This is how --- pandoc-generated ooxml does multiparagraph item lists. -separateBlocks' b acc | liftM trim (getText b) == Just "" = - (init acc) ++ [(last acc) ++ [b]] -separateBlocks' b acc = acc ++ [[b]] - -separateBlocks :: [Block] -> [[Block]] -separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) - -flatToBullets' :: Integer -> [Block] -> [Block] -flatToBullets' _ [] = [] -flatToBullets' num xs@(b : elems) - | getLevelN b == num = b : (flatToBullets' num elems) - | otherwise = - let bNumId = getNumIdN b - bLevel = getLevelN b - (children, remaining) = - span - (\b' -> - ((getLevelN b') > bLevel || - ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) - xs - in - case getListType b of - Just (Enumerated attr) -> - (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - _ -> - (BulletList (separateBlocks $ flatToBullets' bLevel children)) : - (flatToBullets' num remaining) - -flatToBullets :: [Block] -> [Block] -flatToBullets elems = flatToBullets' (-1) elems - -blocksToBullets :: [Block] -> [Block] -blocksToBullets blks = - -- bottomUp removeListItemDivs $ - flatToBullets $ (handleListParagraphs blks) - - -plainParaInlines :: Block -> [Inline] -plainParaInlines (Plain ils) = ils -plainParaInlines (Para ils) = ils -plainParaInlines _ = [] - -blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] -blocksToDefinitions' [] acc [] = reverse acc -blocksToDefinitions' defAcc acc [] = - reverse $ (DefinitionList (reverse defAcc)) : acc -blocksToDefinitions' defAcc acc - ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) - | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - pair = case remainingAttr2 == ("", [], []) of - True -> (concatMap plainParaInlines blks1, [blks2]) - False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) - in - blocksToDefinitions' (pair : defAcc) acc blks -blocksToDefinitions' defAcc acc - ((Div (ident2, classes2, kvs2) blks2) : blks) - | (not . null) defAcc && "Definition" `elem` classes2 = - let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) - defItems2 = case remainingAttr2 == ("", [], []) of - True -> blks2 - False -> [Div remainingAttr2 blks2] - ((defTerm, defItems):defs) = defAcc - defAcc' = case null defItems of - True -> (defTerm, [defItems2]) : defs - False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs - in - blocksToDefinitions' defAcc' acc blks -blocksToDefinitions' [] acc (b:blks) = - blocksToDefinitions' [] (b:acc) blks -blocksToDefinitions' defAcc acc (b:blks) = - blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks - - -blocksToDefinitions :: [Block] -> [Block] -blocksToDefinitions = blocksToDefinitions' [] [] - - - - diff --git a/src/Text/Pandoc/Readers/DocX/Parse.hs b/src/Text/Pandoc/Readers/DocX/Parse.hs deleted file mode 100644 index d7033d9e8..000000000 --- a/src/Text/Pandoc/Readers/DocX/Parse.hs +++ /dev/null @@ -1,604 +0,0 @@ -{- -Copyright (C) 2014 Jesse Rosenthal - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.DocX.Parse - Copyright : Copyright (C) 2014 Jesse Rosenthal - License : GNU GPL, version 2 or above - - Maintainer : Jesse Rosenthal - Stability : alpha - Portability : portable - -Conversion of DocX archive into DocX haskell type --} - - -module Text.Pandoc.Readers.DocX.Parse ( DocX(..) - , Document(..) - , Body(..) - , BodyPart(..) - , TblLook(..) - , ParPart(..) - , Run(..) - , RunElem(..) - , Notes - , Numbering - , Relationship - , Media - , RunStyle(..) - , ParagraphStyle(..) - , Row(..) - , Cell(..) - , getFootNote - , getEndNote - , lookupLevel - , lookupRelationship - , archiveToDocX - ) where -import Codec.Archive.Zip -import Text.XML.Light -import Data.Maybe -import Data.List -import System.FilePath -import Data.Bits ((.|.)) -import qualified Data.ByteString.Lazy as B -import qualified Text.Pandoc.UTF8 as UTF8 - -attrToNSPair :: Attr -> Maybe (String, String) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing - - -type NameSpaces = [(String, String)] - -data DocX = DocX Document Notes Numbering [Relationship] Media - deriving Show - -archiveToDocX :: Archive -> Maybe DocX -archiveToDocX archive = do - let notes = archiveToNotes archive - rels = archiveToRelationships archive - media = archiveToMedia archive - doc <- archiveToDocument archive - numbering <- archiveToNumbering archive - return $ DocX doc notes numbering rels media - -data Document = Document NameSpaces Body - deriving Show - -archiveToDocument :: Archive -> Maybe Document -archiveToDocument zf = do - entry <- findEntryByPath "word/document.xml" zf - docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs docElem) - bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem - body <- elemToBody namespaces bodyElem - return $ Document namespaces body - -type Media = [(FilePath, B.ByteString)] - -filePathIsMedia :: FilePath -> Bool -filePathIsMedia fp = - let (dir, _) = splitFileName fp - in - (dir == "word/media/") - -getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) -getMediaPair zf fp = - case findEntryByPath fp zf of - Just e -> Just (fp, fromEntry e) - Nothing -> Nothing - -archiveToMedia :: Archive -> Media -archiveToMedia zf = - mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) - -data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] - deriving Show - -data Numb = Numb String String -- right now, only a key to an abstract num - deriving Show - -data AbstractNumb = AbstractNumb String [Level] - deriving Show - --- (ilvl, format, string, start) -type Level = (String, String, String, Maybe Integer) - -lookupLevel :: String -> String -> Numbering -> Maybe Level -lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do - absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs - lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs - lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls - return lvl - -numElemToNum :: NameSpaces -> Element -> Maybe Numb -numElemToNum ns element | - qName (elName element) == "num" && - qURI (elName element) == (lookup "w" ns) = do - numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element - absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - return $ Numb numId absNumId -numElemToNum _ _ = Nothing - -absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb -absNumElemToAbsNum ns element | - qName (elName element) == "abstractNum" && - qURI (elName element) == (lookup "w" ns) = do - absNumId <- findAttr - (QName "abstractNumId" (lookup "w" ns) (Just "w")) - element - let levelElems = findChildren - (QName "lvl" (lookup "w" ns) (Just "w")) - element - levels = mapMaybe id $ map (levelElemToLevel ns) levelElems - return $ AbstractNumb absNumId levels -absNumElemToAbsNum _ _ = Nothing - -levelElemToLevel :: NameSpaces -> Element -> Maybe Level -levelElemToLevel ns element | - qName (elName element) == "lvl" && - qURI (elName element) == (lookup "w" ns) = do - ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element - fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) - return (ilvl, fmt, txt, start) -levelElemToLevel _ _ = Nothing - -archiveToNumbering :: Archive -> Maybe Numbering -archiveToNumbering zf = - case findEntryByPath "word/numbering.xml" zf of - Nothing -> Just $ Numbering [] [] [] - Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry - let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) - numElems = findChildren - (QName "num" (lookup "w" namespaces) (Just "w")) - numberingElem - absNumElems = findChildren - (QName "abstractNum" (lookup "w" namespaces) (Just "w")) - numberingElem - nums = mapMaybe id $ map (numElemToNum namespaces) numElems - absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems - return $ Numbering namespaces nums absNums - -data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) - deriving Show - -noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) -noteElemToNote ns element - | qName (elName element) `elem` ["endnote", "footnote"] && - qURI (elName element) == (lookup "w" ns) = - do - noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - let bps = map fromJust - $ filter isJust - $ map (elemToBodyPart ns) - $ filterChildrenName (isParOrTbl ns) element - return $ (noteId, bps) -noteElemToNote _ _ = Nothing - -getFootNote :: String -> Notes -> Maybe [BodyPart] -getFootNote s (Notes _ fns _) = fns >>= (lookup s) - -getEndNote :: String -> Notes -> Maybe [BodyPart] -getEndNote s (Notes _ _ ens) = ens >>= (lookup s) - -elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] -elemToNotes ns notetype element - | qName (elName element) == (notetype ++ "s") && - qURI (elName element) == (lookup "w" ns) = - Just $ map fromJust - $ filter isJust - $ map (noteElemToNote ns) - $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element -elemToNotes _ _ _ = Nothing - -archiveToNotes :: Archive -> Notes -archiveToNotes zf = - let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) - fn_namespaces = case fnElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - en_namespaces = case enElem of - Just e -> mapMaybe attrToNSPair (elAttribs e) - Nothing -> [] - ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= (elemToNotes ns "footnote") - en = enElem >>= (elemToNotes ns "endnote") - in - Notes ns fn en - - -data Relationship = Relationship (RelId, Target) - deriving Show - -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) - -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") - -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = - do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - -archiveToRelationships :: Archive -> [Relationship] -archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths - relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems - in - rels - -data Body = Body [BodyPart] - deriving Show - -isParOrTbl :: NameSpaces -> QName -> Bool -isParOrTbl ns q = qName q `elem` ["p", "tbl"] && - qURI q == (lookup "w" ns) - -elemToBody :: NameSpaces -> Element -> Maybe Body -elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = - Just $ Body - $ map fromJust - $ filter isJust - $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element -elemToBody _ _ = Nothing - -isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool -isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && - qURI q == (lookup "w" ns) - -elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) -elemToNumInfo ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - do - pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element - numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr - lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - return (numId, lvl) -elemToNumInfo _ _ = Nothing - --- isBookMarkTag :: NameSpaces -> QName -> Bool --- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && --- qURI q == (lookup "w" ns) - --- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark --- parChildrenToBookmark ns (bms : bme : _) --- | qName (elName bms) == "bookmarkStart" && --- qURI (elName bms) == (lookup "w" ns) && --- qName (elName bme) == "bookmarkEnd" && --- qURI (elName bme) == (lookup "w" ns) = do --- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms --- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms --- return $ (bmId, bmName) --- parChildrenToBookmark _ _ = Nothing - -elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart -elemToBodyPart ns element - | qName (elName element) == "p" && - qURI (elName element) == (lookup "w" ns) = - let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe id - $ map (elemToParPart ns) - $ filterChildrenName (isRunOrLinkOrBookmark ns) element - in - case elemToNumInfo ns element of - Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts - Nothing -> Just $ Paragraph parstyle parparts - | qName (elName element) == "tbl" && - qURI (elName element) == (lookup "w" ns) = - let - caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) - >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) - grid = case - findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element - of - Just g -> elemToTblGrid ns g - Nothing -> [] - tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element - >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) - >>= elemToTblLook ns - in - Just $ Tbl - (fromMaybe "" caption) - grid - (fromMaybe defaultTblLook tblLook) - (mapMaybe (elemToRow ns) (elChildren element)) - | otherwise = Nothing - -elemToTblLook :: NameSpaces -> Element -> Maybe TblLook -elemToTblLook ns element - | qName (elName element) == "tblLook" && - qURI (elName element) == (lookup "w" ns) = - let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element - val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element - firstRowFmt = - case firstRow of - Just "1" -> True - Just _ -> False - Nothing -> case val of - Just bitMask -> testBitMask bitMask 0x020 - Nothing -> False - in - Just $ TblLook{firstRowFormatting = firstRowFmt} -elemToTblLook _ _ = Nothing - -testBitMask :: String -> Int -> Bool -testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of - [] -> False - ((n', _) : _) -> ((n' .|. n) /= 0) - -data ParagraphStyle = ParagraphStyle { pStyle :: [String] - , indent :: Maybe Integer - } - deriving Show - -defaultParagraphStyle :: ParagraphStyle -defaultParagraphStyle = ParagraphStyle { pStyle = [] - , indent = Nothing - } - -elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle -elemToParagraphStyle ns element = - case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of - Just pPr -> - ParagraphStyle - {pStyle = - mapMaybe id $ - map - (findAttr (QName "val" (lookup "w" ns) (Just "w"))) - (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) - , indent = - findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= - findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= - stringToInteger - } - Nothing -> defaultParagraphStyle - - -data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String [ParPart] - | Tbl String TblGrid TblLook [Row] - - deriving Show - -type TblGrid = [Integer] - -data TblLook = TblLook {firstRowFormatting::Bool} - deriving Show - -defaultTblLook :: TblLook -defaultTblLook = TblLook{firstRowFormatting = False} - -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) - -elemToTblGrid :: NameSpaces -> Element -> TblGrid -elemToTblGrid ns element - | qName (elName element) == "tblGrid" && - qURI (elName element) == (lookup "w" ns) = - let - cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element - in - mapMaybe (\e -> - findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e - >>= stringToInteger - ) - cols -elemToTblGrid _ _ = [] - -data Row = Row [Cell] - deriving Show - - -elemToRow :: NameSpaces -> Element -> Maybe Row -elemToRow ns element - | qName (elName element) == "tr" && - qURI (elName element) == (lookup "w" ns) = - let - cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element - in - Just $ Row (mapMaybe (elemToCell ns) cells) -elemToRow _ _ = Nothing - -data Cell = Cell [BodyPart] - deriving Show - -elemToCell :: NameSpaces -> Element -> Maybe Cell -elemToCell ns element - | qName (elName element) == "tc" && - qURI (elName element) == (lookup "w" ns) = - Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) -elemToCell _ _ = Nothing - -data ParPart = PlainRun Run - | BookMark BookMarkId Anchor - | InternalHyperLink Anchor [Run] - | ExternalHyperLink RelId [Run] - | Drawing String - deriving Show - -data Run = Run RunStyle [RunElem] - | Footnote String - | Endnote String - deriving Show - -data RunElem = TextRun String | LnBrk - deriving Show - -data RunStyle = RunStyle { isBold :: Bool - , isItalic :: Bool - , isSmallCaps :: Bool - , isStrike :: Bool - , isSuperScript :: Bool - , isSubScript :: Bool - , underline :: Maybe String - , rStyle :: Maybe String } - deriving Show - -defaultRunStyle :: RunStyle -defaultRunStyle = RunStyle { isBold = False - , isItalic = False - , isSmallCaps = False - , isStrike = False - , isSuperScript = False - , isSubScript = False - , underline = Nothing - , rStyle = Nothing - } - -elemToRunStyle :: NameSpaces -> Element -> RunStyle -elemToRunStyle ns element = - case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of - Just rPr -> - RunStyle - { - isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr - , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr - , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr - , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr - , isSuperScript = - (Just "superscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , isSubScript = - (Just "subscript" == - (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")))) - , underline = - findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - , rStyle = - findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= - findAttr (QName "val" (lookup "w" ns) (Just "w")) - } - Nothing -> defaultRunStyle - -elemToRun :: NameSpaces -> Element -> Maybe Run -elemToRun ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case - findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Footnote s - Nothing -> - case - findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= - findAttr (QName "id" (lookup "w" ns) (Just "w")) - of - Just s -> Just $ Endnote s - Nothing -> Just $ - Run (elemToRunStyle ns element) - (elemToRunElems ns element) -elemToRun _ _ = Nothing - -elemToRunElem :: NameSpaces -> Element -> Maybe RunElem -elemToRunElem ns element - | qName (elName element) == "t" && - qURI (elName element) == (lookup "w" ns) = - Just $ TextRun (strContent element) - | qName (elName element) == "br" && - qURI (elName element) == (lookup "w" ns) = - Just $ LnBrk - | otherwise = Nothing - - -elemToRunElems :: NameSpaces -> Element -> [RunElem] -elemToRunElems ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - mapMaybe (elemToRunElem ns) (elChildren element) - | otherwise = [] - -elemToDrawing :: NameSpaces -> Element -> Maybe ParPart -elemToDrawing ns element - | qName (elName element) == "drawing" && - qURI (elName element) == (lookup "w" ns) = - let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" - in - findElement (QName "blip" (Just a_ns) (Just "a")) element - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) - >>= (\s -> Just $ Drawing s) -elemToDrawing _ _ = Nothing - - -elemToParPart :: NameSpaces -> Element -> Maybe ParPart -elemToParPart ns element - | qName (elName element) == "r" && - qURI (elName element) == (lookup "w" ns) = - case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of - Just drawingElem -> elemToDrawing ns drawingElem - Nothing -> do - r <- elemToRun ns element - return $ PlainRun r -elemToParPart ns element - | qName (elName element) == "bookmarkStart" && - qURI (elName element) == (lookup "w" ns) = do - bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element - return $ BookMark bmId bmName -elemToParPart ns element - | qName (elName element) == "hyperlink" && - qURI (elName element) == (lookup "w" ns) = - let runs = map fromJust $ filter isJust $ map (elemToRun ns) - $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element - in - case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of - Just anchor -> - Just $ InternalHyperLink anchor runs - Nothing -> - case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of - Just relId -> Just $ ExternalHyperLink relId runs - Nothing -> Nothing -elemToParPart _ _ = Nothing - -type Target = String -type Anchor = String -type BookMarkId = String -type RelId = String - diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs new file mode 100644 index 000000000..df4be41ff --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -0,0 +1,479 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse) +to 'Pandoc' document. -} + +{- +Current state of implementation of Docx entities ([x] means +implemented, [-] means partially implemented): + +* Blocks + + - [X] Para + - [X] CodeBlock (styled with `SourceCode`) + - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally, + indented) + - [X] OrderedList + - [X] BulletList + - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`) + - [X] Header (styled with `Heading#`) + - [ ] HorizontalRule + - [-] Table (column widths and alignments not yet implemented) + +* Inlines + + - [X] Str + - [X] Emph (From italics. `underline` currently read as span. In + future, it might optionally be emph as well) + - [X] Strong + - [X] Strikeout + - [X] Superscript + - [X] Subscript + - [X] SmallCaps + - [ ] Quoted + - [ ] Cite + - [X] Code (styled with `VerbatimChar`) + - [X] Space + - [X] LineBreak (these are invisible in Word: entered with Shift-Return) + - [ ] Math + - [X] Link (links to an arbitrary bookmark create a span with the target as + id and "anchor" class) + - [-] Image (Links to path in archive. Future option for + data-encoded URI likely.) + - [X] Note (Footnotes and Endnotes are silently combined.) +-} + +module Text.Pandoc.Readers.Docx + ( readDocx + ) where + +import Codec.Archive.Zip +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Builder (text, toList) +import Text.Pandoc.Generic (bottomUp) +import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Readers.Docx.Parse +import Text.Pandoc.Readers.Docx.Lists +import Data.Maybe (mapMaybe, isJust, fromJust) +import Data.List (delete, isPrefixOf, (\\), intersect) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Base64 (encode) +import System.FilePath (combine) + +readDocx :: ReaderOptions + -> B.ByteString + -> Pandoc +readDocx opts bytes = + case archiveToDocx (toArchive bytes) of + Just docx -> Pandoc nullMeta (docxToBlocks opts docx) + Nothing -> error $ "couldn't parse docx file" + +runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) +runStyleToSpanAttr rPr = ("", + mapMaybe id [ + if isBold rPr then (Just "strong") else Nothing, + if isItalic rPr then (Just "emph") else Nothing, + if isSmallCaps rPr then (Just "smallcaps") else Nothing, + if isStrike rPr then (Just "strike") else Nothing, + if isSuperScript rPr then (Just "superscript") else Nothing, + if isSubScript rPr then (Just "subscript") else Nothing, + rStyle rPr], + case underline rPr of + Just fmt -> [("underline", fmt)] + _ -> [] + ) + +parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) +parStyleToDivAttr pPr = ("", + pStyle pPr, + case indent pPr of + Just n -> [("indent", (show n))] + Nothing -> [] + ) + +strToInlines :: String -> [Inline] +strToInlines = toList . text + +codeSpans :: [String] +codeSpans = ["VerbatimChar"] + +blockQuoteDivs :: [String] +blockQuoteDivs = ["Quote", "BlockQuote"] + +codeDivs :: [String] +codeDivs = ["SourceCode"] + +runElemToInlines :: RunElem -> [Inline] +runElemToInlines (TextRun s) = strToInlines s +runElemToInlines (LnBrk) = [LineBreak] + +runElemToString :: RunElem -> String +runElemToString (TextRun s) = s +runElemToString (LnBrk) = ['\n'] + +runElemsToString :: [RunElem] -> String +runElemsToString = concatMap runElemToString + +strNormalize :: [Inline] -> [Inline] +strNormalize [] = [] +strNormalize (Str "" : ils) = strNormalize ils +strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) +strNormalize (il:ils) = il : (strNormalize ils) + +runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] +runToInlines _ _ (Run rs runElems) + | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = + case runStyleToSpanAttr rs == ("", [], []) of + True -> [Str (runElemsToString runElems)] + False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] + | otherwise = case runStyleToSpanAttr rs == ("", [], []) of + True -> concatMap runElemToInlines runElems + False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] +runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = + case (getFootNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["footnote"], []) []]] +runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = + case (getEndNote fnId notes) of + Just bodyParts -> + [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + Nothing -> + [Note [Div ("", ["endnote"], []) []]] + +parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] +parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines _ _ (BookMark _ anchor) = + [Span (anchor, ["anchor"], []) []] +parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = + case lookupRelationship relid rels of + Just target -> [Image [] (combine "word" target, "")] + Nothing -> [Image [] ("", "")] +parPartToInlines opts docx (InternalHyperLink anchor runs) = + [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] +parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) = + case lookupRelationship relid rels of + Just target -> + [Link (concatMap (runToInlines opts docx) runs) (target, "")] + Nothing -> + [Link (concatMap (runToInlines opts docx) runs) ("", "")] + +isAnchorSpan :: Inline -> Bool +isAnchorSpan (Span (ident, classes, kvs) ils) = + (not . null) ident && + classes == ["anchor"] && + null kvs && + null ils +isAnchorSpan _ = False + +dummyAnchors :: [String] +dummyAnchors = ["_GoBack"] + +makeHeaderAnchors :: Block -> Block +makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = + case filter isAnchorSpan ils of + [] -> h + (x@(Span (ident, _, _) _) : xs) -> + case ident `elem` dummyAnchors of + True -> h + False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) + _ -> h +makeHeaderAnchors blk = blk + + +parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] +parPartsToInlines opts docx parparts = + -- + -- We're going to skip data-uri's for now. It should be an option, + -- not mandatory. + -- + --bottomUp (makeImagesSelfContained docx) $ + bottomUp spanCorrect $ + bottomUp spanTrim $ + bottomUp spanReduce $ + concatMap (parPartToInlines opts docx) parparts + +cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] +cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps + +rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] +rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells + +bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block +bodyPartToBlock opts docx (Paragraph pPr parparts) = + Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] +bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = + let + kvs = case lookupLevel numId lvl numbering of + Just (_, fmt, txt, Just start) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", (show start)) + ] + + Just (_, fmt, txt, Nothing) -> [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + ] + Nothing -> [] + in + Div + ("", ["list-item"], kvs) + [bodyPartToBlock opts docx (Paragraph pPr parparts)] +bodyPartToBlock _ _ (Tbl _ _ _ []) = + Para [] +bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + let caption = strToInlines cap + (hdr, rows) = case firstRowFormatting look of + True -> (Just r, rs) + False -> (Nothing, r:rs) + hdrCells = case hdr of + Just r' -> rowToBlocksList opts docx r' + Nothing -> [] + cells = map (rowToBlocksList opts docx) rows + + size = case null hdrCells of + True -> length $ head cells + False -> length $ hdrCells + -- + -- The two following variables (horizontal column alignment and + -- relative column widths) go to the default at the + -- moment. Width information is in the TblGrid field of the Tbl, + -- so should be possible. Alignment might be more difficult, + -- since there doesn't seem to be a column entity in docx. + alignments = take size (repeat AlignDefault) + widths = take size (repeat 0) :: [Double] + in + Table caption alignments widths hdrCells cells + +makeImagesSelfContained :: Docx -> Inline -> Inline +makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = + case lookup uri media of + Just bs -> case getMimeType uri of + Just mime -> let data_uri = + "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) + in + Image alt (data_uri, title) + Nothing -> i + Nothing -> i +makeImagesSelfContained _ inline = inline + +bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] +bodyToBlocks opts docx (Body bps) = + bottomUp removeEmptyPars $ + bottomUp strNormalize $ + bottomUp spanRemove $ + bottomUp divRemove $ + map (makeHeaderAnchors) $ + bottomUp divCorrect $ + bottomUp divReduce $ + bottomUp divCorrectPreReduce $ + bottomUp blocksToDefinitions $ + blocksToBullets $ + map (bodyPartToBlock opts docx) bps + +docxToBlocks :: ReaderOptions -> Docx -> [Block] +docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body + +spanReduce :: [Inline] -> [Inline] +spanReduce [] = [] +spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) + | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) +spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : + s2@(Span (id2, classes2, kvs2) ils2) : + ils) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> s1 : (spanReduce (s2 : ils)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : + ils) +spanReduce (il:ils) = il : (spanReduce ils) + +ilToCode :: Inline -> String +ilToCode (Str s) = s +ilToCode _ = "" + +spanRemove' :: Inline -> [Inline] +spanRemove' s@(Span (ident, classes, _) []) + -- "_GoBack" is automatically inserted. We don't want to keep it. + | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] +spanRemove' (Span (_, _, kvs) ils) = + case lookup "underline" kvs of + Just val -> [Span ("", [], [("underline", val)]) ils] + Nothing -> ils +spanRemove' il = [il] + +spanRemove :: [Inline] -> [Inline] +spanRemove = concatMap spanRemove' + +spanTrim' :: Inline -> [Inline] +spanTrim' il@(Span _ []) = [il] +spanTrim' il@(Span attr (il':[])) + | il' == Space = [Span attr [], Space] + | otherwise = [il] +spanTrim' (Span attr ils) + | head ils == Space && last ils == Space = + [Space, Span attr (init $ tail ils), Space] + | head ils == Space = [Space, Span attr (tail ils)] + | last ils == Space = [Span attr (init ils), Space] +spanTrim' il = [il] + +spanTrim :: [Inline] -> [Inline] +spanTrim = concatMap spanTrim' + +spanCorrect' :: Inline -> [Inline] +spanCorrect' (Span ("", [], []) ils) = ils +spanCorrect' (Span (ident, classes, kvs) ils) + | "emph" `elem` classes = + [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] + | "strong" `elem` classes = + [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] + | "smallcaps" `elem` classes = + [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] + | "strike" `elem` classes = + [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] + | "superscript" `elem` classes = + [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] + | "subscript" `elem` classes = + [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] + | (not . null) (codeSpans `intersect` classes) = + [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] + | otherwise = + [Span (ident, classes, kvs) ils] +spanCorrect' il = [il] + +spanCorrect :: [Inline] -> [Inline] +spanCorrect = concatMap spanCorrect' + +removeEmptyPars :: [Block] -> [Block] +removeEmptyPars blks = filter (\b -> b /= (Para [])) blks + +divReduce :: [Block] -> [Block] +divReduce [] = [] +divReduce ((Div (id1, classes1, kvs1) blks1) : blks) + | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) +divReduce (d1@(Div (id1, classes1, kvs1) blks1) : + d2@(Div (id2, classes2, kvs2) blks2) : + blks) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> d1 : (divReduce (d2 : blks)) + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + in + divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : + blks) +divReduce (blk:blks) = blk : (divReduce blks) + +isHeaderClass :: String -> Maybe Int +isHeaderClass s | "Heading" `isPrefixOf` s = + case reads (drop (length "Heading") s) :: [(Int, String)] of + [] -> Nothing + ((n, "") : []) -> Just n + _ -> Nothing +isHeaderClass _ = Nothing + +findHeaderClass :: [String] -> Maybe Int +findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of + [] -> Nothing + n : _ -> Just n + +blksToInlines :: [Block] -> [Inline] +blksToInlines (Para ils : _) = ils +blksToInlines (Plain ils : _) = ils +blksToInlines _ = [] + +divCorrectPreReduce' :: Block -> [Block] +divCorrectPreReduce' (Div (ident, classes, kvs) blks) + | isJust $ findHeaderClass classes = + let n = fromJust $ findHeaderClass classes + in + [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] + | otherwise = [Div (ident, classes, kvs) blks] +divCorrectPreReduce' blk = [blk] + +divCorrectPreReduce :: [Block] -> [Block] +divCorrectPreReduce = concatMap divCorrectPreReduce' + +blkToCode :: Block -> String +blkToCode (Para []) = "" +blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) +blkToCode (Para ((Span (_, classes, _) ils'): ils)) + | (not . null) (codeSpans `intersect` classes) = + (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) +blkToCode _ = "" + +divRemove' :: Block -> [Block] +divRemove' (Div (_, _, kvs) blks) = + case lookup "indent" kvs of + Just val -> [Div ("", [], [("indent", val)]) blks] + Nothing -> blks +divRemove' blk = [blk] + +divRemove :: [Block] -> [Block] +divRemove = concatMap divRemove' + +divCorrect' :: Block -> [Block] +divCorrect' b@(Div (ident, classes, kvs) blks) + | (not . null) (blockQuoteDivs `intersect` classes) = + [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] + | (not . null) (codeDivs `intersect` classes) = + [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] + | otherwise = + case lookup "indent" kvs of + Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] + Just _ -> + [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] + Nothing -> [b] +divCorrect' blk = [blk] + +divCorrect :: [Block] -> [Block] +divCorrect = concatMap divCorrect' diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs new file mode 100644 index 000000000..68559d98b --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -0,0 +1,208 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Lists + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Functions for converting flat docx paragraphs into nested lists. +-} + +module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets + , blocksToDefinitions) where + +import Text.Pandoc.JSON +import Text.Pandoc.Shared (trim) +import Control.Monad +import Data.List +import Data.Maybe + +isListItem :: Block -> Bool +isListItem (Div (_, classes, _) _) | "list-item" `elem` classes = True +isListItem _ = False + +getLevel :: Block -> Maybe Integer +getLevel (Div (_, _, kvs) _) = liftM read $ lookup "level" kvs +getLevel _ = Nothing + +getLevelN :: Block -> Integer +getLevelN b = case getLevel b of + Just n -> n + Nothing -> -1 + +getNumId :: Block -> Maybe Integer +getNumId (Div (_, _, kvs) _) = liftM read $ lookup "num-id" kvs +getNumId _ = Nothing + +getNumIdN :: Block -> Integer +getNumIdN b = case getNumId b of + Just n -> n + Nothing -> -1 + +getText :: Block -> Maybe String +getText (Div (_, _, kvs) _) = lookup "text" kvs +getText _ = Nothing + +data ListType = Itemized | Enumerated ListAttributes + +listStyleMap :: [(String, ListNumberStyle)] +listStyleMap = [("upperLetter", UpperAlpha), + ("lowerLetter", LowerAlpha), + ("upperRoman", UpperRoman), + ("lowerRoman", LowerRoman), + ("decimal", Decimal)] + +listDelimMap :: [(String, ListNumberDelim)] +listDelimMap = [("%1)", OneParen), + ("(%1)", TwoParens), + ("%1.", Period)] + +getListType :: Block -> Maybe ListType +getListType b@(Div (_, _, kvs) _) | isListItem b = + let + start = lookup "start" kvs + frmt = lookup "format" kvs + txt = lookup "text" kvs + in + case frmt of + Just "bullet" -> Just Itemized + Just f -> + case txt of + Just t -> Just $ Enumerated ( + read (fromMaybe "1" start) :: Int, + fromMaybe DefaultStyle (lookup f listStyleMap), + fromMaybe DefaultDelim (lookup t listDelimMap)) + Nothing -> Nothing + _ -> Nothing +getListType _ = Nothing + +listParagraphDivs :: [String] +listParagraphDivs = ["ListParagraph"] + +-- This is a first stab at going through and attaching meaning to list +-- paragraphs, without an item marker, following a list item. We +-- assume that these are paragraphs in the same item. + +handleListParagraphs :: [Block] -> [Block] +handleListParagraphs [] = [] +handleListParagraphs ( + (Div attr1@(_, classes1, _) blks1) : + (Div (ident2, classes2, kvs2) blks2) : + blks + ) | "list-item" `elem` classes1 && + not ("list-item" `elem` classes2) && + (not . null) (listParagraphDivs `intersect` classes2) = + -- We don't want to keep this indent. + let newDiv2 = + (Div (ident2, classes2, filter (\kv -> fst kv /= "indent") kvs2) blks2) + in + handleListParagraphs ((Div attr1 (blks1 ++ [newDiv2])) : blks) +handleListParagraphs (blk:blks) = blk : (handleListParagraphs blks) + +separateBlocks' :: Block -> [[Block]] -> [[Block]] +separateBlocks' blk ([] : []) = [[blk]] +separateBlocks' b@(BulletList _) acc = (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b@(OrderedList _ _) acc = (init acc) ++ [(last acc) ++ [b]] +-- The following is for the invisible bullet lists. This is how +-- pandoc-generated ooxml does multiparagraph item lists. +separateBlocks' b acc | liftM trim (getText b) == Just "" = + (init acc) ++ [(last acc) ++ [b]] +separateBlocks' b acc = acc ++ [[b]] + +separateBlocks :: [Block] -> [[Block]] +separateBlocks blks = foldr separateBlocks' [[]] (reverse blks) + +flatToBullets' :: Integer -> [Block] -> [Block] +flatToBullets' _ [] = [] +flatToBullets' num xs@(b : elems) + | getLevelN b == num = b : (flatToBullets' num elems) + | otherwise = + let bNumId = getNumIdN b + bLevel = getLevelN b + (children, remaining) = + span + (\b' -> + ((getLevelN b') > bLevel || + ((getLevelN b') == bLevel && (getNumIdN b') == bNumId))) + xs + in + case getListType b of + Just (Enumerated attr) -> + (OrderedList attr (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + _ -> + (BulletList (separateBlocks $ flatToBullets' bLevel children)) : + (flatToBullets' num remaining) + +flatToBullets :: [Block] -> [Block] +flatToBullets elems = flatToBullets' (-1) elems + +blocksToBullets :: [Block] -> [Block] +blocksToBullets blks = + -- bottomUp removeListItemDivs $ + flatToBullets $ (handleListParagraphs blks) + + +plainParaInlines :: Block -> [Inline] +plainParaInlines (Plain ils) = ils +plainParaInlines (Para ils) = ils +plainParaInlines _ = [] + +blocksToDefinitions' :: [([Inline], [[Block]])] -> [Block] -> [Block] -> [Block] +blocksToDefinitions' [] acc [] = reverse acc +blocksToDefinitions' defAcc acc [] = + reverse $ (DefinitionList (reverse defAcc)) : acc +blocksToDefinitions' defAcc acc + ((Div (_, classes1, _) blks1) : (Div (ident2, classes2, kvs2) blks2) : blks) + | "DefinitionTerm" `elem` classes1 && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + pair = case remainingAttr2 == ("", [], []) of + True -> (concatMap plainParaInlines blks1, [blks2]) + False -> (concatMap plainParaInlines blks1, [[Div remainingAttr2 blks2]]) + in + blocksToDefinitions' (pair : defAcc) acc blks +blocksToDefinitions' defAcc acc + ((Div (ident2, classes2, kvs2) blks2) : blks) + | (not . null) defAcc && "Definition" `elem` classes2 = + let remainingAttr2 = (ident2, delete "Definition" classes2, kvs2) + defItems2 = case remainingAttr2 == ("", [], []) of + True -> blks2 + False -> [Div remainingAttr2 blks2] + ((defTerm, defItems):defs) = defAcc + defAcc' = case null defItems of + True -> (defTerm, [defItems2]) : defs + False -> (defTerm, init defItems ++ [last defItems ++ defItems2]) : defs + in + blocksToDefinitions' defAcc' acc blks +blocksToDefinitions' [] acc (b:blks) = + blocksToDefinitions' [] (b:acc) blks +blocksToDefinitions' defAcc acc (b:blks) = + blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks + + +blocksToDefinitions :: [Block] -> [Block] +blocksToDefinitions = blocksToDefinitions' [] [] + + + + diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs new file mode 100644 index 000000000..22e9dd909 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -0,0 +1,604 @@ +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Parse + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Conversion of docx archive into Docx haskell type +-} + + +module Text.Pandoc.Readers.Docx.Parse ( Docx(..) + , Document(..) + , Body(..) + , BodyPart(..) + , TblLook(..) + , ParPart(..) + , Run(..) + , RunElem(..) + , Notes + , Numbering + , Relationship + , Media + , RunStyle(..) + , ParagraphStyle(..) + , Row(..) + , Cell(..) + , getFootNote + , getEndNote + , lookupLevel + , lookupRelationship + , archiveToDocx + ) where +import Codec.Archive.Zip +import Text.XML.Light +import Data.Maybe +import Data.List +import System.FilePath +import Data.Bits ((.|.)) +import qualified Data.ByteString.Lazy as B +import qualified Text.Pandoc.UTF8 as UTF8 + +attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair _ = Nothing + + +type NameSpaces = [(String, String)] + +data Docx = Docx Document Notes Numbering [Relationship] Media + deriving Show + +archiveToDocx :: Archive -> Maybe Docx +archiveToDocx archive = do + let notes = archiveToNotes archive + rels = archiveToRelationships archive + media = archiveToMedia archive + doc <- archiveToDocument archive + numbering <- archiveToNumbering archive + return $ Docx doc notes numbering rels media + +data Document = Document NameSpaces Body + deriving Show + +archiveToDocument :: Archive -> Maybe Document +archiveToDocument zf = do + entry <- findEntryByPath "word/document.xml" zf + docElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs docElem) + bodyElem <- findChild (QName "body" (lookup "w" namespaces) Nothing) docElem + body <- elemToBody namespaces bodyElem + return $ Document namespaces body + +type Media = [(FilePath, B.ByteString)] + +filePathIsMedia :: FilePath -> Bool +filePathIsMedia fp = + let (dir, _) = splitFileName fp + in + (dir == "word/media/") + +getMediaPair :: Archive -> FilePath -> Maybe (FilePath, B.ByteString) +getMediaPair zf fp = + case findEntryByPath fp zf of + Just e -> Just (fp, fromEntry e) + Nothing -> Nothing + +archiveToMedia :: Archive -> Media +archiveToMedia zf = + mapMaybe (getMediaPair zf) (filter filePathIsMedia (filesInArchive zf)) + +data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] + deriving Show + +data Numb = Numb String String -- right now, only a key to an abstract num + deriving Show + +data AbstractNumb = AbstractNumb String [Level] + deriving Show + +-- (ilvl, format, string, start) +type Level = (String, String, String, Maybe Integer) + +lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do + absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs + lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs + lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls + return lvl + +numElemToNum :: NameSpaces -> Element -> Maybe Numb +numElemToNum ns element | + qName (elName element) == "num" && + qURI (elName element) == (lookup "w" ns) = do + numId <- findAttr (QName "numId" (lookup "w" ns) (Just "w")) element + absNumId <- findChild (QName "abstractNumId" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + return $ Numb numId absNumId +numElemToNum _ _ = Nothing + +absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb +absNumElemToAbsNum ns element | + qName (elName element) == "abstractNum" && + qURI (elName element) == (lookup "w" ns) = do + absNumId <- findAttr + (QName "abstractNumId" (lookup "w" ns) (Just "w")) + element + let levelElems = findChildren + (QName "lvl" (lookup "w" ns) (Just "w")) + element + levels = mapMaybe id $ map (levelElemToLevel ns) levelElems + return $ AbstractNumb absNumId levels +absNumElemToAbsNum _ _ = Nothing + +levelElemToLevel :: NameSpaces -> Element -> Maybe Level +levelElemToLevel ns element | + qName (elName element) == "lvl" && + qURI (elName element) == (lookup "w" ns) = do + ilvl <- findAttr (QName "ilvl" (lookup "w" ns) (Just "w")) element + fmt <- findChild (QName "numFmt" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + txt <- findChild (QName "lvlText" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + let start = findChild (QName "start" (lookup "w" ns) (Just "w")) element + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + return (ilvl, fmt, txt, start) +levelElemToLevel _ _ = Nothing + +archiveToNumbering :: Archive -> Maybe Numbering +archiveToNumbering zf = + case findEntryByPath "word/numbering.xml" zf of + Nothing -> Just $ Numbering [] [] [] + Just entry -> do + numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + let namespaces = mapMaybe attrToNSPair (elAttribs numberingElem) + numElems = findChildren + (QName "num" (lookup "w" namespaces) (Just "w")) + numberingElem + absNumElems = findChildren + (QName "abstractNum" (lookup "w" namespaces) (Just "w")) + numberingElem + nums = mapMaybe id $ map (numElemToNum namespaces) numElems + absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems + return $ Numbering namespaces nums absNums + +data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) + deriving Show + +noteElemToNote :: NameSpaces -> Element -> Maybe (String, [BodyPart]) +noteElemToNote ns element + | qName (elName element) `elem` ["endnote", "footnote"] && + qURI (elName element) == (lookup "w" ns) = + do + noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + let bps = map fromJust + $ filter isJust + $ map (elemToBodyPart ns) + $ filterChildrenName (isParOrTbl ns) element + return $ (noteId, bps) +noteElemToNote _ _ = Nothing + +getFootNote :: String -> Notes -> Maybe [BodyPart] +getFootNote s (Notes _ fns _) = fns >>= (lookup s) + +getEndNote :: String -> Notes -> Maybe [BodyPart] +getEndNote s (Notes _ _ ens) = ens >>= (lookup s) + +elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] +elemToNotes ns notetype element + | qName (elName element) == (notetype ++ "s") && + qURI (elName element) == (lookup "w" ns) = + Just $ map fromJust + $ filter isJust + $ map (noteElemToNote ns) + $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element +elemToNotes _ _ _ = Nothing + +archiveToNotes :: Archive -> Notes +archiveToNotes zf = + let fnElem = findEntryByPath "word/footnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + enElem = findEntryByPath "word/endnotes.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + fn_namespaces = case fnElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + en_namespaces = case enElem of + Just e -> mapMaybe attrToNSPair (elAttribs e) + Nothing -> [] + ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn = fnElem >>= (elemToNotes ns "footnote") + en = enElem >>= (elemToNotes ns "endnote") + in + Notes ns fn en + + +data Relationship = Relationship (RelId, Target) + deriving Show + +lookupRelationship :: RelId -> [Relationship] -> Maybe Target +lookupRelationship relid rels = + lookup relid (map (\(Relationship pair) -> pair) rels) + +filePathIsRel :: FilePath -> Bool +filePathIsRel fp = + let (dir, name) = splitFileName fp + in + (dir == "word/_rels/") && ((takeExtension name) == ".rels") + +relElemToRelationship :: Element -> Maybe Relationship +relElemToRelationship element | qName (elName element) == "Relationship" = + do + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element + return $ Relationship (relId, target) +relElemToRelationship _ = Nothing + + +archiveToRelationships :: Archive -> [Relationship] +archiveToRelationships archive = + let relPaths = filter filePathIsRel (filesInArchive archive) + entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths + relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries + rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems + in + rels + +data Body = Body [BodyPart] + deriving Show + +isParOrTbl :: NameSpaces -> QName -> Bool +isParOrTbl ns q = qName q `elem` ["p", "tbl"] && + qURI q == (lookup "w" ns) + +elemToBody :: NameSpaces -> Element -> Maybe Body +elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = + Just $ Body + $ map fromJust + $ filter isJust + $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element +elemToBody _ _ = Nothing + +isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool +isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && + qURI q == (lookup "w" ns) + +elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) +elemToNumInfo ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + do + pPr <- findChild (QName "pPr" (lookup "w" ns) (Just "w")) element + numPr <- findChild (QName "numPr" (lookup "w" ns) (Just "w")) pPr + lvl <- findChild (QName "ilvl" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + numId <- findChild (QName "numId" (lookup "w" ns) (Just "w")) numPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + return (numId, lvl) +elemToNumInfo _ _ = Nothing + +-- isBookMarkTag :: NameSpaces -> QName -> Bool +-- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && +-- qURI q == (lookup "w" ns) + +-- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark +-- parChildrenToBookmark ns (bms : bme : _) +-- | qName (elName bms) == "bookmarkStart" && +-- qURI (elName bms) == (lookup "w" ns) && +-- qName (elName bme) == "bookmarkEnd" && +-- qURI (elName bme) == (lookup "w" ns) = do +-- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms +-- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms +-- return $ (bmId, bmName) +-- parChildrenToBookmark _ _ = Nothing + +elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart +elemToBodyPart ns element + | qName (elName element) == "p" && + qURI (elName element) == (lookup "w" ns) = + let parstyle = elemToParagraphStyle ns element + parparts = mapMaybe id + $ map (elemToParPart ns) + $ filterChildrenName (isRunOrLinkOrBookmark ns) element + in + case elemToNumInfo ns element of + Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts + Nothing -> Just $ Paragraph parstyle parparts + | qName (elName element) == "tbl" && + qURI (elName element) == (lookup "w" ns) = + let + caption = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblCaption" (lookup "w" ns) (Just "w")) + >>= findAttr (QName "val" (lookup "w" ns) (Just "w")) + grid = case + findChild (QName "tblGrid" (lookup "w" ns) (Just "w")) element + of + Just g -> elemToTblGrid ns g + Nothing -> [] + tblLook = findChild (QName "tblPr" (lookup "w" ns) (Just "w")) element + >>= findChild (QName "tblLook" (lookup "w" ns) (Just "w")) + >>= elemToTblLook ns + in + Just $ Tbl + (fromMaybe "" caption) + grid + (fromMaybe defaultTblLook tblLook) + (mapMaybe (elemToRow ns) (elChildren element)) + | otherwise = Nothing + +elemToTblLook :: NameSpaces -> Element -> Maybe TblLook +elemToTblLook ns element + | qName (elName element) == "tblLook" && + qURI (elName element) == (lookup "w" ns) = + let firstRow = findAttr (QName "firstRow" (lookup "w" ns) (Just "w")) element + val = findAttr (QName "val" (lookup "w" ns) (Just "w")) element + firstRowFmt = + case firstRow of + Just "1" -> True + Just _ -> False + Nothing -> case val of + Just bitMask -> testBitMask bitMask 0x020 + Nothing -> False + in + Just $ TblLook{firstRowFormatting = firstRowFmt} +elemToTblLook _ _ = Nothing + +testBitMask :: String -> Int -> Bool +testBitMask bitMaskS n = + case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + [] -> False + ((n', _) : _) -> ((n' .|. n) /= 0) + +data ParagraphStyle = ParagraphStyle { pStyle :: [String] + , indent :: Maybe Integer + } + deriving Show + +defaultParagraphStyle :: ParagraphStyle +defaultParagraphStyle = ParagraphStyle { pStyle = [] + , indent = Nothing + } + +elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle +elemToParagraphStyle ns element = + case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of + Just pPr -> + ParagraphStyle + {pStyle = + mapMaybe id $ + map + (findAttr (QName "val" (lookup "w" ns) (Just "w"))) + (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) + , indent = + findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>= + findAttr (QName "left" (lookup "w" ns) (Just "w")) >>= + stringToInteger + } + Nothing -> defaultParagraphStyle + + +data BodyPart = Paragraph ParagraphStyle [ParPart] + | ListItem ParagraphStyle String String [ParPart] + | Tbl String TblGrid TblLook [Row] + + deriving Show + +type TblGrid = [Integer] + +data TblLook = TblLook {firstRowFormatting::Bool} + deriving Show + +defaultTblLook :: TblLook +defaultTblLook = TblLook{firstRowFormatting = False} + +stringToInteger :: String -> Maybe Integer +stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) + +elemToTblGrid :: NameSpaces -> Element -> TblGrid +elemToTblGrid ns element + | qName (elName element) == "tblGrid" && + qURI (elName element) == (lookup "w" ns) = + let + cols = findChildren (QName "gridCol" (lookup "w" ns) (Just "w")) element + in + mapMaybe (\e -> + findAttr (QName "val" (lookup "w" ns) (Just ("w"))) e + >>= stringToInteger + ) + cols +elemToTblGrid _ _ = [] + +data Row = Row [Cell] + deriving Show + + +elemToRow :: NameSpaces -> Element -> Maybe Row +elemToRow ns element + | qName (elName element) == "tr" && + qURI (elName element) == (lookup "w" ns) = + let + cells = findChildren (QName "tc" (lookup "w" ns) (Just "w")) element + in + Just $ Row (mapMaybe (elemToCell ns) cells) +elemToRow _ _ = Nothing + +data Cell = Cell [BodyPart] + deriving Show + +elemToCell :: NameSpaces -> Element -> Maybe Cell +elemToCell ns element + | qName (elName element) == "tc" && + qURI (elName element) == (lookup "w" ns) = + Just $ Cell (mapMaybe (elemToBodyPart ns) (elChildren element)) +elemToCell _ _ = Nothing + +data ParPart = PlainRun Run + | BookMark BookMarkId Anchor + | InternalHyperLink Anchor [Run] + | ExternalHyperLink RelId [Run] + | Drawing String + deriving Show + +data Run = Run RunStyle [RunElem] + | Footnote String + | Endnote String + deriving Show + +data RunElem = TextRun String | LnBrk + deriving Show + +data RunStyle = RunStyle { isBold :: Bool + , isItalic :: Bool + , isSmallCaps :: Bool + , isStrike :: Bool + , isSuperScript :: Bool + , isSubScript :: Bool + , underline :: Maybe String + , rStyle :: Maybe String } + deriving Show + +defaultRunStyle :: RunStyle +defaultRunStyle = RunStyle { isBold = False + , isItalic = False + , isSmallCaps = False + , isStrike = False + , isSuperScript = False + , isSubScript = False + , underline = Nothing + , rStyle = Nothing + } + +elemToRunStyle :: NameSpaces -> Element -> RunStyle +elemToRunStyle ns element = + case findChild (QName "rPr" (lookup "w" ns) (Just "w")) element of + Just rPr -> + RunStyle + { + isBold = isJust $ findChild (QName "b" (lookup "w" ns) (Just "w")) rPr + , isItalic = isJust $ findChild (QName "i" (lookup "w" ns) (Just "w")) rPr + , isSmallCaps = isJust $ findChild (QName "smallCaps" (lookup "w" ns) (Just "w")) rPr + , isStrike = isJust $ findChild (QName "strike" (lookup "w" ns) (Just "w")) rPr + , isSuperScript = + (Just "superscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , isSubScript = + (Just "subscript" == + (findChild (QName "vertAlign" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")))) + , underline = + findChild (QName "u" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + , rStyle = + findChild (QName "rStyle" (lookup "w" ns) (Just "w")) rPr >>= + findAttr (QName "val" (lookup "w" ns) (Just "w")) + } + Nothing -> defaultRunStyle + +elemToRun :: NameSpaces -> Element -> Maybe Run +elemToRun ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case + findChild (QName "footnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Footnote s + Nothing -> + case + findChild (QName "endnoteReference" (lookup "w" ns) (Just "w")) element >>= + findAttr (QName "id" (lookup "w" ns) (Just "w")) + of + Just s -> Just $ Endnote s + Nothing -> Just $ + Run (elemToRunStyle ns element) + (elemToRunElems ns element) +elemToRun _ _ = Nothing + +elemToRunElem :: NameSpaces -> Element -> Maybe RunElem +elemToRunElem ns element + | qName (elName element) == "t" && + qURI (elName element) == (lookup "w" ns) = + Just $ TextRun (strContent element) + | qName (elName element) == "br" && + qURI (elName element) == (lookup "w" ns) = + Just $ LnBrk + | otherwise = Nothing + + +elemToRunElems :: NameSpaces -> Element -> [RunElem] +elemToRunElems ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + mapMaybe (elemToRunElem ns) (elChildren element) + | otherwise = [] + +elemToDrawing :: NameSpaces -> Element -> Maybe ParPart +elemToDrawing ns element + | qName (elName element) == "drawing" && + qURI (elName element) == (lookup "w" ns) = + let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + in + findElement (QName "blip" (Just a_ns) (Just "a")) element + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= (\s -> Just $ Drawing s) +elemToDrawing _ _ = Nothing + + +elemToParPart :: NameSpaces -> Element -> Maybe ParPart +elemToParPart ns element + | qName (elName element) == "r" && + qURI (elName element) == (lookup "w" ns) = + case findChild (QName "drawing" (lookup "w" ns) (Just "w")) element of + Just drawingElem -> elemToDrawing ns drawingElem + Nothing -> do + r <- elemToRun ns element + return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "bookmarkStart" && + qURI (elName element) == (lookup "w" ns) = do + bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) element + return $ BookMark bmId bmName +elemToParPart ns element + | qName (elName element) == "hyperlink" && + qURI (elName element) == (lookup "w" ns) = + let runs = map fromJust $ filter isJust $ map (elemToRun ns) + $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element + in + case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of + Just anchor -> + Just $ InternalHyperLink anchor runs + Nothing -> + case findAttr (QName "id" (lookup "r" ns) (Just "r")) element of + Just relId -> Just $ ExternalHyperLink relId runs + Nothing -> Nothing +elemToParPart _ _ = Nothing + +type Target = String +type Anchor = String +type BookMarkId = String +type RelId = String + diff --git a/tests/Tests/Readers/DocX.hs b/tests/Tests/Readers/DocX.hs deleted file mode 100644 index f4564ea1d..000000000 --- a/tests/Tests/Readers/DocX.hs +++ /dev/null @@ -1,68 +0,0 @@ -module Tests.Readers.DocX (tests) where - -import Text.Pandoc.Options -import Text.Pandoc.Readers.Native -import Text.Pandoc.Definition -import Tests.Helpers -import Test.Framework -import qualified Data.ByteString.Lazy as B -import Text.Pandoc.Readers.DocX - -compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) -compareOutput docxFile nativeFile = do - df <- B.readFile docxFile - nf <- Prelude.readFile nativeFile - return $ (readDocX def df, readNative nf) - -testCompare' :: String -> FilePath -> FilePath -> IO Test -testCompare' name docxFile nativeFile = do - (dp, np) <- compareOutput docxFile nativeFile - return $ test id name (dp, np) - -testCompare :: String -> FilePath -> FilePath -> Test -testCompare name docxFile nativeFile = - buildTest $ testCompare' name docxFile nativeFile - - -tests :: [Test] -tests = [ testGroup "inlines" - [ testCompare - "font formatting" - "docx.inline_formatting.docx" - "docx.inline_formatting.native" - , testCompare - "hyperlinks" - "docx.links.docx" - "docx.links.native" - , testCompare - "inline image with reference output" - "docx.image.docx" - "docx.image_no_embed.native" - , testCompare - "handling unicode input" - "docx.unicode.docx" - "docx.unicode.native"] - , testGroup "blocks" - [ testCompare - "headers" - "docx.headers.docx" - "docx.headers.native" - , testCompare - "lists" - "docx.lists.docx" - "docx.lists.native" - , testCompare - "footnotes and endnotes" - "docx.notes.docx" - "docx.notes.native" - , testCompare - "blockquotes (parsing indent as blockquote)" - "docx.block_quotes.docx" - "docx.block_quotes_parse_indent.native" - , testCompare - "tables" - "docx.tables.docx" - "docx.tables.native" - ] - ] - diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs new file mode 100644 index 000000000..0a963ddc6 --- /dev/null +++ b/tests/Tests/Readers/Docx.hs @@ -0,0 +1,68 @@ +module Tests.Readers.Docx (tests) where + +import Text.Pandoc.Options +import Text.Pandoc.Readers.Native +import Text.Pandoc.Definition +import Tests.Helpers +import Test.Framework +import qualified Data.ByteString.Lazy as B +import Text.Pandoc.Readers.Docx + +compareOutput :: FilePath -> FilePath -> IO (Pandoc, Pandoc) +compareOutput docxFile nativeFile = do + df <- B.readFile docxFile + nf <- Prelude.readFile nativeFile + return $ (readDocx def df, readNative nf) + +testCompare' :: String -> FilePath -> FilePath -> IO Test +testCompare' name docxFile nativeFile = do + (dp, np) <- compareOutput docxFile nativeFile + return $ test id name (dp, np) + +testCompare :: String -> FilePath -> FilePath -> Test +testCompare name docxFile nativeFile = + buildTest $ testCompare' name docxFile nativeFile + + +tests :: [Test] +tests = [ testGroup "inlines" + [ testCompare + "font formatting" + "docx.inline_formatting.docx" + "docx.inline_formatting.native" + , testCompare + "hyperlinks" + "docx.links.docx" + "docx.links.native" + , testCompare + "inline image with reference output" + "docx.image.docx" + "docx.image_no_embed.native" + , testCompare + "handling unicode input" + "docx.unicode.docx" + "docx.unicode.native"] + , testGroup "blocks" + [ testCompare + "headers" + "docx.headers.docx" + "docx.headers.native" + , testCompare + "lists" + "docx.lists.docx" + "docx.lists.native" + , testCompare + "footnotes and endnotes" + "docx.notes.docx" + "docx.notes.native" + , testCompare + "blockquotes (parsing indent as blockquote)" + "docx.block_quotes.docx" + "docx.block_quotes_parse_indent.native" + , testCompare + "tables" + "docx.tables.docx" + "docx.tables.native" + ] + ] + diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 9f9d85147..c07a51ec5 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -9,7 +9,7 @@ import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Org import qualified Tests.Readers.RST -import qualified Tests.Readers.DocX +import qualified Tests.Readers.Docx import qualified Tests.Writers.ConTeXt import qualified Tests.Writers.LaTeX import qualified Tests.Writers.HTML @@ -39,7 +39,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Markdown" Tests.Readers.Markdown.tests , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests - , testGroup "DocX" Tests.Readers.DocX.tests + , testGroup "Docx" Tests.Readers.Docx.tests ] ] -- cgit v1.2.3 From 7d60c798bf12a93ca4d7f4d973c917ba0d5a96ff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 23:02:20 -0700 Subject: Fixed compiler warning. --- src/Text/Pandoc/Readers/Docx.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index df4be41ff..4035cde99 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -216,7 +216,9 @@ parPartsToInlines opts docx parparts = -- We're going to skip data-uri's for now. It should be an option, -- not mandatory. -- - --bottomUp (makeImagesSelfContained docx) $ + (if False -- TODO depend on option + then bottomUp (makeImagesSelfContained docx) + else id) $ bottomUp spanCorrect $ bottomUp spanTrim $ bottomUp spanReduce $ -- cgit v1.2.3 From fc291efad3430be0645e979e0279c93195012075 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 17 Jun 2014 00:38:55 -0700 Subject: LaTeX reader: Correctly handle table rows with too few cells. LaTeX seems to treat them as if they have empty cells at the end. Closes #241. --- src/Text/Pandoc/Readers/LaTeX.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3c4d4ee52..97bfaa455 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1255,10 +1255,14 @@ parseTableRow :: Int -- ^ number of columns parseTableRow cols = try $ do let tableCellInline = notFollowedBy (amp <|> lbreak) >> inline let tableCell = (plain . trimInlines . mconcat) <$> many tableCellInline - cells' <- sepBy tableCell amp - guard $ length cells' == cols + cells' <- sepBy1 tableCell amp + let numcells = length cells' + guard $ numcells <= cols && numcells >= 1 + guard $ cells' /= [mempty] + -- note: a & b in a three-column table leaves an empty 3rd cell: + let cells'' = cells' ++ replicate (cols - numcells) mempty spaces - return cells' + return cells'' simpTable :: LP Blocks simpTable = try $ do -- cgit v1.2.3 From 59272e4d99668ddc48f07eb761979c2f49cf76d5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 17 Jun 2014 12:14:02 -0700 Subject: DocBook reader: Support . Closes #1236. Note, this is a bit of a kludge, to work around the fact that xml-light doesn't parse `` correctly. We preprocess the input, replacing that instruction with `
`, and then parse that as a line break. Other XML instructions are simply removed from the input stream. --- src/Text/Pandoc/Readers/DocBook.hs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index d58f8b3c5..cf1d5132e 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -492,7 +492,7 @@ List of all DocBook tags, with [x] indicating implemented, anything else [ ] xref - A cross reference to another part of the document [ ] year - The year of publication of a document - +[x] ?asciidoc-br? - line break from asciidoc docbook output -} type DB = State DBState @@ -507,7 +507,7 @@ data DBState = DBState{ dbSectionLevel :: Int readDocBook :: ReaderOptions -> String -> Pandoc readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) - where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) + where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp') DBState{ dbSectionLevel = 0 , dbQuoteType = DoubleQuote , dbMeta = mempty @@ -515,6 +515,17 @@ readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs) , dbBook = False , dbFigureTitle = mempty } + inp' = handleInstructions inp + +-- We treat specially (issue #1236), converting it +-- to
, since xml-light doesn't parse the instruction correctly. +-- Other xml instructions are simply removed from the input stream. +handleInstructions :: String -> String +handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs +handleInstructions xs = case break (=='<') xs of + (ys, []) -> ys + ([], '<':zs) -> '<' : handleInstructions zs + (ys, zs) -> ys ++ handleInstructions zs getFigure :: Element -> DB Blocks getFigure e = do @@ -920,6 +931,10 @@ parseInline (Elem e) = "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) "title" -> return mempty "affiliation" -> return mempty + -- Note: this isn't a real docbook tag; it's what we convert + -- to in handleInstructions, above. A kludge to + -- work around xml-light's inability to parse an instruction. + "br" -> return linebreak _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e) -- cgit v1.2.3 From b371e83d7362c6ffc6e4dd98b7288ddd7f23f46a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 17 Jun 2014 15:15:56 -0700 Subject: Highlighting: Let .numberLines work even if no language given. Closes #1287, jgm/highlighting-kate#40. --- src/Text/Pandoc/Highlighting.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 2e7a9f648..7f975d4c6 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -74,7 +74,12 @@ highlight formatter (_, classes, keyvals) rawCode = ["number","numberLines", "number-lines"]) classes } lcclasses = map (map toLower) classes in case find (`elem` lcLanguages) lcclasses of - Nothing -> Nothing + Nothing + | numberLines fmtOpts -> Just + $ formatter fmtOpts{ codeClasses = [], + containerClasses = classes } + $ map (\ln -> [(NormalTok, ln)]) $ lines rawCode + | otherwise -> Nothing Just language -> Just $ formatter fmtOpts{ codeClasses = [language], containerClasses = classes } -- cgit v1.2.3 From ab390a10ec3bc42c71d8746152acbf3ee7b1595b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 11:33:09 -0700 Subject: Removed old haddock reader code. Add dependency on haddock-library. This also removes the dependency on alex and happy. --- INSTALL | 6 +- pandoc.cabal | 8 +- src/Text/Pandoc/Readers/Haddock.hs | 32 ++++-- src/Text/Pandoc/Readers/Haddock/Lex.x | 171 ------------------------------ src/Text/Pandoc/Readers/Haddock/Parse.y | 178 -------------------------------- 5 files changed, 25 insertions(+), 370 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/Haddock/Lex.x delete mode 100644 src/Text/Pandoc/Readers/Haddock/Parse.y (limited to 'src/Text') diff --git a/INSTALL b/INSTALL index f3366e103..eb9b2b030 100644 --- a/INSTALL +++ b/INSTALL @@ -12,11 +12,7 @@ Quick install ------------- 1. Install the [Haskell platform]. This will give you [GHC] and - the [cabal-install] build tool, as well as `alex` and `happy`. - If you do not use the Haskell platform, you'll need to install - `alex` and `happy` separately: - - cabal install alex happy + the [cabal-install] build tool. 2. Update your package database: diff --git a/pandoc.cabal b/pandoc.cabal index 5898af5ad..f9938bede 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -255,8 +255,8 @@ Library vector >= 0.10 && < 0.11, hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8, - SHA >= 1.6 && < 1.7 - Build-Tools: alex, happy + SHA >= 1.6 && < 1.7, + haddock-library >= 1.0 && < 1.1 if flag(https) Build-Depends: http-client >= 0.3.2 && < 0.4, http-client-tls >= 0.2 && < 0.3, @@ -322,9 +322,7 @@ Library Text.Pandoc.XML, Text.Pandoc.SelfContained, Text.Pandoc.Process - Other-Modules: Text.Pandoc.Readers.Haddock.Lex, - Text.Pandoc.Readers.Haddock.Parse, - Text.Pandoc.Readers.Docx.Lists, + Other-Modules: Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 0e74406ef..65d8de98f 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -3,7 +3,8 @@ Copyright : Copyright (C) 2013 David Lazar License : GNU GPL, version 2 or above - Maintainer : David Lazar + Maintainer : David Lazar , + John MacFarlane Stability : alpha Conversion of Haddock markup to 'Pandoc' document. @@ -12,22 +13,31 @@ module Text.Pandoc.Readers.Haddock ( readHaddock ) where -import Text.Pandoc.Builder +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Data.Monoid +import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Readers.Haddock.Lex -import Text.Pandoc.Readers.Haddock.Parse +import Documentation.Haddock.Parser (parseParas, Identifier) +import Documentation.Haddock.Types -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse -> Pandoc -readHaddock _ s = Pandoc nullMeta blocks - where - blocks = case parseParas (tokenise s (0,0)) of - Left [] -> error "parse failure" - Left (tok:_) -> error $ "parse failure " ++ pos (tokenPos tok) - where pos (l, c) = "(line " ++ show l ++ ", column " ++ show c ++ ")" - Right x -> mergeLists (toList x) +readHaddock _ = B.doc . docHToBlocks . parseParas + +docHToBlocks :: DocH mod Identifier -> Blocks +docHToBlocks d = + case d of + DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) + DocParagraph ils -> B.para $ docHToInlines ils + +docHToInlines :: DocH mod Identifier -> Inlines +docHToInlines d = + case d of + DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2) + DocString s -> B.text s -- similar to 'docAppend' in Haddock.Doc mergeLists :: [Block] -> [Block] diff --git a/src/Text/Pandoc/Readers/Haddock/Lex.x b/src/Text/Pandoc/Readers/Haddock/Lex.x deleted file mode 100644 index 120e96ebf..000000000 --- a/src/Text/Pandoc/Readers/Haddock/Lex.x +++ /dev/null @@ -1,171 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- --- This file was modified and integrated into GHC by David Waern 2006. --- Then moved back into Haddock by Isaac Dupree in 2009 :-) --- Then copied into Pandoc by David Lazar in 2013 :-D - -{ -{-# LANGUAGE BangPatterns #-} -- Generated by Alex -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Text.Pandoc.Readers.Haddock.Lex ( - Token(..), - LToken, - tokenise, - tokenPos - ) where - -import Data.Char -import Numeric (readHex) -} - -%wrapper "posn" - -$ws = $white # \n -$digit = [0-9] -$hexdigit = [0-9a-fA-F] -$special = [\"\@] -$alphanum = [A-Za-z0-9] -$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:] - -:- - --- beginning of a paragraph -<0,para> { - $ws* \n ; - $ws* \> { begin birdtrack } - $ws* prop \> .* \n { strtoken TokProperty `andBegin` property} - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - $ws* [\*\-] { token TokBullet `andBegin` string } - $ws* \[ { token TokDefStart `andBegin` def } - $ws* \( $digit+ \) { token TokNumber `andBegin` string } - $ws* $digit+ \. { token TokNumber `andBegin` string } - $ws* { begin string } -} - --- beginning of a line - { - $ws* \> { begin birdtrack } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - - $ws* \n { token TokPara `andBegin` para } - -- ^ Here, we really want to be able to say - -- $ws* (\n | ) { token TokPara `andBegin` para} - -- because otherwise a trailing line of whitespace will result in - -- a spurious TokString at the end of a docstring. We don't have , - -- though (NOW I realise what it was for :-). To get around this, we always - -- append \n to the end of a docstring. - - () { begin string } -} - - .* \n? { strtokenNL TokBirdTrack `andBegin` line } - - () { token TokPara `andBegin` para } - - { - $ws* \n { token TokPara `andBegin` para } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - () { begin exampleresult } -} - - .* \n { strtokenNL TokExampleExpression `andBegin` example } - - .* \n { strtokenNL TokExampleResult `andBegin` example } - - { - $special { strtoken $ \s -> TokSpecial (head s) } - \<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } - \< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) } - \# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) } - \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } - [\'\`] $ident+ [\'\`] { strtoken $ \s -> TokIdent (init (tail s)) } - \\ . { strtoken (TokString . tail) } - "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } - "&#" [xX] $hexdigit+ \; - { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } - -- allow special characters through if they don't fit one of the previous - -- patterns. - [\/\'\`\<\#\&\\] { strtoken TokString } - [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } - [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } -} - - { - \] { token TokDefEnd `andBegin` string } -} - --- ']' doesn't have any special meaning outside of the [...] at the beginning --- of a definition paragraph. - { - \] { strtoken TokString } -} - -{ --- | A located token -type LToken = (Token, AlexPosn) - -data Token - = TokPara - | TokNumber - | TokBullet - | TokDefStart - | TokDefEnd - | TokSpecial Char - | TokIdent String - | TokString String - | TokURL String - | TokPic String - | TokEmphasis String - | TokAName String - | TokBirdTrack String - | TokProperty String - | TokExamplePrompt String - | TokExampleExpression String - | TokExampleResult String - deriving Show - -tokenPos :: LToken -> (Int, Int) -tokenPos t = let AlexPn _ line col = snd t in (line, col) - -type StartCode = Int -type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> [LToken] - -tokenise :: String -> (Int, Int) -> [LToken] -tokenise str (line, col) = go (posn,'\n',[],eofHack str) para - where posn = AlexPn 0 line col - go inp@(pos,_,_,str) sc = - case alexScan inp sc of - AlexEOF -> [] - AlexError _ -> [] - AlexSkip inp' len -> go inp' sc - AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) - --- NB. we add a final \n to the string, (see comment in the beginning of line --- production above). -eofHack str = str++"\n" - -andBegin :: Action -> StartCode -> Action -andBegin act new_sc = \pos str _ cont -> act pos str new_sc cont - -token :: Token -> Action -token t = \pos _ sc cont -> (t, pos) : cont sc - -strtoken, strtokenNL :: (String -> Token) -> Action -strtoken t = \pos str sc cont -> (t str, pos) : cont sc -strtokenNL t = \pos str sc cont -> (t (filter (/= '\r') str), pos) : cont sc --- ^ We only want LF line endings in our internal doc string format, so we --- filter out all CRs. - -begin :: StartCode -> Action -begin sc = \_ _ _ cont -> cont sc - -} diff --git a/src/Text/Pandoc/Readers/Haddock/Parse.y b/src/Text/Pandoc/Readers/Haddock/Parse.y deleted file mode 100644 index 9c2bbc8a9..000000000 --- a/src/Text/Pandoc/Readers/Haddock/Parse.y +++ /dev/null @@ -1,178 +0,0 @@ --- This code was copied from the 'haddock' package, modified, and integrated --- into Pandoc by David Lazar. -{ -{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6 --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Text.Pandoc.Readers.Haddock.Parse (parseString, parseParas) where - -import Text.Pandoc.Readers.Haddock.Lex -import Text.Pandoc.Builder -import Text.Pandoc.Shared (trim, trimr) -import Data.Generics (everywhere, mkT) -import Data.Char (isSpace) -import Data.Maybe (fromMaybe) -import Data.List (stripPrefix, intersperse) -import Data.Monoid (mempty, mconcat) -} - -%expect 0 - -%tokentype { LToken } - -%token - '/' { (TokSpecial '/',_) } - '@' { (TokSpecial '@',_) } - '[' { (TokDefStart,_) } - ']' { (TokDefEnd,_) } - DQUO { (TokSpecial '\"',_) } - URL { (TokURL $$,_) } - PIC { (TokPic $$,_) } - ANAME { (TokAName $$,_) } - '/../' { (TokEmphasis $$,_) } - '-' { (TokBullet,_) } - '(n)' { (TokNumber,_) } - '>..' { (TokBirdTrack $$,_) } - PROP { (TokProperty $$,_) } - PROMPT { (TokExamplePrompt $$,_) } - RESULT { (TokExampleResult $$,_) } - EXP { (TokExampleExpression $$,_) } - IDENT { (TokIdent $$,_) } - PARA { (TokPara,_) } - STRING { (TokString $$,_) } - -%monad { Either [LToken] } - -%name parseParas doc -%name parseString seq - -%% - -doc :: { Blocks } - : apara PARA doc { $1 <> $3 } - | PARA doc { $2 } - | apara { $1 } - | {- empty -} { mempty } - -apara :: { Blocks } - : ulpara { bulletList [$1] } - | olpara { orderedList [$1] } - | defpara { definitionList [$1] } - | para { $1 } - -ulpara :: { Blocks } - : '-' para { $2 } - -olpara :: { Blocks } - : '(n)' para { $2 } - -defpara :: { (Inlines, [Blocks]) } - : '[' seq ']' seq { (trimInlines $2, [plain $ trimInlines $4]) } - -para :: { Blocks } - : seq { para' $1 } - | codepara { codeBlockWith ([], ["haskell"], []) $1 } - | property { $1 } - | examples { $1 } - -codepara :: { String } - : '>..' codepara { $1 ++ $2 } - | '>..' { $1 } - -property :: { Blocks } - : PROP { makeProperty $1 } - -examples :: { Blocks } - : example examples { $1 <> $2 } - | example { $1 } - -example :: { Blocks } - : PROMPT EXP result { makeExample $1 $2 (lines $3) } - | PROMPT EXP { makeExample $1 $2 [] } - -result :: { String } - : RESULT result { $1 ++ $2 } - | RESULT { $1 } - -seq :: { Inlines } - : elem seq { $1 <> $2 } - | elem { $1 } - -elem :: { Inlines } - : elem1 { $1 } - | '@' seq1 '@' { monospace $2 } - -seq1 :: { Inlines } - : PARA seq1 { linebreak <> $2 } - | elem1 seq1 { $1 <> $2 } - | elem1 { $1 } - -elem1 :: { Inlines } - : STRING { text $1 } - | '/../' { emph (str $1) } - | URL { makeHyperlink $1 } - | PIC { image $1 $1 mempty } - | ANAME { mempty } -- TODO - | IDENT { codeWith ([], ["haskell"], []) $1 } - | DQUO strings DQUO { codeWith ([], ["haskell"], []) $2 } - -strings :: { String } - : STRING { $1 } - | STRING strings { $1 ++ $2 } - -{ -happyError :: [LToken] -> Either [LToken] a -happyError toks = Left toks - -para' :: Inlines -> Blocks -para' = para . trimInlines - -monospace :: Inlines -> Inlines -monospace = everywhere (mkT go) - where - go (Str s) = Code nullAttr s - go x = x - --- | Create a `Hyperlink` from given string. --- --- A hyperlink consists of a URL and an optional label. The label is separated --- from the url by one or more whitespace characters. -makeHyperlink :: String -> Inlines -makeHyperlink input = case break isSpace $ trim input of - (url, "") -> link url url (str url) - (url, lb) -> link url url (trimInlines $ text lb) - -makeProperty :: String -> Blocks -makeProperty s = case trim s of - 'p':'r':'o':'p':'>':xs -> - codeBlockWith ([], ["property"], []) (dropWhile isSpace xs) - xs -> - error $ "makeProperty: invalid input " ++ show xs - --- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Blocks -makeExample prompt expression result = - para $ codeWith ([], ["haskell","expr"], []) (trim expression) - <> linebreak - <> (mconcat $ intersperse linebreak $ map coder result') - where - -- 1. drop trailing whitespace from the prompt, remember the prefix - prefix = takeWhile isSpace prompt - - -- 2. drop, if possible, the exact same sequence of whitespace - -- characters from each result line - -- - -- 3. interpret lines that only contain the string "" as an - -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result - where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys - - substituteBlankLine "" = "" - substituteBlankLine line = line - coder = codeWith ([], ["result"], []) -} -- cgit v1.2.3 From 9fc5c8d7af31a47d8e3e8ea6dbb541178ec9ca66 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 12:27:27 -0700 Subject: Rewrote haddock reader to use haddock-library. This brings pandoc's rendering of haddock markup in line with the new haddock. Note that we preserve line breaks in `@` code blocks, unlike the earlier version. Modified tests pass. More tests would be good. --- src/Text/Pandoc/Readers/Haddock.hs | 124 ++++++++++++++++++++++++++++++------- tests/haddock-reader.haddock | 20 +++--- tests/haddock-reader.native | 6 +- 3 files changed, 115 insertions(+), 35 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 65d8de98f..a512f969d 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -15,10 +15,13 @@ module Text.Pandoc.Readers.Haddock import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Shared (trim, splitBy) import Data.Monoid +import Data.List (intersperse, stripPrefix) +import Data.Maybe (fromMaybe) import Text.Pandoc.Definition import Text.Pandoc.Options -import Documentation.Haddock.Parser (parseParas, Identifier) +import Documentation.Haddock.Parser import Documentation.Haddock.Types -- | Parse Haddock markup and return a 'Pandoc' document. @@ -27,25 +30,102 @@ readHaddock :: ReaderOptions -- ^ Reader options -> Pandoc readHaddock _ = B.doc . docHToBlocks . parseParas -docHToBlocks :: DocH mod Identifier -> Blocks -docHToBlocks d = - case d of +docHToBlocks :: DocH String Identifier -> Blocks +docHToBlocks d' = + case d' of + DocEmpty -> mempty DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) - DocParagraph ils -> B.para $ docHToInlines ils - -docHToInlines :: DocH mod Identifier -> Inlines -docHToInlines d = - case d of - DocAppend d1 d2 -> mappend (docHToInlines d1) (docHToInlines d2) - DocString s -> B.text s - --- similar to 'docAppend' in Haddock.Doc -mergeLists :: [Block] -> [Block] -mergeLists (BulletList xs : BulletList ys : blocks) - = mergeLists (BulletList (xs ++ ys) : blocks) -mergeLists (OrderedList _ xs : OrderedList a ys : blocks) - = mergeLists (OrderedList a (xs ++ ys) : blocks) -mergeLists (DefinitionList xs : DefinitionList ys : blocks) - = mergeLists (DefinitionList (xs ++ ys) : blocks) -mergeLists (x : blocks) = x : mergeLists blocks -mergeLists [] = [] + DocString _ -> inlineFallback + DocParagraph ils -> B.para $ docHToInlines False ils + DocIdentifier _ -> inlineFallback + DocIdentifierUnchecked _ -> inlineFallback + DocModule s -> B.plain $ docHToInlines False $ DocModule s + DocWarning _ -> mempty -- TODO + DocEmphasis _ -> inlineFallback + DocMonospaced _ -> inlineFallback + DocBold _ -> inlineFallback + DocHeader h -> B.header (headerLevel h) + (docHToInlines False $ headerTitle h) + DocUnorderedList items -> B.bulletList (map docHToBlocks items) + DocOrderedList items -> B.orderedList (map docHToBlocks items) + DocDefList items -> B.definitionList (map (\(d,t) -> + (docHToInlines False d, + [consolidatePlains $ docHToBlocks t])) items) + DocCodeBlock (DocString s) -> B.codeBlockWith ("",["haskell"],[]) s + DocCodeBlock d -> B.para $ docHToInlines True d + DocHyperlink _ -> inlineFallback + DocPic _ -> inlineFallback + DocAName _ -> inlineFallback + DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) + DocExamples es -> mconcat $ map (\e -> + makeExample ">>>" (exampleExpression e) (exampleResult e)) es + + where inlineFallback = B.plain $ docHToInlines False d' + consolidatePlains = B.fromList . consolidatePlains' . B.toList + consolidatePlains' zs@(Plain _ : _) = + let (xs, ys) = span isPlain zs in + Plain (concatMap extractContents xs) : consolidatePlains' ys + consolidatePlains' (x : xs) = x : consolidatePlains' xs + consolidatePlains' [] = [] + isPlain (Plain _) = True + isPlain _ = False + extractContents (Plain xs) = xs + extractContents _ = [] + +docHToInlines :: Bool -> DocH String Identifier -> Inlines +docHToInlines isCode d' = + case d' of + DocEmpty -> mempty + DocAppend d1 d2 -> mappend (docHToInlines isCode d1) + (docHToInlines isCode d2) + DocString s + | isCode -> mconcat $ intersperse B.linebreak + $ map B.code $ splitBy (=='\n') s + | otherwise -> B.text s + DocParagraph _ -> mempty + DocIdentifier (_,s,_) -> B.codeWith ("",["haskell"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell"],[]) s + DocModule s -> B.codeWith ("",["haskell"],[]) s + DocWarning _ -> mempty -- TODO + DocEmphasis d -> B.emph (docHToInlines isCode d) + DocMonospaced (DocString s) -> B.code s + DocMonospaced d -> docHToInlines True d + DocBold d -> B.strong (docHToInlines isCode d) + DocHeader _ -> mempty + DocUnorderedList _ -> mempty + DocOrderedList _ -> mempty + DocDefList _ -> mempty + DocCodeBlock _ -> mempty + DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) + (maybe (B.text $ hyperlinkUrl h) B.text $ hyperlinkLabel h) + DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) + (maybe mempty B.text $ pictureTitle p) + DocAName s -> B.spanWith (s,["anchor"],[]) mempty + DocProperty _ -> mempty + DocExamples _ -> mempty + +-- | Create an 'Example', stripping superfluous characters as appropriate +makeExample :: String -> String -> [String] -> Blocks +makeExample prompt expression result = + B.para $ B.codeWith ("",["prompt"],[]) prompt + <> B.space + <> B.codeWith ([], ["haskell","expr"], []) (trim expression) + <> B.linebreak + <> (mconcat $ intersperse B.linebreak $ map coder result') + where + -- 1. drop trailing whitespace from the prompt, remember the prefix + prefix = takeWhile (`elem` " \t") prompt + + -- 2. drop, if possible, the exact same sequence of whitespace + -- characters from each result line + -- + -- 3. interpret lines that only contain the string "" as an + -- empty line + result' = map (substituteBlankLine . tryStripPrefix prefix) result + where + tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + + substituteBlankLine "" = "" + substituteBlankLine line = line + coder = B.codeWith ([], ["result"], []) + diff --git a/tests/haddock-reader.haddock b/tests/haddock-reader.haddock index c4f6d6c36..c3ef0c9fc 100644 --- a/tests/haddock-reader.haddock +++ b/tests/haddock-reader.haddock @@ -18,10 +18,10 @@ This is a code block: This is another code block: @ - f x = x + x. - The \@...\@ code block /interprets markup normally/. - "Module.Foo" - \"Hello World\" +f x = x + x. +The \@...\@ code block /interprets markup normally/. +"Module.Foo" +\"Hello World\" @ Haddock supports REPL examples: @@ -42,21 +42,21 @@ This is a reference to the "Foo" module. This is a bulleted list: - * first item + * first item - * second item + * second item This is an enumerated list: - (1) first item + (1) first item - 2. second item + 2. second item This is a definition list: - [@foo@] The description of @foo@. + [@foo@] The description of @foo@. - [@bar@] The description of @bar@. + [@bar@] The description of @bar@. Here is a link: diff --git a/tests/haddock-reader.native b/tests/haddock-reader.native index 877719b50..8edb0b29a 100644 --- a/tests/haddock-reader.native +++ b/tests/haddock-reader.native @@ -4,13 +4,13 @@ Pandoc (Meta {unMeta = fromList []}) ,Para [Str "*",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph,",Space,Str "not",Space,Str "a",Space,Str "list",Space,Str "item.",Space,Str ">",Space,Str "This",Space,Str "sentence",Space,Str "is",Space,Str "not",Space,Str "code.",Space,Str ">>>",Space,Str "This",Space,Str "is",Space,Str "not",Space,Str "an",Space,Str "example."] ,Para [Str "The",Space,Str "references",Space,Str "\955,",Space,Str "\955",Space,Str "and",Space,Str "\955",Space,Str "all",Space,Str "represent",Space,Str "the",Space,Str "lower-case",Space,Str "letter",Space,Str "lambda."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "code",Space,Str "block:"] -,CodeBlock ("",["haskell"],[]) " map :: (a -> b) -> [a] -> [b]\n map _ [] = []\n map f (x:xs) = f x : map f xs\n" +,CodeBlock ("",["haskell"],[]) "map :: (a -> b) -> [a] -> [b]\nmap _ [] = []\nmap f (x:xs) = f x : map f xs" ,Para [Str "This",Space,Str "is",Space,Str "another",Space,Str "code",Space,Str "block:"] -,Para [Code ("",[],[]) "f",Space,Code ("",[],[]) "x",Space,Code ("",[],[]) "=",Space,Code ("",[],[]) "x",Space,Code ("",[],[]) "+",Space,Code ("",[],[]) "x.",Space,Code ("",[],[]) "The",Space,Code ("",[],[]) "@...@",Space,Code ("",[],[]) "code",Space,Code ("",[],[]) "block",Space,Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Space,Code ("",["haskell"],[]) "Module.Foo",Space,Code ("",[],[]) "\"Hello",Space,Code ("",[],[]) "World\""] +,Para [Code ("",[],[]) "f x = x + x.",LineBreak,Code ("",[],[]) "The @...@ code block ",Emph [Code ("",[],[]) "interprets markup normally"],Code ("",[],[]) ".",Code ("",["haskell"],[]) "Module.Foo",Code ("",[],[]) "",LineBreak,Code ("",[],[]) "\"Hello World\""] ,Para [Str "Haddock",Space,Str "supports",Space,Str "REPL",Space,Str "examples:"] ,Para [Code ("",["haskell","expr"],[]) "fib 10",LineBreak,Code ("",["result"],[]) "55"] ,Para [Code ("",["haskell","expr"],[]) "putStrLn \"foo\\nbar\"",LineBreak,Code ("",["result"],[]) "foo",LineBreak,Code ("",["result"],[]) "bar"] -,Para [Str "That",Space,Str "was",Space,Emph [Str "really cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib",Space,Code ("",[],[]) "10",Space,Code ("",[],[]) "=",Space,Code ("",[],[]) "55",Str "."] +,Para [Str "That",Space,Str "was",Space,Emph [Str "really",Space,Str "cool"],Str "!",Space,Str "I",Space,Str "had",Space,Str "no",Space,Str "idea",Space,Code ("",[],[]) "fib 10 = 55",Str "."] ,Para [Str "This",Space,Str "module",Space,Str "defines",Space,Str "the",Space,Str "type",Space,Code ("",["haskell"],[]) "T",Str ".",Space,Str "The",Space,Str "identifier",Space,Code ("",["haskell"],[]) "M.T",Space,Str "is",Space,Str "not",Space,Str "in",Space,Str "scope",Space,Str "I",Space,Str "don't",Space,Str "have",Space,Str "to",Space,Str "escape",Space,Str "my",Space,Str "apostrophes;",Space,Str "great,",Space,Str "isn't",Space,Str "it?",Space,Str "This",Space,Str "is",Space,Str "a",Space,Str "reference",Space,Str "to",Space,Str "the",Space,Code ("",["haskell"],[]) "Foo",Space,Str "module."] ,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "bulleted",Space,Str "list:"] ,BulletList -- cgit v1.2.3 From 35e57db5c292957e74c24eb2cee63928c7865cc6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 15:32:13 -0700 Subject: Finished first draft of Haddock writer. --- README | 12 +- pandoc.cabal | 7 +- src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Readers/Haddock.hs | 13 +- src/Text/Pandoc/Writers/Haddock.hs | 357 +++++++++++++++++++++++++++++++++++++ 5 files changed, 381 insertions(+), 11 deletions(-) create mode 100644 src/Text/Pandoc/Writers/Haddock.hs (limited to 'src/Text') diff --git a/README b/README index 1883ecd57..122db23ec 100644 --- a/README +++ b/README @@ -18,10 +18,10 @@ Org-mode], [DocBook], and [Word docx]; and it can write plain text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX] (including [beamer] slide shows), [ConTeXt], [RTF], [OPML], [DocBook], [OpenDocument], [ODT], [Word docx], [GNU Texinfo], [MediaWiki markup], -[EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, -[Emacs Org-Mode], [AsciiDoc], [InDesign ICML], and [Slidy], -[Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. It can -also produce [PDF] output on systems where LaTeX is installed. +[Haddock markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], +[groff man] pages, [Emacs Org-Mode], [AsciiDoc], [InDesign ICML], +and [Slidy], [Slideous], [DZSlides], [reveal.js] or [S5] HTML slide shows. +It can also produce [PDF] output on systems where LaTeX is installed. Pandoc's enhanced version of markdown includes syntax for footnotes, tables, flexible ordered lists, definition lists, fenced code blocks, @@ -169,8 +169,8 @@ General options `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup), `textile` (Textile), `org` (Emacs Org-Mode), `texinfo` (GNU Texinfo), `opml` (OPML), `docbook` (DocBook), `opendocument` (OpenDocument), `odt` - (OpenOffice text document), `docx` (Word docx), - `rtf` (rich text format), `epub` (EPUB v2 book), `epub3` + (OpenOffice text document), `docx` (Word docx), `haddock` (Haddock + markup), `rtf` (rich text format), `epub` (EPUB v2 book), `epub3` (EPUB v3), `fb2` (FictionBook2 e-book), `asciidoc` (AsciiDoc), `icml` (InDesign ICML), `slidy` (Slidy HTML and javascript slide show), `slideous` (Slideous HTML and javascript slide show), `dzslides` diff --git a/pandoc.cabal b/pandoc.cabal index f9938bede..058e82a7f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -21,9 +21,9 @@ Description: Pandoc is a Haskell library for converting from one markup markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OPML, OpenDocument, ODT, Word docx, RTF, MediaWiki, Textile, groff man pages, plain text, Emacs Org-Mode, AsciiDoc, - EPUB (v2 and v3), FictionBook2, InDesign ICML, and several kinds - of HTML/javascript slide shows (S5, Slidy, Slideous, DZSlides, - reveal.js). + Haddock markup, EPUB (v2 and v3), FictionBook2, + InDesign ICML, and several kinds of HTML/javascript + slide shows (S5, Slidy, Slideous, DZSlides, reveal.js). . Pandoc extends standard markdown syntax with footnotes, embedded LaTeX, definition lists, tables, and other @@ -305,6 +305,7 @@ Library Text.Pandoc.Writers.Texinfo, Text.Pandoc.Writers.Man, Text.Pandoc.Writers.Markdown, + Text.Pandoc.Writers.Haddock, Text.Pandoc.Writers.RST, Text.Pandoc.Writers.Org, Text.Pandoc.Writers.AsciiDoc, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 45c2f453b..d2e7887b5 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -102,6 +102,7 @@ module Text.Pandoc , writeFB2 , writeOrg , writeAsciiDoc + , writeHaddock , writeCustom -- * Rendering templates and default templates , module Text.Pandoc.Templates @@ -149,6 +150,7 @@ import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc +import Text.Pandoc.Writers.Haddock import Text.Pandoc.Writers.Custom import Text.Pandoc.Templates import Text.Pandoc.Options @@ -279,6 +281,7 @@ writers = [ ,("rtf" , IOStringWriter writeRTFWithEmbeddedImages) ,("org" , PureStringWriter writeOrg) ,("asciidoc" , PureStringWriter writeAsciiDoc) + ,("haddock" , PureStringWriter writeHaddock) ] getDefaultExtensions :: String -> Set Extension diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index a512f969d..f184eabdb 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -23,19 +23,28 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Documentation.Haddock.Parser import Documentation.Haddock.Types +import Debug.Trace (trace) -- | Parse Haddock markup and return a 'Pandoc' document. readHaddock :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse -> Pandoc -readHaddock _ = B.doc . docHToBlocks . parseParas +readHaddock opts = B.doc . docHToBlocks . trace' . parseParas + where trace' x = if readerTrace opts + then trace (show x) x + else x docHToBlocks :: DocH String Identifier -> Blocks docHToBlocks d' = case d' of DocEmpty -> mempty + DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) -> + B.headerWith (ident,[],[]) (headerLevel h) + (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback + DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h) + DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h DocParagraph ils -> B.para $ docHToInlines False ils DocIdentifier _ -> inlineFallback DocIdentifierUnchecked _ -> inlineFallback @@ -64,7 +73,7 @@ docHToBlocks d' = consolidatePlains = B.fromList . consolidatePlains' . B.toList consolidatePlains' zs@(Plain _ : _) = let (xs, ys) = span isPlain zs in - Plain (concatMap extractContents xs) : consolidatePlains' ys + Para (concatMap extractContents xs) : consolidatePlains' ys consolidatePlains' (x : xs) = x : consolidatePlains' xs consolidatePlains' [] = [] isPlain (Plain _) = True diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs new file mode 100644 index 000000000..4d6b8e69f --- /dev/null +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -0,0 +1,357 @@ +{-# LANGUAGE OverloadedStrings, TupleSections, ScopedTypeVariables #-} +{- +Copyright (C) 2014 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Haddock + Copyright : Copyright (C) 2014 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to haddock markup. + +Haddock: +-} +module Text.Pandoc.Writers.Haddock (writeHaddock) where +import Text.Pandoc.Definition +import Text.Pandoc.Templates (renderTemplate') +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Options +import Data.List ( intersperse, transpose ) +import Text.Pandoc.Pretty +import Control.Monad.State +import Text.Pandoc.Readers.TeXMath (readTeXMath') +import Network.URI (isURI) +import Data.Default + +type Notes = [[Block]] +data WriterState = WriterState { stNotes :: Notes } +instance Default WriterState + where def = WriterState{ stNotes = [] } + +-- | Convert Pandoc to Haddock. +writeHaddock :: WriterOptions -> Pandoc -> String +writeHaddock opts document = + evalState (pandocToHaddock opts{ + writerWrapText = writerWrapText opts } document) def + +-- | Return haddock representation of document. +pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String +pandocToHaddock opts (Pandoc meta blocks) = do + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + body <- blockListToHaddock opts blocks + st <- get + notes' <- notesToHaddock opts (reverse $ stNotes st) + let render' :: Doc -> String + render' = render colwidth + let main = render' $ body <> + (if isEmpty notes' then empty else blankline <> notes') + metadata <- metaToJSON opts + (fmap (render colwidth) . blockListToHaddock opts) + (fmap (render colwidth) . inlineListToHaddock opts) + meta + let context = defField "body" main + $ metadata + if writerStandalone opts + then return $ renderTemplate' (writerTemplate opts) context + else return main + +-- | Return haddock representation of notes. +notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToHaddock opts notes = + mapM (\(num, note) -> noteToHaddock opts num note) (zip [1..] notes) >>= + return . vsep + +-- | Return haddock representation of a note. +noteToHaddock :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToHaddock opts num blocks = do + contents <- blockListToHaddock opts blocks + let num' = text $ writerIdentifierPrefix opts ++ show num + let marker = text "[" <> num' <> text "]" + let markerSize = 4 + offset num' + let spacer = case writerTabStop opts - markerSize of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + return $ if isEnabled Ext_footnotes opts + then hang (writerTabStop opts) (marker <> spacer) contents + else marker <> spacer <> contents + +-- | Escape special characters for Haddock. +escapeString :: String -> String +escapeString = escapeStringUsing haddockEscapes + where haddockEscapes = backslashEscapes "\\/'`\"@<" + +-- | Convert Pandoc block element to haddock. +blockToHaddock :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToHaddock _ Null = return empty +blockToHaddock opts (Div _ ils) = do + contents <- blockListToHaddock opts ils + return $ contents <> blankline +blockToHaddock opts (Plain inlines) = do + contents <- inlineListToHaddock opts inlines + return $ contents <> cr +-- title beginning with fig: indicates figure +blockToHaddock opts (Para [Image alt (src,'f':'i':'g':':':tit)]) = + blockToHaddock opts (Para [Image alt (src,tit)]) +blockToHaddock opts (Para inlines) = + -- TODO: if it contains linebreaks, we need to use a @...@ block + (<> blankline) `fmap` blockToHaddock opts (Plain inlines) +blockToHaddock _ (RawBlock f str) + | f == "haddock" = do + return $ text str <> text "\n" + | otherwise = return empty +blockToHaddock _ HorizontalRule = + return $ blankline <> text "--------------" <> blankline +blockToHaddock opts (Header level (ident,_,_) inlines) = do + contents <- inlineListToHaddock opts inlines + let attr' = if null ident + then empty + else cr <> text "#" <> text ident <> text "#" + return $ nowrap (text (replicate level '=') <> space <> contents) + <> attr' <> blankline +blockToHaddock _ (CodeBlock (_,_,_) str) = + return $ prefixed "> " (text str) <> blankline +-- Nothing in haddock corresponds to block quotes: +blockToHaddock opts (BlockQuote blocks) = + blockListToHaddock opts blocks +-- Haddock doesn't have tables. Use haddock tables in code. +blockToHaddock opts (Table caption aligns widths headers rows) = do + caption' <- inlineListToHaddock opts caption + let caption'' = if null caption || not (isEnabled Ext_table_captions opts) + then empty + else blankline <> caption' <> blankline + rawHeaders <- mapM (blockListToHaddock opts) headers + rawRows <- mapM (mapM (blockListToHaddock opts)) rows + let isSimple = all (==0) widths + let isPlainBlock (Plain _) = True + isPlainBlock _ = False + let hasBlocks = not (all isPlainBlock $ concat . concat $ headers:rows) + (nst,tbl) <- case True of + _ | isSimple -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | not hasBlocks -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | otherwise -> fmap (id,) $ + gridTable opts (all null headers) aligns widths + rawHeaders rawRows + return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline +blockToHaddock opts (BulletList items) = do + contents <- mapM (bulletListItemToHaddock opts) items + return $ cat contents <> blankline +blockToHaddock opts (OrderedList (start,sty,delim) items) = do + let attribs = (start, sty, delim) + let markers = orderedListMarkers attribs + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToHaddock opts item num) $ + zip markers' items + return $ cat contents <> blankline +blockToHaddock opts (DefinitionList items) = do + contents <- mapM (definitionListItemToHaddock opts) items + return $ cat contents <> blankline + +pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable opts headless aligns widths rawHeaders rawRows = do + let isSimple = all (==0) widths + let alignHeader alignment = case alignment of + AlignLeft -> lblock + AlignCenter -> cblock + AlignRight -> rblock + AlignDefault -> lblock + let numChars = maximum . map offset + let widthsInChars = if isSimple + then map ((+2) . numChars) + $ transpose (rawHeaders : rawRows) + else map + (floor . (fromIntegral (writerColumns opts) *)) + widths + let makeRow = hcat . intersperse (lblock 1 (text " ")) . + (zipWith3 alignHeader aligns widthsInChars) + let rows' = map makeRow rawRows + let head' = makeRow rawHeaders + let maxRowHeight = maximum $ map height (head':rows') + let underline = cat $ intersperse (text " ") $ + map (\width -> text (replicate width '-')) widthsInChars + let border = if maxRowHeight > 1 + then text (replicate (sum widthsInChars + + length widthsInChars - 1) '-') + else if headless + then underline + else empty + let head'' = if headless + then empty + else border <> cr <> head' + let body = if maxRowHeight > 1 + then vsep rows' + else vcat rows' + let bottom = if headless + then underline + else border + return $ head'' $$ underline $$ body $$ bottom + +gridTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +gridTable opts headless _aligns widths headers' rawRows = do + let numcols = length headers' + let widths' = if all (==0) widths + then replicate numcols (1.0 / fromIntegral numcols) + else widths + let widthsInChars = map (floor . (fromIntegral (writerColumns opts) *)) widths' + let hpipeBlocks blocks = hcat [beg, middle, end] + where h = maximum (map height blocks) + sep' = lblock 3 $ vcat (map text $ replicate h " | ") + beg = lblock 2 $ vcat (map text $ replicate h "| ") + end = lblock 2 $ vcat (map text $ replicate h " |") + middle = chomp $ hcat $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith lblock widthsInChars + let head' = makeRow headers' + let rows' = map (makeRow . map chomp) rawRows + let border ch = char '+' <> char ch <> + (hcat $ intersperse (char ch <> char '+' <> char ch) $ + map (\l -> text $ replicate l ch) widthsInChars) <> + char ch <> char '+' + let body = vcat $ intersperse (border '-') rows' + let head'' = if headless + then empty + else head' $$ border '=' + return $ border '-' $$ head'' $$ body $$ border '-' + +-- | Convert bullet list item (list of blocks) to haddock +bulletListItemToHaddock :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToHaddock opts items = do + contents <- blockListToHaddock opts items + let sps = replicate (writerTabStop opts - 2) ' ' + let start = text ('-' : ' ' : sps) + -- remove trailing blank line if it is a tight list + let contents' = case reverse items of + (BulletList xs:_) | isTightList xs -> + chomp contents <> cr + (OrderedList _ xs:_) | isTightList xs -> + chomp contents <> cr + _ -> contents + return $ hang (writerTabStop opts) start $ contents' <> cr + +-- | Convert ordered list item (a list of blocks) to haddock +orderedListItemToHaddock :: WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToHaddock opts marker items = do + contents <- blockListToHaddock opts items + let sps = case length marker - writerTabStop opts of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + let start = text marker <> sps + return $ hang (writerTabStop opts) start $ contents <> cr + +-- | Convert definition list item (label, list of blocks) to haddock +definitionListItemToHaddock :: WriterOptions + -> ([Inline],[[Block]]) + -> State WriterState Doc +definitionListItemToHaddock opts (label, defs) = do + labelText <- inlineListToHaddock opts label + defs' <- mapM (mapM (blockToHaddock opts)) defs + let contents = vcat $ map (\d -> hang 4 empty $ vcat d <> cr) defs' + return $ nowrap (brackets labelText) <> cr <> contents <> cr + +-- | Convert list of Pandoc block elements to haddock +blockListToHaddock :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToHaddock opts blocks = + mapM (blockToHaddock opts) blocks >>= return . cat + +-- | Convert list of Pandoc inline elements to haddock. +inlineListToHaddock :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToHaddock opts lst = + mapM (inlineToHaddock opts) lst >>= return . cat + +-- | Convert Pandoc inline element to haddock. +inlineToHaddock :: WriterOptions -> Inline -> State WriterState Doc +inlineToHaddock opts (Span (ident,_,_) ils) = do + contents <- inlineListToHaddock opts ils + if not (null ident) && null ils + then return $ "#" <> text ident <> "#" + else return contents +inlineToHaddock opts (Emph lst) = do + contents <- inlineListToHaddock opts lst + return $ "/" <> contents <> "/" +inlineToHaddock opts (Strong lst) = do + contents <- inlineListToHaddock opts lst + return $ "__" <> contents <> "__" +inlineToHaddock opts (Strikeout lst) = do + contents <- inlineListToHaddock opts lst + -- not supported in haddock, but we fake it: + return $ "~~" <> contents <> "~~" +-- not supported in haddock: +inlineToHaddock opts (Superscript lst) = inlineListToHaddock opts lst +-- not supported in haddock: +inlineToHaddock opts (Subscript lst) = inlineListToHaddock opts lst +-- not supported in haddock: +inlineToHaddock opts (SmallCaps lst) = inlineListToHaddock opts lst +inlineToHaddock opts (Quoted SingleQuote lst) = do + contents <- inlineListToHaddock opts lst + return $ "‘" <> contents <> "’" +inlineToHaddock opts (Quoted DoubleQuote lst) = do + contents <- inlineListToHaddock opts lst + return $ "“" <> contents <> "”" +inlineToHaddock _ (Code _ str) = + return $ "@" <> text (escapeString str) <> "@" +inlineToHaddock _ (Str str) = do + return $ text $ escapeString str +inlineToHaddock opts (Math mt str) = do + let adjust x = case mt of + DisplayMath -> cr <> x <> cr + InlineMath -> x + adjust `fmap` (inlineListToHaddock opts $ readTeXMath' mt str) +inlineToHaddock _ (RawInline f str) + | f == "haddock" = return $ text str + | otherwise = return empty +-- no line break in haddock (see above on CodeBlock) +inlineToHaddock _ (LineBreak) = return cr +inlineToHaddock _ Space = return space +inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst +inlineToHaddock opts (Link txt (src, _)) = do + linktext <- inlineListToHaddock opts txt + let useAuto = isURI src && + case txt of + [Str s] | escapeURI s == src -> True + _ -> False + return $ nowrap $ "<" <> text src <> + (if useAuto then empty else space <> linktext) <> ">" +inlineToHaddock opts (Image alternate (source, tit)) = do + linkhaddock <- inlineToHaddock opts (Link alternate (source, tit)) + return $ "<" <> linkhaddock <> ">" +-- haddock doesn't have notes, but we can fake it: +inlineToHaddock opts (Note contents) = do + modify (\st -> st{ stNotes = contents : stNotes st }) + st <- get + let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) + return $ "[" <> ref <> "]" -- cgit v1.2.3 From ff6a2baeb9940276fbbaf486a5711378d13cc1e1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 17:49:59 -0700 Subject: More polish on Haddock reader/writer. --- src/Text/Pandoc/Readers/Haddock.hs | 46 +++++++++++++++++++++++++++++++++----- src/Text/Pandoc/Writers/Haddock.hs | 23 +++++-------------- 2 files changed, 47 insertions(+), 22 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index f184eabdb..a3dfb7c3c 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -45,7 +45,9 @@ docHToBlocks d' = DocString _ -> inlineFallback DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h) DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h - DocParagraph ils -> B.para $ docHToInlines False ils + DocParagraph x -> let (ils, rest) = getInlines x + in (B.para $ docHToInlines False ils) + <> docHToBlocks rest DocIdentifier _ -> inlineFallback DocIdentifierUnchecked _ -> inlineFallback DocModule s -> B.plain $ docHToInlines False $ DocModule s @@ -60,7 +62,7 @@ docHToBlocks d' = DocDefList items -> B.definitionList (map (\(d,t) -> (docHToInlines False d, [consolidatePlains $ docHToBlocks t])) items) - DocCodeBlock (DocString s) -> B.codeBlockWith ("",["haskell"],[]) s + DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s DocCodeBlock d -> B.para $ docHToInlines True d DocHyperlink _ -> inlineFallback DocPic _ -> inlineFallback @@ -92,9 +94,9 @@ docHToInlines isCode d' = $ map B.code $ splitBy (=='\n') s | otherwise -> B.text s DocParagraph _ -> mempty - DocIdentifier (_,s,_) -> B.codeWith ("",["haskell"],[]) s - DocIdentifierUnchecked s -> B.codeWith ("",["haskell"],[]) s - DocModule s -> B.codeWith ("",["haskell"],[]) s + DocIdentifier (_,s,_) -> B.codeWith ("",["haskell","identifier"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s + DocModule s -> B.codeWith ("",["haskell","module"],[]) s DocWarning _ -> mempty -- TODO DocEmphasis d -> B.emph (docHToInlines isCode d) DocMonospaced (DocString s) -> B.code s @@ -113,6 +115,40 @@ docHToInlines isCode d' = DocProperty _ -> mempty DocExamples _ -> mempty +getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier) +getInlines (DocAppend x y) = if isInline x + then let (a, b) = getInlines y + in (DocAppend x a, b) + else (DocEmpty, DocAppend x y) +getInlines x = if isInline x + then (x, DocEmpty) + else (DocEmpty, x) + +isInline :: DocH String Identifier -> Bool +isInline d' = + case d' of + DocEmpty -> True + DocAppend d1 _ -> isInline d1 + DocString _ -> True + DocParagraph _ -> False + DocIdentifier _ -> True + DocIdentifierUnchecked _ -> True + DocModule _ -> True + DocWarning _ -> True + DocEmphasis _ -> True + DocMonospaced _ -> True + DocBold _ -> True + DocHeader _ -> False + DocUnorderedList _ -> False + DocOrderedList _ -> False + DocDefList _ -> False + DocCodeBlock _ -> False + DocHyperlink _ -> True + DocPic _ -> True + DocAName _ -> True + DocProperty _ -> False + DocExamples _ -> False + -- | Create an 'Example', stripping superfluous characters as appropriate makeExample :: String -> String -> [String] -> Blocks makeExample prompt expression result = diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 4d6b8e69f..36f57c2b7 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -80,22 +80,11 @@ pandocToHaddock opts (Pandoc meta blocks) = do -- | Return haddock representation of notes. notesToHaddock :: WriterOptions -> [[Block]] -> State WriterState Doc notesToHaddock opts notes = - mapM (\(num, note) -> noteToHaddock opts num note) (zip [1..] notes) >>= - return . vsep - --- | Return haddock representation of a note. -noteToHaddock :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToHaddock opts num blocks = do - contents <- blockListToHaddock opts blocks - let num' = text $ writerIdentifierPrefix opts ++ show num - let marker = text "[" <> num' <> text "]" - let markerSize = 4 + offset num' - let spacer = case writerTabStop opts - markerSize of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " - return $ if isEnabled Ext_footnotes opts - then hang (writerTabStop opts) (marker <> spacer) contents - else marker <> spacer <> contents + if null notes + then return empty + else do + contents <- blockToHaddock opts $ OrderedList (1,DefaultStyle,DefaultDelim) notes + return $ text "#notes#" <> blankline <> contents -- | Escape special characters for Haddock. escapeString :: String -> String @@ -354,4 +343,4 @@ inlineToHaddock opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get let ref = text $ writerIdentifierPrefix opts ++ show (length $ stNotes st) - return $ "[" <> ref <> "]" + return $ "<#notes [" <> ref <> "]>" -- cgit v1.2.3 From c4182b39ca009f02fc4e0768056d37d64b93df7c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:08:41 -0700 Subject: Small fix to haddock "tables". --- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- tests/tables.haddock | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 36f57c2b7..1939d3a6d 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -129,7 +129,7 @@ blockToHaddock opts (BlockQuote blocks) = -- Haddock doesn't have tables. Use haddock tables in code. blockToHaddock opts (Table caption aligns widths headers rows) = do caption' <- inlineListToHaddock opts caption - let caption'' = if null caption || not (isEnabled Ext_table_captions opts) + let caption'' = if null caption then empty else blankline <> caption' <> blankline rawHeaders <- mapM (blockListToHaddock opts) headers @@ -148,7 +148,7 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do | otherwise -> fmap (id,) $ gridTable opts (all null headers) aligns widths rawHeaders rawRows - return $ prefixed "> " $ nst $ tbl $$ blankline $$ caption'' $$ blankline + return $ (prefixed "> " $ nst $ tbl $$ blankline $$ caption'') $$ blankline blockToHaddock opts (BulletList items) = do contents <- mapM (bulletListItemToHaddock opts) items return $ cat contents <> blankline diff --git a/tests/tables.haddock b/tests/tables.haddock index e054dc072..413ec97ad 100644 --- a/tests/tables.haddock +++ b/tests/tables.haddock @@ -6,6 +6,8 @@ Simple table with caption: > 123 123 123 123 > 1 1 1 1 > +> Demonstration of simple table syntax. + Simple table without caption: > Right Left Center Default @@ -13,7 +15,7 @@ Simple table without caption: > 12 12 12 12 > 123 123 123 123 > 1 1 1 1 -> + Simple table indented two spaces: > Right Left Center Default @@ -22,6 +24,8 @@ Simple table indented two spaces: > 123 123 123 123 > 1 1 1 1 > +> Demonstration of simple table syntax. + Multiline table with caption: > -------------------------------------------------------------- @@ -36,6 +40,8 @@ Multiline table with caption: > rows. > -------------------------------------------------------------- > +> Here\'s the caption. It may span multiple lines. + Multiline table without caption: > -------------------------------------------------------------- @@ -49,7 +55,7 @@ Multiline table without caption: > the blank line between > rows. > -------------------------------------------------------------- -> + Table without column headers: > ----- ----- ----- ----- @@ -57,7 +63,7 @@ Table without column headers: > 123 123 123 123 > 1 1 1 1 > ----- ----- ----- ----- -> + Multiline table without column headers: > ----------- ---------- ------------ -------------------------- @@ -68,5 +74,4 @@ Multiline table without column headers: > the blank line between > rows. > ----------- ---------- ------------ -------------------------- -> -- cgit v1.2.3 From de7b3a3d08264d6aa755436583d53f9a61252fa2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 18 Jun 2014 18:11:01 -0700 Subject: Haddock writer: Only use Decimal list style. --- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 1939d3a6d..59d979ea8 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -152,8 +152,8 @@ blockToHaddock opts (Table caption aligns widths headers rows) = do blockToHaddock opts (BulletList items) = do contents <- mapM (bulletListItemToHaddock opts) items return $ cat contents <> blankline -blockToHaddock opts (OrderedList (start,sty,delim) items) = do - let attribs = (start, sty, delim) +blockToHaddock opts (OrderedList (start,_,delim) items) = do + let attribs = (start, Decimal, delim) let markers = orderedListMarkers attribs let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' -- cgit v1.2.3 From 00281559bf9c955ece6b18d48ef487fdc5f4406e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 00:28:23 -0700 Subject: Haddock writer: Use _____ for hrule. Avoids interpretation as list. --- src/Text/Pandoc/Writers/Haddock.hs | 4 ++-- tests/writer.haddock | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 59d979ea8..1c82839d0 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -112,8 +112,8 @@ blockToHaddock _ (RawBlock f str) | f == "haddock" = do return $ text str <> text "\n" | otherwise = return empty -blockToHaddock _ HorizontalRule = - return $ blankline <> text "--------------" <> blankline +blockToHaddock opts HorizontalRule = + return $ blankline <> text (replicate (writerColumns opts) '_') <> blankline blockToHaddock opts (Header level (ident,_,_) inlines) = do contents <- inlineListToHaddock opts inlines let attr' = if null ident diff --git a/tests/writer.haddock b/tests/writer.haddock index 129242153..0772331e3 100644 --- a/tests/writer.haddock +++ b/tests/writer.haddock @@ -1,7 +1,7 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite. --------------- +______________________________________________________________________________ = Headers #headers# @@ -34,7 +34,7 @@ with no blank line with no blank line --------------- +______________________________________________________________________________ = Paragraphs #paragraphs# @@ -50,7 +50,7 @@ Here’s one with a bullet. * criminey. There should be a hard line break here. --------------- +______________________________________________________________________________ = Block Quotes #block-quotes# @@ -80,7 +80,7 @@ This should not be a block quote: 2 > 1. And a following paragraph. --------------- +______________________________________________________________________________ = Code Blocks #code-blocks# @@ -101,7 +101,7 @@ And: > > These should not be escaped: \$ \\ \> \[ \{ --------------- +______________________________________________________________________________ = Lists #lists# @@ -264,7 +264,7 @@ M.A. 2007 B. Williams --------------- +______________________________________________________________________________ = Definition Lists #definition-lists# @@ -397,7 +397,7 @@ Code: Hr’s: --------------- +______________________________________________________________________________ = Inline Markup #inline-markup# @@ -427,7 +427,7 @@ Subscripts: H2O, H23O, Hmany of themO. These should not be superscripts or subscripts, because of the unescaped spaces: a^b c^d, a~b c~d. --------------- +______________________________________________________________________________ = Smart quotes, ellipses, dashes #smart-quotes-ellipses-dashes# @@ -449,7 +449,7 @@ Dashes between numbers: 5–7, 255–66, 1987–1999. Ellipses…and…and…. --------------- +______________________________________________________________________________ = LaTeX #latex# @@ -474,7 +474,7 @@ These shouldn’t be math: Here’s a LaTeX table: --------------- +______________________________________________________________________________ = Special Characters #special-characters# @@ -529,7 +529,7 @@ Plus: + Minus: - --------------- +______________________________________________________________________________ = Links #links# @@ -610,7 +610,7 @@ Auto-links should not occur here: @\@ > or here: --------------- +______________________________________________________________________________ = Images #images# @@ -621,7 +621,7 @@ From “Voyage dans la Lune” by Georges Melies (1902): Here is a movie <> icon. --------------- +______________________________________________________________________________ = Footnotes #footnotes# -- cgit v1.2.3 From 5cb53a48d541b97b5f60968715a5969133196d70 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 14:30:03 -0700 Subject: ImageSize: ignore unknown exif header tag rather than crashing. Some images seem to have tag type of 256, which was causing a runtime error. --- src/Text/Pandoc/ImageSize.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index a6d076fa9..9e6b457c0 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -217,7 +217,7 @@ exifHeader hdr = do numentries <- getWord16 let ifdEntry = do tag <- getWord16 >>= \t -> - maybe (fail $ "Unknown tag type " ++ show t) return + maybe (return UnknownTagType) return (M.lookup t tagTypeTable) dataFormat <- getWord16 numComponents <- getWord32 @@ -337,6 +337,7 @@ data TagType = ImageDescription | SensingMethod | FileSource | SceneType + | UnknownTagType deriving (Show, Eq, Ord) tagTypeTable :: M.Map Word16 TagType -- cgit v1.2.3 From 86fc44d6b3f82a2b274d4b592d1dd6152bd1eaf5 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:53:52 -0400 Subject: Add literal tabs to parser. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 22e9dd909..18200bcf9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -466,7 +466,7 @@ data Run = Run RunStyle [RunElem] | Endnote String deriving Show -data RunElem = TextRun String | LnBrk +data RunElem = TextRun String | LnBrk | Tab deriving Show data RunStyle = RunStyle { isBold :: Bool @@ -545,6 +545,9 @@ elemToRunElem ns element | qName (elName element) == "br" && qURI (elName element) == (lookup "w" ns) = Just $ LnBrk + | qName (elName element) == "tab" && + qURI (elName element) == (lookup "w" ns) = + Just $ Tab | otherwise = Nothing -- cgit v1.2.3 From 0e7d2dbd4304902cb6c6d4e9618592b5148dc598 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 17:55:02 -0400 Subject: Have Docx reader properly interpret tabs. --- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4035cde99..c43879ed9 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -134,10 +134,12 @@ codeDivs = ["SourceCode"] runElemToInlines :: RunElem -> [Inline] runElemToInlines (TextRun s) = strToInlines s runElemToInlines (LnBrk) = [LineBreak] +runElemToInlines (Tab) = [Space] runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] +runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString -- cgit v1.2.3 From a934db9a320ec76e15e62954b75d0e8d2d972244 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 19 Jun 2014 19:28:55 -0400 Subject: Introduce blockNormalize This will help take care of spaces introduced at the beginning of strings. --- src/Text/Pandoc/Readers/Docx.hs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index c43879ed9..9c1d0c5e6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -150,6 +150,19 @@ strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +blockNormalize :: Block -> Block +blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) +blockNormalize (Plain ils) = Plain $ strNormalize ils +blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) +blockNormalize (Para ils) = Para $ strNormalize ils +blockNormalize (Header n attr (Space : ils)) = + blockNormalize $ Header n attr ils +blockNormalize (Table (Space : ils) align width hdr cells) = + blockNormalize $ Table ils align width hdr cells +blockNormalize (Table ils align width hdr cells) = + Table (strNormalize ils) align width hdr cells +blockNormalize blk = blk + runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = @@ -296,7 +309,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = bottomUp removeEmptyPars $ - bottomUp strNormalize $ + bottomUp blockNormalize $ bottomUp spanRemove $ bottomUp divRemove $ map (makeHeaderAnchors) $ -- cgit v1.2.3 From 3c059dbe600608f4166b02c63d7153ace3156665 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 23:24:28 -0700 Subject: HTML reader: Allow space between `` and ``. Test case: ```
X Y
1 2
``` --- src/Text/Pandoc/Readers/HTML.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d27afc543..204239923 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -262,6 +262,7 @@ pTable = try $ do pCol :: TagParser Double pCol = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "col" []) + skipMany pBlank optional $ pSatisfy (~== TagClose "col") skipMany pBlank return $ case lookup "width" attribs of -- cgit v1.2.3 From 557b302731411057cf12e62c87d98752f713d5d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 19 Jun 2014 23:31:17 -0700 Subject: Docx writer: Use Compact style for empty table cells. Otherwise we get overly tall lines when there are empty table cells and the other cells are compact. Closes #1353. --- src/Text/Pandoc/Writers/Docx.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 4e64a79df..31e64f14e 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -571,10 +571,12 @@ blockToOpenXML opts (Table caption aligns widths headers rows) = do [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] + let emptyCell = [mknode "w:p" [] [mknode "w:pPr" [] $ + [mknode "w:pStyle" [("w:val","Compact")] ()]]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents - then [mknode "w:p" [] ()] + then emptyCell else contents let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt -- cgit v1.2.3 From 7fd48b30e0a6e1e3c02a2b66c76118d10c02636f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 09:30:30 -0400 Subject: Docx reader: Fix hdr handling in block norm `blockNormalize` previously forgot to account for the case in which a Header's inlines did not start with a space. --- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9c1d0c5e6..84d50a396 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -157,6 +157,8 @@ blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) blockNormalize (Para ils) = Para $ strNormalize ils blockNormalize (Header n attr (Space : ils)) = blockNormalize $ Header n attr ils +blockNormalize (Header n attr ils) = + Header n attr $ strNormalize ils blockNormalize (Table (Space : ils) align width hdr cells) = blockNormalize $ Table ils align width hdr cells blockNormalize (Table ils align width hdr cells) = -- cgit v1.2.3 From 3da515bdb005cf16589b88d80aa4a8a71760e366 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:12:28 -0400 Subject: Docx reader: simplify blockNormalize Use a function `stripSpaces`, instead of recursion. Makes it a bit easier to read and mantain, and simplify normalizing DefinitionList, which was left out the first time. --- src/Text/Pandoc/Readers/Docx.hs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 84d50a396..08afc94e6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -150,19 +150,17 @@ strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +stripSpaces :: [Inline] -> [Inline] +stripSpaces ils = + reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils + blockNormalize :: Block -> Block -blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) -blockNormalize (Plain ils) = Plain $ strNormalize ils -blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) -blockNormalize (Para ils) = Para $ strNormalize ils -blockNormalize (Header n attr (Space : ils)) = - blockNormalize $ Header n attr ils +blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils +blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils blockNormalize (Header n attr ils) = - Header n attr $ strNormalize ils -blockNormalize (Table (Space : ils) align width hdr cells) = - blockNormalize $ Table ils align width hdr cells + Header n attr $ strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = - Table (strNormalize ils) align width hdr cells + Table (strNormalize $ stripSpaces ils) align width hdr cells blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 03af19a7e12ff3a7f0a396ebed73c6c17f12ad07 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:16:32 -0400 Subject: Docx Reader: Normalize DefinitionLists Previously DefinitionList had been left out of `blockNormalize`. Now it is included. --- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 08afc94e6..e62cf6f0e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -161,6 +161,8 @@ blockNormalize (Header n attr ils) = Header n attr $ strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = Table (strNormalize $ stripSpaces ils) align width hdr cells +blockNormalize (DefinitionList pairs) = + DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 2aa5f58c5b82dd5750e1bf5f30e1936d132104ac Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:27:18 -0400 Subject: Docx reader: Add a comment explaining strNormalize `normalize` from Text.Pandoc.Shared is more general. In tests, though, it more than doubles the run time. `strNormalize` does less, but it does what we need. This comment is added for future maintainability. --- src/Text/Pandoc/Readers/Docx.hs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index e62cf6f0e..8a8bc46a6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -144,6 +144,10 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString +--- We use this instead of the more general +--- Text.Pandoc.Shared.normalize for reasons of efficiency. For +--- whatever reason, `normalize` makes a run take almost twice as +--- long. (It does more, but this does what we need) strNormalize :: [Inline] -> [Inline] strNormalize [] = [] strNormalize (Str "" : ils) = strNormalize ils -- cgit v1.2.3 From f6ae644831aa40f7199ecfc9a1631880639b6312 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 12:31:36 -0400 Subject: Make strNormalize go bottomUp. This was how it used to be before it was folded into blockNormalize. --- src/Text/Pandoc/Readers/Docx.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8a8bc46a6..479a88161 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -159,14 +159,14 @@ stripSpaces ils = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils blockNormalize :: Block -> Block -blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils -blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils +blockNormalize (Plain ils) = Plain $ bottomUp strNormalize $ stripSpaces ils +blockNormalize (Para ils) = Para $ bottomUp strNormalize $ stripSpaces ils blockNormalize (Header n attr ils) = - Header n attr $ strNormalize $ stripSpaces ils + Header n attr $ bottomUp strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = - Table (strNormalize $ stripSpaces ils) align width hdr cells + Table (bottomUp strNormalize $ stripSpaces ils) align width hdr cells blockNormalize (DefinitionList pairs) = - DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs + DefinitionList $ map (\(ils, blklsts) -> (bottomUp strNormalize (stripSpaces ils), blklsts)) pairs blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 12efffa85a257dbe81137f97334b2c6a7e072777 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:24:30 -0700 Subject: LaTeX writer: Fixed strikeout + highlighted code. Closes #1294. Previously strikeout highlighted code caused an error. --- src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++++- tests/Tests/Writers/LaTeX.hs | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index c221b318e..ed735242f 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -678,7 +678,9 @@ inlineToLaTeX (Emph lst) = inlineToLaTeX (Strong lst) = inlineListToLaTeX lst >>= return . inCmd "textbf" inlineToLaTeX (Strikeout lst) = do - contents <- inlineListToLaTeX lst + -- we need to protect VERB in an mbox or we get an error + -- see #1294 + contents <- inlineListToLaTeX $ protectCode lst modify $ \s -> s{ stStrikeout = True } return $ inCmd "sout" contents inlineToLaTeX (Superscript lst) = @@ -784,6 +786,13 @@ inlineToLaTeX (Note contents) = do -- note: a \n before } needed when note ends with a Verbatim environment else "\\footnote" <> braces noteContents +protectCode :: [Inline] -> [Inline] +protectCode [] = [] +protectCode (x@(Code ("",[],[]) _) : xs) = x : protectCode xs +protectCode (x@(Code _ _) : xs) = ltx "\\mbox{" : x : ltx "}" : xs + where ltx = RawInline (Format "latex") +protectCode (x : xs) = x : protectCode xs + citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) = citeCommand c p s k diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index 8a9519e2e..6db6542a0 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -8,7 +8,7 @@ import Tests.Helpers import Tests.Arbitrary() latex :: (ToString a, ToPandoc a) => a -> String -latex = writeLaTeX def . toPandoc +latex = writeLaTeX def{ writerHighlight = True } . toPandoc latexListing :: (ToString a, ToPandoc a) => a -> String latexListing = writeLaTeX def{ writerListings = True } . toPandoc @@ -54,4 +54,14 @@ tests = [ testGroup "code blocks" (text "Header 1" <> note (plain $ text "note")) =?> "\\section*{Header 1\\footnote{note}}\\label{foo}\n\\addcontentsline{toc}{section}{Header 1}\n" ] + , testGroup "inline code" + [ "struck out and highlighted" =: + strikeout (codeWith ("",["haskell"],[]) "foo" <> space + <> str "bar") =?> + "\\sout{\\mbox{\\VERB|\\NormalTok{foo}|} bar}" + , "struck out and not highlighted" =: + strikeout (code "foo" <> space + <> str "bar") =?> + "\\sout{\\texttt{foo} bar}" + ] ] -- cgit v1.2.3 From cab4b829b3683cec1def11d7189b5a850f23b016 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:39:24 -0700 Subject: Support --trace in HTML reader. --- src/Text/Pandoc/Readers/HTML.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 204239923..7bbad4257 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,6 +50,8 @@ import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) import Data.Monoid +import Text.Printf (printf) +import Debug.Trace (trace) isSpace :: Char -> Bool isSpace ' ' = True @@ -92,7 +94,10 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> (mempty <$ pAnyTag) return mempty block :: TagParser Blocks -block = choice +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- choice [ pPara , pHeader , pBlockQuote @@ -106,6 +111,10 @@ block = choice , pDiv , pRawHtmlBlock ] + when tr $ trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res + pList :: TagParser Blocks pList = pBulletList <|> pOrderedList <|> pDefinitionList -- cgit v1.2.3 From b3b40546cb5ad00ee6fadcd83bcc38854fb137ae Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:47:29 -0700 Subject: HTML reader: Fix performance issue with malformed HTML tables. We let a `` tag close an open `` or ``. Closes #1167. --- src/Text/Pandoc/Readers/HTML.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 7bbad4257..552e8a251 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -471,6 +471,8 @@ pCloses tagtype = try $ do (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () (TagClose "dl") | tagtype == "li" -> return () + (TagClose "table") | tagtype == "td" -> return () + (TagClose "table") | tagtype == "tr" -> return () _ -> mzero pTagText :: TagParser Inlines -- cgit v1.2.3 From 2eadc7805392c165f7286bd9a337b310b41c897d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 10:58:26 -0700 Subject: ImageSize: Use default instead of failing if image size not found in exif header. Closes #1358. --- src/Text/Pandoc/ImageSize.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 9e6b457c0..68b34dcf3 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -76,6 +76,9 @@ imageSize img = do Eps -> epsSize img Pdf -> Nothing -- TODO +defaultSize :: (Integer, Integer) +defaultSize = (72, 72) + sizeInPixels :: ImageSize -> (Integer, Integer) sizeInPixels s = (pxX s, pxY s) @@ -260,7 +263,9 @@ exifHeader hdr = do lookup ExifImageHeight allentries) of (Just (UnsignedLong w), Just (UnsignedLong h)) -> return (fromIntegral w, fromIntegral h) - _ -> fail "Could not determine image width, height" + _ -> return defaultSize + -- we return a default width and height when + -- the exif header doesn't contain these let resfactor = case lookup ResolutionUnit allentries of Just (UnsignedShort 1) -> (100 / 254) _ -> 1 -- cgit v1.2.3 From 56c410ef6a3b7c3dd4054b18ea667cb4ab5a5ee0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 11:10:35 -0700 Subject: Markdown reader: Prevent spurious line breaks after list items. When the `hard_line_breaks` option was specified, pandoc would produce a spurious line break after a tight list item. This patch solves the problem. Closes #1137. --- src/Text/Pandoc/Readers/Markdown.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a6720beba..2c9064994 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1560,7 +1560,8 @@ endline = try $ do guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header guardDisabled Ext_backtick_code_blocks <|> notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced)) - (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + (eof >> return mempty) + <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) <|> (return $ return B.space) -- cgit v1.2.3 From d81b4358ea8d0da615c01a468196fd0592934af6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 11:26:38 -0700 Subject: LaTeX writer: Correctly handle figures in notes. Notes can't contain figures in LaTeX, so we fake it to avoid an error. Closes #1053. --- src/Text/Pandoc/Writers/LaTeX.hs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index ed735242f..f2f7438c4 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -311,12 +311,14 @@ blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image txt (src,'f':'i':'g':':':tit)]) = do - capt <- if null txt - then return empty - else (\c -> "\\caption" <> braces c) `fmap` inlineListToLaTeX txt + inNote <- gets stInNote + capt <- inlineListToLaTeX txt img <- inlineToLaTeX (Image txt (src,tit)) - return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - capt $$ "\\end{figure}" + return $ if inNote + -- can't have figures in notes + then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" + else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ + ("\\caption" <> braces capt) $$ "\\end{figure}" -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- writerBeamer `fmap` gets stOptions -- cgit v1.2.3 From 8f20ac3da3b0d1111f94161d5b3528dfa94d1069 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 11:39:24 -0700 Subject: MediaWiki reader: Support --trace. --- src/Text/Pandoc/Readers/MediaWiki.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e4fabc898..eb9dd5b8a 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -55,6 +55,8 @@ import qualified Data.Foldable as F import qualified Data.Map as M import Data.Char (isDigit, isSpace) import Data.Maybe (fromMaybe) +import Text.Printf (printf) +import Debug.Trace (trace) -- | Read mediawiki from an input string and return a Pandoc document. readMediaWiki :: ReaderOptions -- ^ Reader options @@ -187,7 +189,10 @@ parseMediaWiki = do -- block :: MWParser Blocks -block = mempty <$ skipMany1 blankline +block = do + tr <- getOption readerTrace + pos <- getPosition + res <- mempty <$ skipMany1 blankline <|> table <|> header <|> hrule @@ -199,6 +204,10 @@ block = mempty <$ skipMany1 blankline <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para + when tr $ + trace (printf "line %d: %s" (sourceLine pos) + (take 60 $ show $ B.toList res)) (return ()) + return res para :: MWParser Blocks para = do -- cgit v1.2.3 From d397a66107a932e702d0f9cbba5df3ce09be25fd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 12:00:26 -0700 Subject: MediaWiki reader: Tightened up template parsing. The opening "{{" must be followed by an alphanumeric or ':'. This prevents the exponential slowdown in #1033. Closes #1033. --- src/Text/Pandoc/Readers/MediaWiki.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index eb9dd5b8a..f1dcce8f7 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -317,6 +317,7 @@ template :: MWParser String template = try $ do string "{{" notFollowedBy (char '{') + lookAhead $ letter <|> digit <|> char ':' let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar contents <- manyTill chunk (try $ string "}}") return $ "{{" ++ concat contents ++ "}}" -- cgit v1.2.3 From 5d0103606f469a6336beb2f7a90f210dd29485bd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 20 Jun 2014 15:24:24 -0700 Subject: Markdown reader: Support smallcaps through span. `foo` will be parsed as a `SmallCaps` inline, and will work in all output formats that support small caps. Closes #1360. --- README | 8 ++++++++ src/Text/Pandoc/Readers/Markdown.hs | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/README b/README index 122db23ec..012b75e2a 100644 --- a/README +++ b/README @@ -2020,6 +2020,14 @@ Attributes can be attached to verbatim text, just as with `<$>`{.haskell} +### Small caps ### + +To write small caps, you can use an HTML span tag: + + Small caps + +This will work in all output formats that support small caps. + Math ---- diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2c9064994..6c710c8ff 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1736,7 +1736,12 @@ spanHtml = try $ do let ident = fromMaybe "" $ lookup "id" attrs let classes = maybe [] words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.spanWith (ident, classes, keyvals) <$> contents + case lookup "style" keyvals of + Just s | null ident && null classes && + map toLower (filter (`notElem` " \t;") s) == + "font-variant:small-caps" + -> return $ B.smallcaps <$> contents + _ -> return $ B.spanWith (ident, classes, keyvals) <$> contents divHtml :: MarkdownParser (F Blocks) divHtml = try $ do -- cgit v1.2.3 From 9c7e0dc84b2384347099827999f0e2f7be4f7e51 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 21 Jun 2014 17:53:45 -0400 Subject: Implement new normalization. There were some problems with the old str normalization. This fixes those problems. Also, since it drills down on its own, it only needs to be mapped over the blocks, not walked over the tree. --- src/Text/Pandoc/Readers/Docx.hs | 68 ++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 11 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 479a88161..299adf5a8 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -148,25 +148,71 @@ runElemsToString = concatMap runElemToString --- Text.Pandoc.Shared.normalize for reasons of efficiency. For --- whatever reason, `normalize` makes a run take almost twice as --- long. (It does more, but this does what we need) -strNormalize :: [Inline] -> [Inline] -strNormalize [] = [] -strNormalize (Str "" : ils) = strNormalize ils -strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) -strNormalize (il:ils) = il : (strNormalize ils) +inlineNormalize :: [Inline] -> [Inline] +inlineNormalize [] = [] +inlineNormalize (Str "" : ils) = inlineNormalize ils +inlineNormalize ((Str s) : (Str s') : l) = + inlineNormalize (Str (s++s') : l) +inlineNormalize ((Emph ils) : (Emph ils') : l) = + inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Emph ils) : l) = + Emph (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Strong ils) : (Strong ils') : l) = + inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Strong ils) : l) = + Strong (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = + inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Strikeout ils) : l) = + Strikeout (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Superscript ils) : (Superscript ils') : l) = + inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Superscript ils) : l) = + Superscript (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Subscript ils) : (Subscript ils') : l) = + inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l +inlineNormalize ((Subscript ils) : l) = + Subscript (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Space : Space : l)) = + inlineNormalize $ (Space : l) +inlineNormalize ((Quoted qt ils) : l) = + Quoted qt (inlineNormalize ils) : inlineNormalize l +inlineNormalize ((Cite cits ils) : l) = + let + f :: Citation -> Citation + f (Citation s pref suff mode num hash) = + Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash + in + Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize ((Link ils s) : l) = + Link (inlineNormalize ils) s : (inlineNormalize l) +inlineNormalize ((Image ils s) : l) = + Image (inlineNormalize ils) s : (inlineNormalize l) +inlineNormalize ((Note blks) : l) = + Note (map blockNormalize blks) : (inlineNormalize l) +inlineNormalize ((Span attr ils) : l) = + Span attr (inlineNormalize ils) : (inlineNormalize l) +inlineNormalize (il : l) = il : (inlineNormalize l) stripSpaces :: [Inline] -> [Inline] stripSpaces ils = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils blockNormalize :: Block -> Block -blockNormalize (Plain ils) = Plain $ bottomUp strNormalize $ stripSpaces ils -blockNormalize (Para ils) = Para $ bottomUp strNormalize $ stripSpaces ils +blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils +blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils blockNormalize (Header n attr ils) = - Header n attr $ bottomUp strNormalize $ stripSpaces ils + Header n attr $ stripSpaces $ inlineNormalize ils blockNormalize (Table ils align width hdr cells) = - Table (bottomUp strNormalize $ stripSpaces ils) align width hdr cells + Table (stripSpaces $ inlineNormalize ils) align width hdr cells blockNormalize (DefinitionList pairs) = - DefinitionList $ map (\(ils, blklsts) -> (bottomUp strNormalize (stripSpaces ils), blklsts)) pairs + DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs +blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks) +blockNormalize (OrderedList attr blkslst) = + OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst +blockNormalize (BulletList blkslst) = + BulletList $ map (\blks -> map blockNormalize blks) blkslst +blockNormalize (Div attr blks) = Div attr (map blockNormalize blks) blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] @@ -315,7 +361,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = bottomUp removeEmptyPars $ - bottomUp blockNormalize $ + map blockNormalize $ bottomUp spanRemove $ bottomUp divRemove $ map (makeHeaderAnchors) $ -- cgit v1.2.3 From 8e5bd9d851aa0f60462015f61e3980b134e3c131 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 22 Jun 2014 01:47:11 -0400 Subject: Docx reader: Fix spacing in formatting. The normalizing tests revealed a problem with unformatted spaces, brought about by `spanTrim`. This fixes by not trimming the spaces out of spans until they are in their final form. --- src/Text/Pandoc/Readers/Docx.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 299adf5a8..09c2330fb 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -286,8 +286,8 @@ parPartsToInlines opts docx parparts = (if False -- TODO depend on option then bottomUp (makeImagesSelfContained docx) else id) $ - bottomUp spanCorrect $ bottomUp spanTrim $ + bottomUp spanCorrect $ bottomUp spanReduce $ concatMap (parPartToInlines opts docx) parparts -- cgit v1.2.3 From e03ed7377cfd3d64d65f186aa76b17417a4e4fde Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 23 Jun 2014 12:41:47 -0700 Subject: Markdown reader: Combine consecutive latex environments. This helps when you have two minipages which can't have blank lines between them. See #690, #1196. --- src/Text/Pandoc/Readers/Markdown.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 6c710c8ff..690256224 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -939,8 +939,10 @@ rawVerbatimBlock = try $ do rawTeXBlock :: MarkdownParser (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "latex" <$> rawLaTeXBlock) - <|> (B.rawBlock "context" <$> rawConTeXtEnvironment) + result <- (B.rawBlock "latex" . concat <$> + rawLaTeXBlock `sepEndBy1` blankline) + <|> (B.rawBlock "context" . concat <$> + rawConTeXtEnvironment `sepEndBy1` blankline) spaces return $ return result -- cgit v1.2.3 From 87ab01637e1dc0f583277828bc458567a72e38ce Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 23 Jun 2014 12:51:10 -0700 Subject: LaTeX writer: Use `\textquotesingle` for `'` in inline code. Otherwise we get curly quotes in the PDF output. Closes #1364. --- src/Text/Pandoc/Writers/LaTeX.hs | 1 + tests/Tests/Writers/LaTeX.hs | 2 ++ 2 files changed, 3 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f2f7438c4..100bf900d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -220,6 +220,7 @@ stringToLaTeX ctx (x:xs) = do '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as ']' -> "{]}" ++ rest -- optional arguments + '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest '\160' -> "~" ++ rest '\x2026' -> "\\ldots{}" ++ rest '\x2018' | ligatures -> "`" ++ rest diff --git a/tests/Tests/Writers/LaTeX.hs b/tests/Tests/Writers/LaTeX.hs index 6db6542a0..c32ded36d 100644 --- a/tests/Tests/Writers/LaTeX.hs +++ b/tests/Tests/Writers/LaTeX.hs @@ -63,5 +63,7 @@ tests = [ testGroup "code blocks" strikeout (code "foo" <> space <> str "bar") =?> "\\sout{\\texttt{foo} bar}" + , "single quotes" =: + code "dog's" =?> "\\texttt{dog\\textquotesingle{}s}" ] ] -- cgit v1.2.3 From ef5fad2698f3d4c1fe528f138264cc8abb3b2943 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:25:46 -0400 Subject: Add new typeclass, Reducible This defines a typeclass `Reducible` which allows us to "reduce" pandoc Inlines and Blocks, like so Emph [Strong [Str "foo", Space]] <++> Strong [Emph [Str "bar"]], Str "baz"] = [Strong [Emph [Str "foo", Space, Str "bar"], Space, Str "baz"]] So adjacent formattings and strings are appropriately grouped. Another set of operators for `(Reducible a) => (Many a)` are also included. --- src/Text/Pandoc/Readers/Docx/Reducible.hs | 150 ++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Docx/Reducible.hs (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs new file mode 100644 index 000000000..1ed31ebd0 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -0,0 +1,150 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Pandoc.Readers.Docx.Reducible ((<++>), + (<+++>), + Reducible, + Container(..), + container, + innards, + reduceList, + reduceListB, + rebuild) + where + +import Text.Pandoc.Builder +import Data.List ((\\), intersect) + +data Container a = Container ([a] -> a) | NullContainer + +instance (Eq a) => Eq (Container a) where + (Container x) == (Container y) = ((x []) == (y [])) + NullContainer == NullContainer = True + _ == _ = False + +instance (Show a) => Show (Container a) where + show (Container x) = "Container {" ++ + (reverse $ drop 3 $ reverse $ show $ x []) ++ + "}" + show (NullContainer) = "NullContainer" + +class Reducible a where + (<++>) :: a -> a -> [a] + container :: a -> Container a + innards :: a -> [a] + isSpace :: a -> Bool + +(<+++>) :: (Reducible a) => Many a -> Many a -> Many a +mr <+++> ms = fromList $ reduceList $ toList mr ++ toList ms + +reduceListB :: (Reducible a) => Many a -> Many a +reduceListB = fromList . reduceList . toList + +reduceList' :: (Reducible a) => [a] -> [a] -> [a] +reduceList' acc [] = acc +reduceList' [] (x:xs) = reduceList' [x] xs +reduceList' as (x:xs) = reduceList' (init as ++ (last as <++> x) ) xs + +reduceList :: (Reducible a) => [a] -> [a] +reduceList = reduceList' [] + +combineReducibles :: (Reducible a, Eq a) => a -> a -> [a] +combineReducibles r s = + let (conts, rs) = topLevelContainers r + (conts', ss) = topLevelContainers s + shared = conts `intersect` conts' + remaining = conts \\ shared + remaining' = conts' \\ shared + in + case null shared of + True -> case (not . null) rs && isSpace (last rs) of + True -> rebuild conts (init rs) ++ [last rs, s] + False -> [r,s] + False -> rebuild + shared $ + reduceList $ + (rebuild remaining rs) ++ (rebuild remaining' ss) + +instance Reducible Inline where + s1@(Span (id1, classes1, kvs1) ils1) <++> s2@(Span (id2, classes2, kvs2) ils2) = + let classes' = classes1 `intersect` classes2 + kvs' = kvs1 `intersect` kvs2 + classes1' = classes1 \\ classes' + kvs1' = kvs1 \\ kvs' + classes2' = classes2 \\ classes' + kvs2' = kvs2 \\ kvs' + in + case null classes' && null kvs' of + True -> [s1,s2] + False -> let attr' = ("", classes', kvs') + attr1' = (id1, classes1', kvs1') + attr2' = (id2, classes2', kvs2') + s1' = case null classes1' && null kvs1' of + True -> ils1 + False -> [Span attr1' ils1] + s2' = case null classes2' && null kvs2' of + True -> ils2 + False -> [Span attr2' ils2] + in + [Span attr' $ reduceList $ s1' ++ s2'] + + (Str x) <++> (Str y) = [Str (x++y)] + il <++> il' = combineReducibles il il' + + container (Emph _) = Container Emph + container (Strong _) = Container Strong + container (Strikeout _) = Container Strikeout + container (Subscript _) = Container Subscript + container (Superscript _) = Container Superscript + container (Quoted qt _) = Container $ Quoted qt + container (Cite cs _) = Container $ Cite cs + container (Span attr _) = Container $ Span attr + container _ = NullContainer + + innards (Emph ils) = ils + innards (Strong ils) = ils + innards (Strikeout ils) = ils + innards (Subscript ils) = ils + innards (Superscript ils) = ils + innards (Quoted _ ils) = ils + innards (Cite _ ils) = ils + innards (Span _ ils) = ils + innards _ = [] + + isSpace Space = True + isSpace _ = False + +instance Reducible Block where + (Div (ident, classes, kvs) blks) <++> blk | "list-item" `elem` classes = + [Div (ident, classes, kvs) (reduceList blks), blk] + + blk <++> blk' = combineReducibles blk blk' + + container (BlockQuote _) = Container BlockQuote + container (Div attr _) = Container $ Div attr + container _ = NullContainer + + innards (BlockQuote bs) = bs + innards (Div _ bs) = bs + innards _ = [] + + isSpace _ = False + + +topLevelContainers' :: (Reducible a) => [a] -> ([Container a], [a]) +topLevelContainers' (r : []) = case container r of + NullContainer -> ([], [r]) + _ -> + let (conts, inns) = topLevelContainers' (innards r) + in + ((container r) : conts, inns) +topLevelContainers' rs = ([], rs) + +topLevelContainers :: (Reducible a) => a -> ([Container a], [a]) +topLevelContainers il = topLevelContainers' [il] + +rebuild :: [Container a] -> [a] -> [a] +rebuild [] xs = xs +rebuild ((Container f) : cs) xs = rebuild cs $ [f xs] +rebuild (NullContainer : cs) xs = rebuild cs $ xs + + -- cgit v1.2.3 From 94d0fb15382a4855938c540c9e521642bccc00e3 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:27:01 -0400 Subject: Move some of the clean-up logic into List module. This will allow us to get rid of more general functions we no longer need in the main reader. --- src/Text/Pandoc/Readers/Docx/Lists.hs | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index 68559d98b..1e37d0076 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -29,9 +29,12 @@ Functions for converting flat docx paragraphs into nested lists. -} module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets - , blocksToDefinitions) where + , blocksToDefinitions + , listParagraphDivs + ) where import Text.Pandoc.JSON +import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Shared (trim) import Control.Monad import Data.List @@ -159,10 +162,9 @@ flatToBullets elems = flatToBullets' (-1) elems blocksToBullets :: [Block] -> [Block] blocksToBullets blks = - -- bottomUp removeListItemDivs $ + bottomUp removeListDivs $ flatToBullets $ (handleListParagraphs blks) - plainParaInlines :: Block -> [Inline] plainParaInlines (Plain ils) = ils plainParaInlines (Para ils) = ils @@ -199,6 +201,23 @@ blocksToDefinitions' [] acc (b:blks) = blocksToDefinitions' defAcc acc (b:blks) = blocksToDefinitions' [] (b : (DefinitionList (reverse defAcc)) : acc) blks +removeListDivs' :: Block -> [Block] +removeListDivs' (Div (ident, classes, kvs) blks) + | "list-item" `elem` classes = + case delete "list-item" classes of + [] -> blks + classes' -> [Div (ident, classes', kvs) $ blks] +removeListDivs' (Div (ident, classes, kvs) blks) + | not $ null $ listParagraphDivs `intersect` classes = + case classes \\ listParagraphDivs of + [] -> blks + classes' -> [Div (ident, classes', kvs) blks] +removeListDivs' blk = [blk] + +removeListDivs :: [Block] -> [Block] +removeListDivs = concatMap removeListDivs' + + blocksToDefinitions :: [Block] -> [Block] blocksToDefinitions = blocksToDefinitions' [] [] -- cgit v1.2.3 From 11b0778744d0eeb61e2502e452d010631fab979b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 15:27:55 -0400 Subject: Use Reducible in docx reader. This cleans up them implementation, and cuts down on tree-walking. Anecdotally, I've seen about a 3-fold speedup. --- src/Text/Pandoc/Readers/Docx.hs | 384 ++++++++++++---------------------------- 1 file changed, 111 insertions(+), 273 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 09c2330fb..ffe7f5a92 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -79,8 +79,10 @@ import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.UTF8 (toString) +import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists +import Text.Pandoc.Readers.Docx.Reducible import Data.Maybe (mapMaybe, isJust, fromJust) import Data.List (delete, isPrefixOf, (\\), intersect) import qualified Data.ByteString as BS @@ -96,28 +98,65 @@ readDocx opts bytes = Just docx -> Pandoc nullMeta (docxToBlocks opts docx) Nothing -> error $ "couldn't parse docx file" -runStyleToSpanAttr :: RunStyle -> (String, [String], [(String, String)]) -runStyleToSpanAttr rPr = ("", - mapMaybe id [ - if isBold rPr then (Just "strong") else Nothing, - if isItalic rPr then (Just "emph") else Nothing, - if isSmallCaps rPr then (Just "smallcaps") else Nothing, - if isStrike rPr then (Just "strike") else Nothing, - if isSuperScript rPr then (Just "superscript") else Nothing, - if isSubScript rPr then (Just "subscript") else Nothing, - rStyle rPr], - case underline rPr of - Just fmt -> [("underline", fmt)] - _ -> [] - ) - -parStyleToDivAttr :: ParagraphStyle -> (String, [String], [(String, String)]) -parStyleToDivAttr pPr = ("", - pStyle pPr, - case indent pPr of - Just n -> [("indent", (show n))] - Nothing -> [] - ) +spansToKeep :: [String] +spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans + + +-- This is empty, but we put it in for future-proofing. +divsToKeep :: [String] +divsToKeep = [] + +runStyleToContainers :: RunStyle -> [Container Inline] +runStyleToContainers rPr = + let formatters = mapMaybe id + [ if isBold rPr then (Just Strong) else Nothing + , if isItalic rPr then (Just Emph) else Nothing + , if isSmallCaps rPr then (Just SmallCaps) else Nothing + , if isStrike rPr then (Just Strikeout) else Nothing + , if isSuperScript rPr then (Just Superscript) else Nothing + , if isSubScript rPr then (Just Subscript) else Nothing + , rStyle rPr >>= + (\s -> if s `elem` spansToKeep then Just s else Nothing) >>= + (\s -> Just $ Span ("", [s], [])) + , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + ] + in + map Container formatters + + +divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] +divAttrToContainers [] [] = [] +divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = + let n = fromJust (isHeaderClass c) + in + [(Container $ \blks -> + Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] +divAttrToContainers (c:_) _ | c `elem` codeDivs = + [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)] +divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = + let kvs' = filter (\(k,_) -> k /= "indent") kvs + in + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs') +divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = + (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) +divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) +divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs +divAttrToContainers [] (kv:kvs) | fst kv == "indent" = + (Container BlockQuote) : divAttrToContainers [] kvs +divAttrToContainers [] (_:kvs) = + divAttrToContainers [] kvs + + +parStyleToContainers :: ParagraphStyle -> [Container Block] +parStyleToContainers pPr = + let classes = pStyle pPr + kvs = case indent pPr of + Just n -> [("indent", show n)] + Nothing -> [] + in + divAttrToContainers classes kvs + strToInlines :: String -> [Inline] strToInlines = toList . text @@ -144,103 +183,42 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString ---- We use this instead of the more general ---- Text.Pandoc.Shared.normalize for reasons of efficiency. For ---- whatever reason, `normalize` makes a run take almost twice as ---- long. (It does more, but this does what we need) -inlineNormalize :: [Inline] -> [Inline] -inlineNormalize [] = [] -inlineNormalize (Str "" : ils) = inlineNormalize ils -inlineNormalize ((Str s) : (Str s') : l) = - inlineNormalize (Str (s++s') : l) -inlineNormalize ((Emph ils) : (Emph ils') : l) = - inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Emph ils) : l) = - Emph (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Strong ils) : (Strong ils') : l) = - inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Strong ils) : l) = - Strong (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) = - inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Strikeout ils) : l) = - Strikeout (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Superscript ils) : (Superscript ils') : l) = - inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Superscript ils) : l) = - Superscript (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Subscript ils) : (Subscript ils') : l) = - inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l -inlineNormalize ((Subscript ils) : l) = - Subscript (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Space : Space : l)) = - inlineNormalize $ (Space : l) -inlineNormalize ((Quoted qt ils) : l) = - Quoted qt (inlineNormalize ils) : inlineNormalize l -inlineNormalize ((Cite cits ils) : l) = - let - f :: Citation -> Citation - f (Citation s pref suff mode num hash) = - Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash - in - Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize ((Link ils s) : l) = - Link (inlineNormalize ils) s : (inlineNormalize l) -inlineNormalize ((Image ils s) : l) = - Image (inlineNormalize ils) s : (inlineNormalize l) -inlineNormalize ((Note blks) : l) = - Note (map blockNormalize blks) : (inlineNormalize l) -inlineNormalize ((Span attr ils) : l) = - Span attr (inlineNormalize ils) : (inlineNormalize l) -inlineNormalize (il : l) = il : (inlineNormalize l) - -stripSpaces :: [Inline] -> [Inline] -stripSpaces ils = - reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils - -blockNormalize :: Block -> Block -blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils -blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils -blockNormalize (Header n attr ils) = - Header n attr $ stripSpaces $ inlineNormalize ils -blockNormalize (Table ils align width hdr cells) = - Table (stripSpaces $ inlineNormalize ils) align width hdr cells -blockNormalize (DefinitionList pairs) = - DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs -blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks) -blockNormalize (OrderedList attr blkslst) = - OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst -blockNormalize (BulletList blkslst) = - BulletList $ map (\blks -> map blockNormalize blks) blkslst -blockNormalize (Div attr blks) = Div attr (map blockNormalize blks) -blockNormalize blk = blk + +inlineCodeContainer :: Container Inline -> Bool +inlineCodeContainer (Container f) = case f [] of + Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans) + _ -> False +inlineCodeContainer _ = False + +-- blockCodeContainer :: Container Block -> Bool +-- blockCodeContainer (Container f) = case f [] of +-- Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs) +-- _ -> False +-- blockCodeContainer _ = False runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) - | isJust (rStyle rs) && (fromJust (rStyle rs)) `elem` codeSpans = - case runStyleToSpanAttr rs == ("", [], []) of - True -> [Str (runElemsToString runElems)] - False -> [Span (runStyleToSpanAttr rs) [Str (runElemsToString runElems)]] - | otherwise = case runStyleToSpanAttr rs == ("", [], []) of - True -> concatMap runElemToInlines runElems - False -> [Span (runStyleToSpanAttr rs) (concatMap runElemToInlines runElems)] + | any inlineCodeContainer (runStyleToContainers rs) = + rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems] + | otherwise = + rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems) runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = case (getFootNote fnId notes) of Just bodyParts -> - [Note [Div ("", ["footnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] Nothing -> - [Note [Div ("", ["footnote"], []) []]] + [Note []] runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = case (getEndNote fnId notes) of Just bodyParts -> - [Note [Div ("", ["endnote"], []) (map (bodyPartToBlock opts docx) bodyParts)]] + [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] Nothing -> - [Note [Div ("", ["endnote"], []) []]] + [Note []] parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines _ _ (BookMark _ anchor) = - [Span (anchor, ["anchor"], []) []] +parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] +parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = case lookupRelationship relid rels of Just target -> [Image [] (combine "word" target, "")] @@ -276,7 +254,6 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = _ -> h makeHeaderAnchors blk = blk - parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] parPartsToInlines opts docx parparts = -- @@ -284,23 +261,32 @@ parPartsToInlines opts docx parparts = -- not mandatory. -- (if False -- TODO depend on option - then bottomUp (makeImagesSelfContained docx) + then walk (makeImagesSelfContained docx) else id) $ - bottomUp spanTrim $ - bottomUp spanCorrect $ - bottomUp spanReduce $ - concatMap (parPartToInlines opts docx) parparts + -- bottomUp spanTrim $ + -- bottomUp spanCorrect $ + -- bottomUp spanReduce $ + reduceList $ concatMap (parPartToInlines opts docx) parparts cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = map (bodyPartToBlock opts docx) bps +cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells -bodyPartToBlock :: ReaderOptions -> Docx -> BodyPart -> Block -bodyPartToBlock opts docx (Paragraph pPr parparts) = - Div (parStyleToDivAttr pPr) [Para (parPartsToInlines opts docx parparts)] -bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = +bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] +bodyPartToBlocks opts docx (Paragraph pPr parparts) = + case parPartsToInlines opts docx parparts of + [] -> + [] + _ -> + let parContents = parPartsToInlines opts docx parparts + trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents + in + rebuild + (parStyleToContainers pPr) + [Para trimmedContents] +bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = let kvs = case lookupLevel numId lvl numbering of Just (_, fmt, txt, Just start) -> [ ("level", lvl) @@ -317,12 +303,12 @@ bodyPartToBlock opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parpa ] Nothing -> [] in - Div - ("", ["list-item"], kvs) - [bodyPartToBlock opts docx (Paragraph pPr parparts)] -bodyPartToBlock _ _ (Tbl _ _ _ []) = - Para [] -bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = + [Div + ("", ["list-item"], kvs) + (bodyPartToBlocks opts docx (Paragraph pPr parparts))] +bodyPartToBlocks _ _ (Tbl _ _ _ []) = + [Para []] +bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) = let caption = strToInlines cap (hdr, rows) = case firstRowFormatting look of True -> (Just r, rs) @@ -344,7 +330,8 @@ bodyPartToBlock opts docx (Tbl cap _ look (r:rs)) = alignments = take size (repeat AlignDefault) widths = take size (repeat 0) :: [Double] in - Table caption alignments widths hdrCells cells + [Table caption alignments widths hdrCells cells] + makeImagesSelfContained :: Docx -> Inline -> Inline makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = @@ -360,127 +347,19 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = - bottomUp removeEmptyPars $ - map blockNormalize $ - bottomUp spanRemove $ - bottomUp divRemove $ map (makeHeaderAnchors) $ - bottomUp divCorrect $ - bottomUp divReduce $ - bottomUp divCorrectPreReduce $ bottomUp blocksToDefinitions $ blocksToBullets $ - map (bodyPartToBlock opts docx) bps + concatMap (bodyPartToBlocks opts docx) bps docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body -spanReduce :: [Inline] -> [Inline] -spanReduce [] = [] -spanReduce ((Span (id1, classes1, kvs1) ils1) : ils) - | (id1, classes1, kvs1) == ("", [], []) = ils1 ++ (spanReduce ils) -spanReduce (s1@(Span (id1, classes1, kvs1) ils1) : - s2@(Span (id2, classes2, kvs2) ils2) : - ils) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> s1 : (spanReduce (s2 : ils)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - spanReduce (Span attr' [(Span attr1' ils1), (Span attr2' ils2)] : - ils) -spanReduce (il:ils) = il : (spanReduce ils) ilToCode :: Inline -> String ilToCode (Str s) = s ilToCode _ = "" -spanRemove' :: Inline -> [Inline] -spanRemove' s@(Span (ident, classes, _) []) - -- "_GoBack" is automatically inserted. We don't want to keep it. - | classes == ["anchor"] && not (ident `elem` dummyAnchors) = [s] -spanRemove' (Span (_, _, kvs) ils) = - case lookup "underline" kvs of - Just val -> [Span ("", [], [("underline", val)]) ils] - Nothing -> ils -spanRemove' il = [il] - -spanRemove :: [Inline] -> [Inline] -spanRemove = concatMap spanRemove' - -spanTrim' :: Inline -> [Inline] -spanTrim' il@(Span _ []) = [il] -spanTrim' il@(Span attr (il':[])) - | il' == Space = [Span attr [], Space] - | otherwise = [il] -spanTrim' (Span attr ils) - | head ils == Space && last ils == Space = - [Space, Span attr (init $ tail ils), Space] - | head ils == Space = [Space, Span attr (tail ils)] - | last ils == Space = [Span attr (init ils), Space] -spanTrim' il = [il] - -spanTrim :: [Inline] -> [Inline] -spanTrim = concatMap spanTrim' - -spanCorrect' :: Inline -> [Inline] -spanCorrect' (Span ("", [], []) ils) = ils -spanCorrect' (Span (ident, classes, kvs) ils) - | "emph" `elem` classes = - [Emph $ spanCorrect' $ Span (ident, (delete "emph" classes), kvs) ils] - | "strong" `elem` classes = - [Strong $ spanCorrect' $ Span (ident, (delete "strong" classes), kvs) ils] - | "smallcaps" `elem` classes = - [SmallCaps $ spanCorrect' $ Span (ident, (delete "smallcaps" classes), kvs) ils] - | "strike" `elem` classes = - [Strikeout $ spanCorrect' $ Span (ident, (delete "strike" classes), kvs) ils] - | "superscript" `elem` classes = - [Superscript $ spanCorrect' $ Span (ident, (delete "superscript" classes), kvs) ils] - | "subscript" `elem` classes = - [Subscript $ spanCorrect' $ Span (ident, (delete "subscript" classes), kvs) ils] - | (not . null) (codeSpans `intersect` classes) = - [Code (ident, (classes \\ codeSpans), kvs) (init $ unlines $ map ilToCode ils)] - | otherwise = - [Span (ident, classes, kvs) ils] -spanCorrect' il = [il] - -spanCorrect :: [Inline] -> [Inline] -spanCorrect = concatMap spanCorrect' - -removeEmptyPars :: [Block] -> [Block] -removeEmptyPars blks = filter (\b -> b /= (Para [])) blks - -divReduce :: [Block] -> [Block] -divReduce [] = [] -divReduce ((Div (id1, classes1, kvs1) blks1) : blks) - | (id1, classes1, kvs1) == ("", [], []) = blks1 ++ (divReduce blks) -divReduce (d1@(Div (id1, classes1, kvs1) blks1) : - d2@(Div (id2, classes2, kvs2) blks2) : - blks) = - let classes' = classes1 `intersect` classes2 - kvs' = kvs1 `intersect` kvs2 - classes1' = classes1 \\ classes' - kvs1' = kvs1 \\ kvs' - classes2' = classes2 \\ classes' - kvs2' = kvs2 \\ kvs' - in - case null classes' && null kvs' of - True -> d1 : (divReduce (d2 : blks)) - False -> let attr' = ("", classes', kvs') - attr1' = (id1, classes1', kvs1') - attr2' = (id2, classes2', kvs2') - in - divReduce (Div attr' [(Div attr1' blks1), (Div attr2' blks2)] : - blks) -divReduce (blk:blks) = blk : (divReduce blks) isHeaderClass :: String -> Maybe Int isHeaderClass s | "Heading" `isPrefixOf` s = @@ -490,27 +369,12 @@ isHeaderClass s | "Heading" `isPrefixOf` s = _ -> Nothing isHeaderClass _ = Nothing -findHeaderClass :: [String] -> Maybe Int -findHeaderClass ss = case mapMaybe id $ map isHeaderClass ss of - [] -> Nothing - n : _ -> Just n blksToInlines :: [Block] -> [Inline] blksToInlines (Para ils : _) = ils blksToInlines (Plain ils : _) = ils blksToInlines _ = [] -divCorrectPreReduce' :: Block -> [Block] -divCorrectPreReduce' (Div (ident, classes, kvs) blks) - | isJust $ findHeaderClass classes = - let n = fromJust $ findHeaderClass classes - in - [Header n (ident, delete ("Heading" ++ (show n)) classes, kvs) (blksToInlines blks)] - | otherwise = [Div (ident, classes, kvs) blks] -divCorrectPreReduce' blk = [blk] - -divCorrectPreReduce :: [Block] -> [Block] -divCorrectPreReduce = concatMap divCorrectPreReduce' blkToCode :: Block -> String blkToCode (Para []) = "" @@ -520,29 +384,3 @@ blkToCode (Para ((Span (_, classes, _) ils'): ils)) (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) blkToCode _ = "" -divRemove' :: Block -> [Block] -divRemove' (Div (_, _, kvs) blks) = - case lookup "indent" kvs of - Just val -> [Div ("", [], [("indent", val)]) blks] - Nothing -> blks -divRemove' blk = [blk] - -divRemove :: [Block] -> [Block] -divRemove = concatMap divRemove' - -divCorrect' :: Block -> [Block] -divCorrect' b@(Div (ident, classes, kvs) blks) - | (not . null) (blockQuoteDivs `intersect` classes) = - [BlockQuote [Div (ident, classes \\ blockQuoteDivs, kvs) blks]] - | (not . null) (codeDivs `intersect` classes) = - [CodeBlock (ident, (classes \\ codeDivs), kvs) (init $ unlines $ map blkToCode blks)] - | otherwise = - case lookup "indent" kvs of - Just "0" -> [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks] - Just _ -> - [BlockQuote [Div (ident, classes, filter (\kv -> fst kv /= "indent") kvs) blks]] - Nothing -> [b] -divCorrect' blk = [blk] - -divCorrect :: [Block] -> [Block] -divCorrect = concatMap divCorrect' -- cgit v1.2.3 From 08633fad332fe9acfb884a2ba0ee9f8543ab23ed Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Mon, 23 Jun 2014 20:26:08 -0400 Subject: Add copyright block to T.P.R.Docx.Reducible. --- src/Text/Pandoc/Readers/Docx/Reducible.hs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs index 1ed31ebd0..8c105d1f1 100644 --- a/src/Text/Pandoc/Readers/Docx/Reducible.hs +++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs @@ -1,5 +1,36 @@ {-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Reducible + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Typeclass for combining adjacent blocks and inlines correctly. +-} + + module Text.Pandoc.Readers.Docx.Reducible ((<++>), (<+++>), Reducible, -- cgit v1.2.3 From bebea5e936d6c3c90b977a1b8f69bb3a290d30dc Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 10:34:07 -0400 Subject: Docx reader: pass code tests. --- src/Text/Pandoc/Readers/Docx.hs | 80 ++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 33 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index ffe7f5a92..5f62d0b21 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,7 +84,7 @@ import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Data.Maybe (mapMaybe, isJust, fromJust) -import Data.List (delete, isPrefixOf, (\\), intersect) +import Data.List (delete, isPrefixOf, (\\)) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.ByteString.Base64 (encode) @@ -99,7 +99,7 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" spansToKeep :: [String] -spansToKeep = ["list-item", "Definition", "DefinitionTerm"] ++ codeSpans +spansToKeep = ["list-item", "Definition", "DefinitionTerm"] -- This is empty, but we put it in for future-proofing. @@ -108,20 +108,28 @@ divsToKeep = [] runStyleToContainers :: RunStyle -> [Container Inline] runStyleToContainers rPr = - let formatters = mapMaybe id - [ if isBold rPr then (Just Strong) else Nothing - , if isItalic rPr then (Just Emph) else Nothing - , if isSmallCaps rPr then (Just SmallCaps) else Nothing - , if isStrike rPr then (Just Strikeout) else Nothing - , if isSuperScript rPr then (Just Superscript) else Nothing - , if isSubScript rPr then (Just Subscript) else Nothing - , rStyle rPr >>= - (\s -> if s `elem` spansToKeep then Just s else Nothing) >>= - (\s -> Just $ Span ("", [s], [])) - , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) + let spanClassToContainers :: String -> [Container Inline] + spanClassToContainers s | s `elem` codeSpans = + [Container $ (\ils -> Code ("", [], []) (concatMap ilToCode ils))] + spanClassToContainers s | s `elem` spansToKeep = + [Container $ Span ("", [s], [])] + spanClassToContainers _ = [] + + classContainers = case rStyle rPr of + Nothing -> [] + Just s -> spanClassToContainers s + + formatters = map Container $ mapMaybe id + [ if isBold rPr then (Just Strong) else Nothing + , if isItalic rPr then (Just Emph) else Nothing + , if isSmallCaps rPr then (Just SmallCaps) else Nothing + , if isStrike rPr then (Just Strikeout) else Nothing + , if isSuperScript rPr then (Just Superscript) else Nothing + , if isSubScript rPr then (Just Subscript) else Nothing + , underline rPr >>= (\f -> Just $ Span ("", [], [("underline", f)])) ] in - map Container formatters + classContainers ++ formatters divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] @@ -132,7 +140,9 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = [(Container $ \blks -> Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] divAttrToContainers (c:_) _ | c `elem` codeDivs = - [Container $ \blks -> CodeBlock ("", [], []) (concatMap blkToCode blks)] + -- This is a bit of a cludge. We make the codeblock from the raw + -- parparts in bodyPartToBlocks. But we need something to match against. + [Container $ \_ -> CodeBlock ("", [], []) ""] divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = let kvs' = filter (\(k,_) -> k /= "indent") kvs in @@ -183,18 +193,23 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString +runToString :: Run -> String +runToString (Run _ runElems) = runElemsToString runElems +runToString _ = "" + +parPartToString :: ParPart -> String +parPartToString (PlainRun run) = runToString run +parPartToString (InternalHyperLink _ runs) = concatMap runToString runs +parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs +parPartToString _ = "" + inlineCodeContainer :: Container Inline -> Bool inlineCodeContainer (Container f) = case f [] of - Span (_, classes, _) _ -> (not . null) (classes `intersect` codeSpans) + Code _ "" -> True _ -> False inlineCodeContainer _ = False --- blockCodeContainer :: Container Block -> Bool --- blockCodeContainer (Container f) = case f [] of --- Div (ident, classes, kvs) _ -> (not . null) (classes `intersect` codeDivs) --- _ -> False --- blockCodeContainer _ = False runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] runToInlines _ _ (Run rs runElems) @@ -274,7 +289,16 @@ cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells +blockCodeContainer :: Container Block -> Bool +blockCodeContainer (Container f) = case f [] of + CodeBlock _ _ -> True + _ -> False +blockCodeContainer _ = False + bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] +bodyPartToBlocks _ _ (Paragraph pPr parparts) + | any blockCodeContainer (parStyleToContainers pPr) = + [CodeBlock ("", [], []) (concatMap parPartToString parparts)] bodyPartToBlocks opts docx (Paragraph pPr parparts) = case parPartsToInlines opts docx parparts of [] -> @@ -358,7 +382,8 @@ docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body ilToCode :: Inline -> String ilToCode (Str s) = s -ilToCode _ = "" +ilToCode Space = " " +ilToCode _ = "" isHeaderClass :: String -> Maybe Int @@ -369,18 +394,7 @@ isHeaderClass s | "Heading" `isPrefixOf` s = _ -> Nothing isHeaderClass _ = Nothing - blksToInlines :: [Block] -> [Inline] blksToInlines (Para ils : _) = ils blksToInlines (Plain ils : _) = ils blksToInlines _ = [] - - -blkToCode :: Block -> String -blkToCode (Para []) = "" -blkToCode (Para ((Code _ s):ils)) = s ++ (blkToCode (Para ils)) -blkToCode (Para ((Span (_, classes, _) ils'): ils)) - | (not . null) (codeSpans `intersect` classes) = - (init $ unlines $ map ilToCode ils') ++ (blkToCode (Para ils)) -blkToCode _ = "" - -- cgit v1.2.3 From 5ae6b8c6f1ba29fa8b6eec065c6d463e5e03a9aa Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 12:10:49 -0400 Subject: Docx reader: pass definition test. This commit also fixes a problem with the previous code pushes, which wouldn't allow code blocks to share a div. --- src/Text/Pandoc/Readers/Docx.hs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 5f62d0b21..67cf519dc 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -99,12 +99,12 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" spansToKeep :: [String] -spansToKeep = ["list-item", "Definition", "DefinitionTerm"] +spansToKeep = [] -- This is empty, but we put it in for future-proofing. divsToKeep :: [String] -divsToKeep = [] +divsToKeep = ["list-item", "Definition", "DefinitionTerm"] runStyleToContainers :: RunStyle -> [Container Inline] runStyleToContainers rPr = @@ -139,18 +139,18 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = in [(Container $ \blks -> Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] -divAttrToContainers (c:_) _ | c `elem` codeDivs = +divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = + (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) +divAttrToContainers (c:cs) kvs | c `elem` codeDivs = -- This is a bit of a cludge. We make the codeblock from the raw -- parparts in bodyPartToBlocks. But we need something to match against. - [Container $ \_ -> CodeBlock ("", [], []) ""] + (Container $ \_ -> CodeBlock ("", [], []) "") : (divAttrToContainers cs kvs) divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = let kvs' = filter (\(k,_) -> k /= "indent") kvs in (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs') divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) -divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = - (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs divAttrToContainers [] (kv:kvs) | fst kv == "indent" = (Container BlockQuote) : divAttrToContainers [] kvs @@ -298,7 +298,12 @@ blockCodeContainer _ = False bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] bodyPartToBlocks _ _ (Paragraph pPr parparts) | any blockCodeContainer (parStyleToContainers pPr) = - [CodeBlock ("", [], []) (concatMap parPartToString parparts)] + let + otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr) + in + rebuild + otherConts + [CodeBlock ("", [], []) (concatMap parPartToString parparts)] bodyPartToBlocks opts docx (Paragraph pPr parparts) = case parPartsToInlines opts docx parparts of [] -> @@ -372,7 +377,7 @@ makeImagesSelfContained _ inline = inline bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] bodyToBlocks opts docx (Body bps) = map (makeHeaderAnchors) $ - bottomUp blocksToDefinitions $ + blocksToDefinitions $ blocksToBullets $ concatMap (bodyPartToBlocks opts docx) bps -- cgit v1.2.3 From a8866bc1215a4e4c6582dedc940c86cdaeb02d9f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 12:15:26 -0400 Subject: Docx reader: remove T.P.Generic import. This marks the removal of the final tree-walk in the code. (Though there is still one in the Lists module.) --- src/Text/Pandoc/Readers/Docx.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 67cf519dc..59fb7b37f 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -76,7 +76,6 @@ import Codec.Archive.Zip import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Builder (text, toList) -import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Walk -- cgit v1.2.3 From 69743cd5981d7e910c5d83da18fc698c8d522e69 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 24 Jun 2014 14:24:38 -0400 Subject: Docx reader: Ignore zero (or negative) indent If a block has an indentation less than or equal to zero, it should not be treated as a block quote. --- src/Text/Pandoc/Readers/Docx.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 59fb7b37f..b787ca9fb 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -132,7 +132,6 @@ runStyleToContainers rPr = divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] -divAttrToContainers [] [] = [] divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = let n = fromJust (isHeaderClass c) in @@ -151,10 +150,14 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] (kv:kvs) | fst kv == "indent" = - (Container BlockQuote) : divAttrToContainers [] kvs -divAttrToContainers [] (_:kvs) = - divAttrToContainers [] kvs +divAttrToContainers [] kvs | isJust (lookup "indent" kvs) = + let kvs' = filter (\(k,_) -> k /= "indent") kvs + in + case fromJust (lookup "indent" kvs) of + "0" -> divAttrToContainers [] kvs' + ('-' : _) -> divAttrToContainers [] kvs' + _ -> (Container BlockQuote) : divAttrToContainers [] kvs' +divAttrToContainers _ _ = [] parStyleToContainers :: ParagraphStyle -> [Container Block] -- cgit v1.2.3 From c343f1a90bc35d745de673de5ff771ddbe60be54 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 08:10:19 -0400 Subject: Docx Reader: Add change types Insertion and deletion. Dates are just strings for now. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 18200bcf9..1cb5fe2e3 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -455,6 +455,8 @@ elemToCell ns element elemToCell _ _ = Nothing data ParPart = PlainRun Run + | Insertion ChangeId Author ChangeDate [Run] + | Deletion ChangeId Author ChangeDate [Run] | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink RelId [Run] @@ -604,4 +606,6 @@ type Target = String type Anchor = String type BookMarkId = String type RelId = String - +type ChangeId = String +type Author = String +type ChangeDate = String -- cgit v1.2.3 From 38e1d3e95b8240eeb35db0a1a56e308cfb4835e4 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 10:32:48 -0400 Subject: Docx reader: Parse Insertions and Deletions. This is just for the Parse module, reading it into the Docx format. It still has to be translated into pandoc. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 1cb5fe2e3..c76ef7511 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -541,7 +541,7 @@ elemToRun _ _ = Nothing elemToRunElem :: NameSpaces -> Element -> Maybe RunElem elemToRunElem ns element - | qName (elName element) == "t" && + | (qName (elName element) == "t" || qName (elName element) == "delText") && qURI (elName element) == (lookup "w" ns) = Just $ TextRun (strContent element) | qName (elName element) == "br" && @@ -581,6 +581,22 @@ elemToParPart ns element Nothing -> do r <- elemToRun ns element return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "ins" && + qURI (elName element) == (lookup "w" ns) = do + cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element + cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element + let runs = mapMaybe (elemToRun ns) (elChildren element) + return $ Insertion cId cAuthor cDate runs +elemToParPart ns element + | qName (elName element) == "del" && + qURI (elName element) == (lookup "w" ns) = do + cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element + cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element + let runs = mapMaybe (elemToRun ns) (elChildren element) + return $ Deletion cId cAuthor cDate runs elemToParPart ns element | qName (elName element) == "bookmarkStart" && qURI (elName element) == (lookup "w" ns) = do -- cgit v1.2.3 From ed44e4ca8c8f3d3c4c7ac65b98f16732c8173b88 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 10:38:01 -0400 Subject: Docx reader: Add rudimentary track changes support. This will only read the insertions, and ignore the deletions. --- src/Text/Pandoc/Readers/Docx.hs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index b787ca9fb..130e2a1e2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -234,6 +234,9 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines opts docx (Insertion _ _ _ runs) = + concatMap (runToInlines opts docx) runs +parPartToInlines _ _ (Deletion _ _ _ _) = [] parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = -- cgit v1.2.3 From 9614ddfedc18cccbf9fbe1a23fae200c7e67d72d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 11:00:15 -0400 Subject: Docx reader: Remove unnecessary filter in Parse. mapMaybe does the filtering for us. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index c76ef7511..dbbd65681 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -281,10 +281,6 @@ elemToBody ns element | qName (elName element) == "body" && qURI (elName element $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element elemToBody _ _ = Nothing -isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool -isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && - qURI q == (lookup "w" ns) - elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) elemToNumInfo ns element | qName (elName element) == "p" && @@ -319,9 +315,8 @@ elemToBodyPart ns element | qName (elName element) == "p" && qURI (elName element) == (lookup "w" ns) = let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe id - $ map (elemToParPart ns) - $ filterChildrenName (isRunOrLinkOrBookmark ns) element + parparts = mapMaybe (elemToParPart ns) + $ elChildren element in case elemToNumInfo ns element of Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts -- cgit v1.2.3 From 3ec62d006483d369bb896b283db82e4437b66d05 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 13:50:08 -0400 Subject: Add TrackChanges type to options. --- src/Text/Pandoc/Options.hs | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 611a6bb06..e0ad866ad 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -264,6 +264,12 @@ data HTMLSlideVariant = S5Slides | NoSlides deriving (Show, Read, Eq) +-- | Options for accepting or rejecting MS Word track-changes. +data TrackChanges = AcceptChanges + | RejectChanges + | AllChanges + deriving (Show, Read, Eq) + -- | Options for writers data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ Include header and footer -- cgit v1.2.3 From 6ff84b5e8da47ff7f4b77bd6cd017beae81fed97 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 13:57:56 -0400 Subject: Add reader option for track changes. --- src/Text/Pandoc/Options.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index e0ad866ad..d0a76a001 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -211,6 +211,7 @@ data ReaderOptions = ReaderOptions{ -- indented code blocks , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrace :: Bool -- ^ Print debugging info + , readerTrackChanges :: TrackChanges } deriving (Show, Read) instance Default ReaderOptions @@ -227,6 +228,7 @@ instance Default ReaderOptions , readerIndentedCodeClasses = [] , readerDefaultImageExtension = "" , readerTrace = False + , readerTrackChanges = AcceptChanges } -- -- cgit v1.2.3 From d824f89fb3996fd27e156da1141808fbf468819d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 14:05:21 -0400 Subject: Add TrackChanges to Options export. --- src/Text/Pandoc/Options.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index d0a76a001..b7a3a4b7b 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -41,6 +41,7 @@ module Text.Pandoc.Options ( Extension(..) , HTMLSlideVariant (..) , EPUBVersion (..) , WriterOptions (..) + , TrackChanges (..) , def , isEnabled ) where -- cgit v1.2.3 From 0e9bf37f64be0a121a0d682570fc8f0cf2b27c51 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 14:17:20 -0400 Subject: Docx reader: Make use of track-changes option. --- src/Text/Pandoc/Readers/Docx.hs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 130e2a1e2..cb0735e31 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -234,9 +234,22 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines opts docx (Insertion _ _ _ runs) = - concatMap (runToInlines opts docx) runs -parPartToInlines _ _ (Deletion _ _ _ _) = [] +parPartToInlines opts docx (Insertion _ author date runs) = + case readerTrackChanges opts of + AcceptChanges -> concatMap (runToInlines opts docx) runs + RejectChanges -> [] + AllChanges -> + [Span + ("", ["insertion"], [("author", author), ("date", date)]) + (concatMap (runToInlines opts docx) runs)] +parPartToInlines opts docx (Deletion _ author date runs) = + case readerTrackChanges opts of + AcceptChanges -> [] + RejectChanges -> concatMap (runToInlines opts docx) runs + AllChanges -> + [Span + ("", ["deletion"], [("author", author), ("date", date)]) + (concatMap (runToInlines opts docx) runs)] parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = -- cgit v1.2.3 From 2396be6f5777ac04067264d489fb84fbec72d164 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Wed, 25 Jun 2014 17:12:03 -0400 Subject: Docx reader: Code cleanup in parse. Remove some redundant ways of dealing with Maybe. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 50 +++++++++-------------------------- 1 file changed, 12 insertions(+), 38 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index dbbd65681..07f34450d 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -148,7 +148,7 @@ absNumElemToAbsNum ns element | let levelElems = findChildren (QName "lvl" (lookup "w" ns) (Just "w")) element - levels = mapMaybe id $ map (levelElemToLevel ns) levelElems + levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels absNumElemToAbsNum _ _ = Nothing @@ -180,8 +180,8 @@ archiveToNumbering zf = absNumElems = findChildren (QName "abstractNum" (lookup "w" namespaces) (Just "w")) numberingElem - nums = mapMaybe id $ map (numElemToNum namespaces) numElems - absNums = mapMaybe id $ map (absNumElemToAbsNum namespaces) absNumElems + nums = mapMaybe (numElemToNum namespaces) numElems + absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems return $ Numbering namespaces nums absNums data Notes = Notes NameSpaces (Maybe [(String, [BodyPart])]) (Maybe [(String, [BodyPart])]) @@ -193,10 +193,8 @@ noteElemToNote ns element qURI (elName element) == (lookup "w" ns) = do noteId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element - let bps = map fromJust - $ filter isJust - $ map (elemToBodyPart ns) - $ filterChildrenName (isParOrTbl ns) element + let bps = mapMaybe (elemToBodyPart ns) + $ elChildren element return $ (noteId, bps) noteElemToNote _ _ = Nothing @@ -210,9 +208,7 @@ elemToNotes :: NameSpaces -> String -> Element -> Maybe [(String, [BodyPart])] elemToNotes ns notetype element | qName (elName element) == (notetype ++ "s") && qURI (elName element) == (lookup "w" ns) = - Just $ map fromJust - $ filter isJust - $ map (noteElemToNote ns) + Just $ mapMaybe (noteElemToNote ns) $ findChildren (QName notetype (lookup "w" ns) (Just "w")) element elemToNotes _ _ _ = Nothing @@ -260,25 +256,19 @@ relElemToRelationship _ = Nothing archiveToRelationships :: Archive -> [Relationship] archiveToRelationships archive = let relPaths = filter filePathIsRel (filesInArchive archive) - entries = map fromJust $ filter isJust $ map (\f -> findEntryByPath f archive) relPaths - relElems = map fromJust $ filter isJust $ map (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = map fromJust $ filter isJust $ map relElemToRelationship $ concatMap elChildren relElems + entries = mapMaybe (\f -> findEntryByPath f archive) relPaths + relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries + rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems in rels data Body = Body [BodyPart] deriving Show -isParOrTbl :: NameSpaces -> QName -> Bool -isParOrTbl ns q = qName q `elem` ["p", "tbl"] && - qURI q == (lookup "w" ns) - elemToBody :: NameSpaces -> Element -> Maybe Body elemToBody ns element | qName (elName element) == "body" && qURI (elName element) == (lookup "w" ns) = Just $ Body - $ map fromJust - $ filter isJust - $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element + $ mapMaybe (elemToBodyPart ns) $ elChildren element elemToBody _ _ = Nothing elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) @@ -295,21 +285,6 @@ elemToNumInfo ns element return (numId, lvl) elemToNumInfo _ _ = Nothing --- isBookMarkTag :: NameSpaces -> QName -> Bool --- isBookMarkTag ns q = qName q `elem` ["bookmarkStart", "bookmarkEnd"] && --- qURI q == (lookup "w" ns) - --- parChildrenToBookmark :: NameSpaces -> [Element] -> BookMark --- parChildrenToBookmark ns (bms : bme : _) --- | qName (elName bms) == "bookmarkStart" && --- qURI (elName bms) == (lookup "w" ns) && --- qName (elName bme) == "bookmarkEnd" && --- qURI (elName bme) == (lookup "w" ns) = do --- bmId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) bms --- bmName <- findAttr (QName "name" (lookup "w" ns) (Just "w")) bms --- return $ (bmId, bmName) --- parChildrenToBookmark _ _ = Nothing - elemToBodyPart :: NameSpaces -> Element -> Maybe BodyPart elemToBodyPart ns element | qName (elName element) == "p" && @@ -382,8 +357,7 @@ elemToParagraphStyle ns element = Just pPr -> ParagraphStyle {pStyle = - mapMaybe id $ - map + mapMaybe (findAttr (QName "val" (lookup "w" ns) (Just "w"))) (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr) , indent = @@ -601,7 +575,7 @@ elemToParPart ns element elemToParPart ns element | qName (elName element) == "hyperlink" && qURI (elName element) == (lookup "w" ns) = - let runs = map fromJust $ filter isJust $ map (elemToRun ns) + let runs = mapMaybe (elemToRun ns) $ findChildren (QName "r" (lookup "w" ns) (Just "w")) element in case findAttr (QName "anchor" (lookup "w" ns) (Just "w")) element of -- cgit v1.2.3 From b2127311cb360479dbea59264ada0112a94d7819 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 26 Jun 2014 12:34:41 -0700 Subject: Require haddock-library >= 1.1 and simplify haddock reader code. See #1346. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/Haddock.hs | 40 +------------------------------------- 2 files changed, 2 insertions(+), 40 deletions(-) (limited to 'src/Text') diff --git a/pandoc.cabal b/pandoc.cabal index 634d249fe..eeb233d3d 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -259,7 +259,7 @@ Library hslua >= 0.3 && < 0.4, binary >= 0.5 && < 0.8, SHA >= 1.6 && < 1.7, - haddock-library >= 1.0 && < 1.1 + haddock-library >= 1.1 && < 1.2 if flag(https) Build-Depends: http-client >= 0.3.2 && < 0.4, http-client-tls >= 0.2 && < 0.3, diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index a3dfb7c3c..4b46c869d 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -43,11 +43,8 @@ docHToBlocks d' = (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback - DocParagraph (DocHeader h) -> docHToBlocks (DocHeader h) DocParagraph (DocAName h) -> B.plain $ docHToInlines False $ DocAName h - DocParagraph x -> let (ils, rest) = getInlines x - in (B.para $ docHToInlines False ils) - <> docHToBlocks rest + DocParagraph x -> B.para $ docHToInlines False x DocIdentifier _ -> inlineFallback DocIdentifierUnchecked _ -> inlineFallback DocModule s -> B.plain $ docHToInlines False $ DocModule s @@ -115,40 +112,6 @@ docHToInlines isCode d' = DocProperty _ -> mempty DocExamples _ -> mempty -getInlines :: DocH String Identifier -> (DocH String Identifier, DocH String Identifier) -getInlines (DocAppend x y) = if isInline x - then let (a, b) = getInlines y - in (DocAppend x a, b) - else (DocEmpty, DocAppend x y) -getInlines x = if isInline x - then (x, DocEmpty) - else (DocEmpty, x) - -isInline :: DocH String Identifier -> Bool -isInline d' = - case d' of - DocEmpty -> True - DocAppend d1 _ -> isInline d1 - DocString _ -> True - DocParagraph _ -> False - DocIdentifier _ -> True - DocIdentifierUnchecked _ -> True - DocModule _ -> True - DocWarning _ -> True - DocEmphasis _ -> True - DocMonospaced _ -> True - DocBold _ -> True - DocHeader _ -> False - DocUnorderedList _ -> False - DocOrderedList _ -> False - DocDefList _ -> False - DocCodeBlock _ -> False - DocHyperlink _ -> True - DocPic _ -> True - DocAName _ -> True - DocProperty _ -> False - DocExamples _ -> False - -- | Create an 'Example', stripping superfluous characters as appropriate makeExample :: String -> String -> [String] -> Blocks makeExample prompt expression result = @@ -173,4 +136,3 @@ makeExample prompt expression result = substituteBlankLine "" = "" substituteBlankLine line = line coder = B.codeWith ([], ["result"], []) - -- cgit v1.2.3 From 4248f25152d5715ad99f9d8dda8bf83f33f650ff Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 26 Jun 2014 16:48:41 -0400 Subject: Move Docx reader to DocxContext monad This is a ReaderT State stack, which keeps track of some environment info, such as the options and the docx doc. The state will come in handy in the future, for a couple of planned features (rewriting the section anchors as auto_idents, and hopefully smart-quoting). --- src/Text/Pandoc/Readers/Docx.hs | 247 +++++++++++++++++++++++----------------- 1 file changed, 140 insertions(+), 107 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index cb0735e31..5773027f2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -88,6 +88,9 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.ByteString.Base64 (encode) import System.FilePath (combine) +import qualified Data.Map as M +import Control.Monad.Reader +import Control.Monad.State readDocx :: ReaderOptions -> B.ByteString @@ -97,11 +100,24 @@ readDocx opts bytes = Just docx -> Pandoc nullMeta (docxToBlocks opts docx) Nothing -> error $ "couldn't parse docx file" -spansToKeep :: [String] -spansToKeep = [] +data DState = DState { docxHdrLinks :: M.Map String String } + +data DEnv = DEnv { docxOptions :: ReaderOptions + , docxDocument :: Docx} + +type DocxContext = ReaderT DEnv (State DState) + +evalDocxContext :: DocxContext a -> DEnv -> DState -> a +evalDocxContext ctx env st = evalState (runReaderT ctx env) st + +concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +concatMapM f xs = liftM concat (mapM f xs) -- This is empty, but we put it in for future-proofing. +spansToKeep :: [String] +spansToKeep = [] + divsToKeep :: [String] divsToKeep = ["list-item", "Definition", "DefinitionTerm"] @@ -213,57 +229,69 @@ inlineCodeContainer (Container f) = case f [] of inlineCodeContainer _ = False -runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -runToInlines _ _ (Run rs runElems) +runToInlines :: Run -> DocxContext [Inline] +runToInlines (Run rs runElems) | any inlineCodeContainer (runStyleToContainers rs) = + return $ rebuild (runStyleToContainers rs) $ [Str $ runElemsToString runElems] | otherwise = + return $ rebuild (runStyleToContainers rs) (concatMap runElemToInlines runElems) -runToInlines opts docx@(Docx _ notes _ _ _ ) (Footnote fnId) = +runToInlines (Footnote fnId) = do + (Docx _ notes _ _ _ ) <- asks docxDocument case (getFootNote fnId notes) of - Just bodyParts -> - [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] - Nothing -> - [Note []] -runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = + Just bodyParts -> do + blks <- concatMapM bodyPartToBlocks bodyParts + return $ [Note blks] + Nothing -> return [Note []] +runToInlines (Endnote fnId) = do + (Docx _ notes _ _ _ ) <- asks docxDocument case (getEndNote fnId notes) of - Just bodyParts -> - [Note (concatMap (bodyPartToBlocks opts docx) bodyParts)] - Nothing -> - [Note []] - -parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] -parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r -parPartToInlines opts docx (Insertion _ author date runs) = + Just bodyParts -> do + blks <- concatMapM bodyPartToBlocks bodyParts + return $ [Note blks] + Nothing -> return [Note []] + +parPartToInlines :: ParPart -> DocxContext [Inline] +parPartToInlines (PlainRun r) = runToInlines r +parPartToInlines (Insertion _ author date runs) = do + opts <- asks docxOptions case readerTrackChanges opts of - AcceptChanges -> concatMap (runToInlines opts docx) runs - RejectChanges -> [] - AllChanges -> - [Span - ("", ["insertion"], [("author", author), ("date", date)]) - (concatMap (runToInlines opts docx) runs)] -parPartToInlines opts docx (Deletion _ author date runs) = + AcceptChanges -> concatMapM runToInlines runs >>= return + RejectChanges -> return [] + AllChanges -> do + ils <- (concatMapM runToInlines runs) + return [Span + ("", ["insertion"], [("author", author), ("date", date)]) + ils] +parPartToInlines (Deletion _ author date runs) = do + opts <- asks docxOptions case readerTrackChanges opts of - AcceptChanges -> [] - RejectChanges -> concatMap (runToInlines opts docx) runs - AllChanges -> - [Span - ("", ["deletion"], [("author", author), ("date", date)]) - (concatMap (runToInlines opts docx) runs)] -parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] -parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] -parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = - case lookupRelationship relid rels of + AcceptChanges -> return [] + RejectChanges -> concatMapM runToInlines runs >>= return + AllChanges -> do + ils <- concatMapM runToInlines runs + return [Span + ("", ["deletion"], [("author", author), ("date", date)]) + ils] +parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return [] +parPartToInlines (BookMark _ anchor) = return [Span (anchor, ["anchor"], []) []] +parPartToInlines (Drawing relid) = do + (Docx _ _ _ rels _) <- asks docxDocument + return $ case lookupRelationship relid rels of Just target -> [Image [] (combine "word" target, "")] Nothing -> [Image [] ("", "")] -parPartToInlines opts docx (InternalHyperLink anchor runs) = - [Link (concatMap (runToInlines opts docx) runs) ('#' : anchor, "")] -parPartToInlines opts docx@(Docx _ _ _ rels _) (ExternalHyperLink relid runs) = - case lookupRelationship relid rels of +parPartToInlines (InternalHyperLink anchor runs) = do + ils <- concatMapM runToInlines runs + return [Link ils ('#' : anchor, "")] +parPartToInlines (ExternalHyperLink relid runs) = do + (Docx _ _ _ rels _) <- asks docxDocument + rs <- concatMapM runToInlines runs + return $ case lookupRelationship relid rels of Just target -> - [Link (concatMap (runToInlines opts docx) runs) (target, "")] + [Link rs (target, "")] Nothing -> - [Link (concatMap (runToInlines opts docx) runs) ("", "")] + [Link rs ("", "")] isAnchorSpan :: Inline -> Bool isAnchorSpan (Span (ident, classes, kvs) ils) = @@ -287,25 +315,18 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = _ -> h makeHeaderAnchors blk = blk -parPartsToInlines :: ReaderOptions -> Docx -> [ParPart] -> [Inline] -parPartsToInlines opts docx parparts = - -- - -- We're going to skip data-uri's for now. It should be an option, - -- not mandatory. - -- - (if False -- TODO depend on option - then walk (makeImagesSelfContained docx) - else id) $ - -- bottomUp spanTrim $ - -- bottomUp spanCorrect $ - -- bottomUp spanReduce $ - reduceList $ concatMap (parPartToInlines opts docx) parparts - -cellToBlocks :: ReaderOptions -> Docx -> Cell -> [Block] -cellToBlocks opts docx (Cell bps) = concatMap (bodyPartToBlocks opts docx) bps - -rowToBlocksList :: ReaderOptions -> Docx -> Row -> [[Block]] -rowToBlocksList opts docx (Row cells) = map (cellToBlocks opts docx) cells +parPartsToInlines :: [ParPart] -> DocxContext [Inline] +parPartsToInlines parparts = do + ils <- concatMapM parPartToInlines parparts >>= + -- TODO: Option for self-containted images + (if False then (walkM makeImagesSelfContained) else return) + return $ reduceList $ ils + +cellToBlocks :: Cell -> DocxContext [Block] +cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps + +rowToBlocksList :: Row -> DocxContext [[Block]] +rowToBlocksList (Row cells) = mapM cellToBlocks cells blockCodeContainer :: Container Block -> Bool blockCodeContainer (Container f) = case f [] of @@ -313,27 +334,32 @@ blockCodeContainer (Container f) = case f [] of _ -> False blockCodeContainer _ = False -bodyPartToBlocks :: ReaderOptions -> Docx -> BodyPart -> [Block] -bodyPartToBlocks _ _ (Paragraph pPr parparts) +bodyPartToBlocks :: BodyPart -> DocxContext [Block] +bodyPartToBlocks (Paragraph pPr parparts) | any blockCodeContainer (parStyleToContainers pPr) = let otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr) in + return $ rebuild otherConts [CodeBlock ("", [], []) (concatMap parPartToString parparts)] -bodyPartToBlocks opts docx (Paragraph pPr parparts) = - case parPartsToInlines opts docx parparts of - [] -> - [] - _ -> - let parContents = parPartsToInlines opts docx parparts - trimmedContents = reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) parContents - in +bodyPartToBlocks (Paragraph pPr parparts) = do + ils <- parPartsToInlines parparts + case ils of + [] -> return [] + _ -> do + parContents <- parPartsToInlines parparts + let trimmedContents = reverse $ + dropWhile (Space ==) $ + reverse $ + dropWhile (Space ==) parContents + return $ rebuild (parStyleToContainers pPr) [Para trimmedContents] -bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parparts) = +bodyPartToBlocks (ListItem pPr numId lvl parparts) = do + (Docx _ _ numbering _ _) <- asks docxDocument let kvs = case lookupLevel numId lvl numbering of Just (_, fmt, txt, Just start) -> [ ("level", lvl) @@ -349,23 +375,22 @@ bodyPartToBlocks opts docx@(Docx _ _ numbering _ _) (ListItem pPr numId lvl parp , ("text", txt) ] Nothing -> [] - in - [Div - ("", ["list-item"], kvs) - (bodyPartToBlocks opts docx (Paragraph pPr parparts))] -bodyPartToBlocks _ _ (Tbl _ _ _ []) = - [Para []] -bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) = + blks <- bodyPartToBlocks (Paragraph pPr parparts) + return $ [Div ("", ["list-item"], kvs) blks] +bodyPartToBlocks (Tbl _ _ _ []) = + return [Para []] +bodyPartToBlocks (Tbl cap _ look (r:rs)) = do let caption = strToInlines cap (hdr, rows) = case firstRowFormatting look of True -> (Just r, rs) False -> (Nothing, r:rs) - hdrCells = case hdr of - Just r' -> rowToBlocksList opts docx r' - Nothing -> [] - cells = map (rowToBlocksList opts docx) rows + hdrCells <- case hdr of + Just r' -> rowToBlocksList r' + Nothing -> return [] + + cells <- mapM rowToBlocksList rows - size = case null hdrCells of + let size = case null hdrCells of True -> length $ head cells False -> length $ hdrCells -- @@ -374,34 +399,42 @@ bodyPartToBlocks opts docx (Tbl cap _ look (r:rs)) = -- moment. Width information is in the TblGrid field of the Tbl, -- so should be possible. Alignment might be more difficult, -- since there doesn't seem to be a column entity in docx. - alignments = take size (repeat AlignDefault) - widths = take size (repeat 0) :: [Double] - in - [Table caption alignments widths hdrCells cells] - - -makeImagesSelfContained :: Docx -> Inline -> Inline -makeImagesSelfContained (Docx _ _ _ _ media) i@(Image alt (uri, title)) = - case lookup uri media of - Just bs -> case getMimeType uri of - Just mime -> let data_uri = - "data:" ++ mime ++ ";base64," ++ toString (encode $ BS.concat $ B.toChunks bs) - in - Image alt (data_uri, title) - Nothing -> i + alignments = replicate size AlignDefault + widths = replicate size 0 :: [Double] + + return [Table caption alignments widths hdrCells cells] + + +makeImagesSelfContained :: Inline -> DocxContext Inline +makeImagesSelfContained i@(Image alt (uri, title)) = do + (Docx _ _ _ _ media) <- asks docxDocument + return $ case lookup uri media of + Just bs -> + case getMimeType uri of + Just mime -> + let data_uri = "data:" ++ mime ++ ";base64," ++ + toString (encode $ BS.concat $ B.toChunks bs) + in + Image alt (data_uri, title) + Nothing -> i Nothing -> i -makeImagesSelfContained _ inline = inline +makeImagesSelfContained inline = return inline -bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block] -bodyToBlocks opts docx (Body bps) = - map (makeHeaderAnchors) $ - blocksToDefinitions $ - blocksToBullets $ - concatMap (bodyPartToBlocks opts docx) bps +bodyToBlocks :: Body -> DocxContext [Block] +bodyToBlocks (Body bps) = do + blks <- concatMapM bodyPartToBlocks bps + return $ + map (makeHeaderAnchors) $ + blocksToDefinitions $ + blocksToBullets $ blks docxToBlocks :: ReaderOptions -> Docx -> [Block] -docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = bodyToBlocks opts d body - +docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = + let dState = DState { docxHdrLinks = M.empty } + dEnv = DEnv { docxOptions = opts + , docxDocument = d} + in + evalDocxContext (bodyToBlocks body) dEnv dState ilToCode :: Inline -> String ilToCode (Str s) = s -- cgit v1.2.3 From db187348cd8bb17ce66d2d4c1db6a5ff46a1ffbc Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 26 Jun 2014 23:10:11 -0400 Subject: Docx rdr: Avoid mapping makeHeaderAnchors globally It only applies to headers, so we can just apply it when we make a header. --- src/Text/Pandoc/Readers/Docx.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 5773027f2..42352a845 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -152,6 +152,7 @@ divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = let n = fromJust (isHeaderClass c) in [(Container $ \blks -> + makeHeaderAnchor $ Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) @@ -304,8 +305,8 @@ isAnchorSpan _ = False dummyAnchors :: [String] dummyAnchors = ["_GoBack"] -makeHeaderAnchors :: Block -> Block -makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = +makeHeaderAnchor :: Block -> Block +makeHeaderAnchor h@(Header n (_, classes, kvs) ils) = case filter isAnchorSpan ils of [] -> h (x@(Span (ident, _, _) _) : xs) -> @@ -313,7 +314,7 @@ makeHeaderAnchors h@(Header n (_, classes, kvs) ils) = True -> h False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) _ -> h -makeHeaderAnchors blk = blk +makeHeaderAnchor blk = blk parPartsToInlines :: [ParPart] -> DocxContext [Inline] parPartsToInlines parparts = do @@ -424,7 +425,6 @@ bodyToBlocks :: Body -> DocxContext [Block] bodyToBlocks (Body bps) = do blks <- concatMapM bodyPartToBlocks bps return $ - map (makeHeaderAnchors) $ blocksToDefinitions $ blocksToBullets $ blks -- cgit v1.2.3 From ab76bbebbe7afd3acdf3218b88f02482c885cc87 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 27 Jun 2014 11:35:50 -0400 Subject: Docx Reader: Clean up guards Use PatternGuards to get rid of need for `isJust`, `fromJust` altogether. --- src/Text/Pandoc/Readers/Docx.hs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 42352a845..0c52b1acb 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE PatternGuards #-} + {- Copyright (C) 2014 Jesse Rosenthal @@ -82,7 +84,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible -import Data.Maybe (mapMaybe, isJust, fromJust) +import Data.Maybe (mapMaybe) import Data.List (delete, isPrefixOf, (\\)) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B @@ -148,12 +150,10 @@ runStyleToContainers rPr = divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] -divAttrToContainers (c:cs) _ | isJust (isHeaderClass c) = - let n = fromJust (isHeaderClass c) - in - [(Container $ \blks -> - makeHeaderAnchor $ - Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] +divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = + [(Container $ \blks -> + makeHeaderAnchor $ + Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) divAttrToContainers (c:cs) kvs | c `elem` codeDivs = @@ -167,10 +167,10 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs = divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs = (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs) divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs -divAttrToContainers [] kvs | isJust (lookup "indent" kvs) = +divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs = let kvs' = filter (\(k,_) -> k /= "indent") kvs in - case fromJust (lookup "indent" kvs) of + case numString of "0" -> divAttrToContainers [] kvs' ('-' : _) -> divAttrToContainers [] kvs' _ -> (Container BlockQuote) : divAttrToContainers [] kvs' -- cgit v1.2.3 From 1de8d4d08788ef24f69f9f90266604854996080e Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 27 Jun 2014 11:45:06 -0400 Subject: Docx Reader: Simplify makeHeaderAnchors Using pattern guard, in preparation for doing some more complicated stuff with it (recording header anchors, so we can change them to auto ids.) --- src/Text/Pandoc/Readers/Docx.hs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 0c52b1acb..9aaf1d340 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -306,14 +306,11 @@ dummyAnchors :: [String] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: Block -> Block -makeHeaderAnchor h@(Header n (_, classes, kvs) ils) = - case filter isAnchorSpan ils of - [] -> h - (x@(Span (ident, _, _) _) : xs) -> - case ident `elem` dummyAnchors of - True -> h - False -> Header n (ident, classes, kvs) (ils \\ (x:xs)) - _ -> h +makeHeaderAnchor (Header n (_, classes, kvs) ils) + | (x : xs) <- filter isAnchorSpan ils + , (Span (ident, _, _) _) <- x + , notElem ident dummyAnchors = + Header n (ident, classes, kvs) (ils \\ (x:xs)) makeHeaderAnchor blk = blk parPartsToInlines :: [ParPart] -> DocxContext [Inline] -- cgit v1.2.3 From 5969baf5b97c0926384b1619be3c4be6d92b277b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 02:47:40 -0400 Subject: Rewrote header generation. In preparation for auto ids. --- src/Text/Pandoc/Readers/Docx.hs | 50 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9aaf1d340..bbe770f6e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,6 +84,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Readers.Docx.Parse import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible +import Text.Pandoc.Shared import Data.Maybe (mapMaybe) import Data.List (delete, isPrefixOf, (\\)) import qualified Data.ByteString as BS @@ -151,9 +152,8 @@ runStyleToContainers rPr = divAttrToContainers :: [String] -> [(String, String)] -> [Container Block] divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c = - [(Container $ \blks -> - makeHeaderAnchor $ - Header n ("", delete ("Heading" ++ show n) cs, []) (blksToInlines blks))] + [Container $ \_ -> + Header n ("", delete ("Heading" ++ show n) cs, []) []] divAttrToContainers (c:cs) kvs | c `elem` divsToKeep = (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs) divAttrToContainers (c:cs) kvs | c `elem` codeDivs = @@ -305,13 +305,14 @@ isAnchorSpan _ = False dummyAnchors :: [String] dummyAnchors = ["_GoBack"] -makeHeaderAnchor :: Block -> Block +makeHeaderAnchor :: Block -> DocxContext Block makeHeaderAnchor (Header n (_, classes, kvs) ils) | (x : xs) <- filter isAnchorSpan ils , (Span (ident, _, _) _) <- x , notElem ident dummyAnchors = - Header n (ident, classes, kvs) (ils \\ (x:xs)) -makeHeaderAnchor blk = blk + return $ Header n (ident, classes, kvs) (ils \\ (x:xs)) +makeHeaderAnchor blk = return blk + parPartsToInlines :: [ParPart] -> DocxContext [Inline] parPartsToInlines parparts = do @@ -326,36 +327,40 @@ cellToBlocks (Cell bps) = concatMapM bodyPartToBlocks bps rowToBlocksList :: Row -> DocxContext [[Block]] rowToBlocksList (Row cells) = mapM cellToBlocks cells -blockCodeContainer :: Container Block -> Bool -blockCodeContainer (Container f) = case f [] of - CodeBlock _ _ -> True - _ -> False -blockCodeContainer _ = False +isBlockCodeContainer :: Container Block -> Bool +isBlockCodeContainer (Container f) | CodeBlock _ _ <- f [] = True +isBlockCodeContainer _ = False + +isHeaderContainer :: Container Block -> Bool +isHeaderContainer (Container f) | Header _ _ _ <- f [] = True +isHeaderContainer _ = False bodyPartToBlocks :: BodyPart -> DocxContext [Block] bodyPartToBlocks (Paragraph pPr parparts) - | any blockCodeContainer (parStyleToContainers pPr) = + | any isBlockCodeContainer (parStyleToContainers pPr) = let - otherConts = filter (not . blockCodeContainer) (parStyleToContainers pPr) + otherConts = filter (not . isBlockCodeContainer) (parStyleToContainers pPr) in return $ rebuild otherConts [CodeBlock ("", [], []) (concatMap parPartToString parparts)] +bodyPartToBlocks (Paragraph pPr parparts) + | any isHeaderContainer (parStyleToContainers pPr) = do + ils <- parPartsToInlines parparts >>= (return . normalizeSpaces) + let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr) + Header n attr _ = hdrFun [] + hdr <- makeHeaderAnchor $ Header n attr ils + return [hdr] bodyPartToBlocks (Paragraph pPr parparts) = do - ils <- parPartsToInlines parparts + ils <- parPartsToInlines parparts >>= (return . normalizeSpaces) case ils of [] -> return [] _ -> do - parContents <- parPartsToInlines parparts - let trimmedContents = reverse $ - dropWhile (Space ==) $ - reverse $ - dropWhile (Space ==) parContents return $ rebuild (parStyleToContainers pPr) - [Para trimmedContents] + [Para ils] bodyPartToBlocks (ListItem pPr numId lvl parparts) = do (Docx _ _ numbering _ _) <- asks docxDocument let @@ -446,8 +451,3 @@ isHeaderClass s | "Heading" `isPrefixOf` s = ((n, "") : []) -> Just n _ -> Nothing isHeaderClass _ = Nothing - -blksToInlines :: [Block] -> [Inline] -blksToInlines (Para ils : _) = ils -blksToInlines (Plain ils : _) = ils -blksToInlines _ = [] -- cgit v1.2.3 From b89a3ba2b1069205a308ad0f444457d595e5a77f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 03:04:34 -0400 Subject: make makeHeaderAnchors make an auto id Record relationship between original id and auto id, so we can fix links after. --- src/Text/Pandoc/Readers/Docx.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index bbe770f6e..a3053b72a 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -104,7 +104,7 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" -data DState = DState { docxHdrLinks :: M.Map String String } +data DState = DState { docxHeaderAnchors :: M.Map String String } data DEnv = DEnv { docxOptions :: ReaderOptions , docxDocument :: Docx} @@ -310,7 +310,11 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) | (x : xs) <- filter isAnchorSpan ils , (Span (ident, _, _) _) <- x , notElem ident dummyAnchors = - return $ Header n (ident, classes, kvs) (ils \\ (x:xs)) + do + hdrIDMap <- gets docxHeaderAnchors + let newIdent = uniqueIdent ils (M.elems hdrIDMap) + put DState{docxHeaderAnchors = M.insert ident newIdent hdrIDMap} + return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) makeHeaderAnchor blk = return blk @@ -432,7 +436,7 @@ bodyToBlocks (Body bps) = do docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = - let dState = DState { docxHdrLinks = M.empty } + let dState = DState { docxHeaderAnchors = M.empty } dEnv = DEnv { docxOptions = opts , docxDocument = d} in -- cgit v1.2.3 From dce360e1e6fee089e849c07785d8e21961fefb9b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 03:54:58 -0400 Subject: Docx Reader: Introduce link rewriting. --- src/Text/Pandoc/Readers/Docx.hs | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index a3053b72a..0607aac7f 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -104,7 +104,7 @@ readDocx opts bytes = Nothing -> error $ "couldn't parse docx file" -data DState = DState { docxHeaderAnchors :: M.Map String String } +data DState = DState { docxAnchorMap :: M.Map String String } data DEnv = DEnv { docxOptions :: ReaderOptions , docxDocument :: Docx} @@ -276,7 +276,22 @@ parPartToInlines (Deletion _ author date runs) = do ("", ["deletion"], [("author", author), ("date", date)]) ils] parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return [] -parPartToInlines (BookMark _ anchor) = return [Span (anchor, ["anchor"], []) []] +parPartToInlines (BookMark _ anchor) = + -- We record these, so we can make sure not to overwrite + -- user-defined anchor links with header auto ids. + do + -- Get the anchor map. + anchorMap <- gets docxAnchorMap + -- Check to see if the id is already in there. Rewrite if + -- necessary. This will have the possible effect of rewriting + -- user-defined anchor links. However, since these are not defined + -- in pandoc, it seems like a necessary evil to avoid an extra + -- pass. + let newAnchor = case anchor `elem` (M.elems anchorMap) of + True -> uniqueIdent [Str anchor] (M.elems anchorMap) + False -> anchor + put DState{ docxAnchorMap = M.insert anchor newAnchor anchorMap} + return [Span (anchor, ["anchor"], []) []] parPartToInlines (Drawing relid) = do (Docx _ _ _ rels _) <- asks docxDocument return $ case lookupRelationship relid rels of @@ -311,9 +326,9 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) , (Span (ident, _, _) _) <- x , notElem ident dummyAnchors = do - hdrIDMap <- gets docxHeaderAnchors + hdrIDMap <- gets docxAnchorMap let newIdent = uniqueIdent ils (M.elems hdrIDMap) - put DState{docxHeaderAnchors = M.insert ident newIdent hdrIDMap} + put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) makeHeaderAnchor blk = return blk @@ -411,6 +426,14 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do return [Table caption alignments widths hdrCells cells] +-- replace targets with generated anchors. +rewriteLink :: Inline -> DocxContext Inline +rewriteLink l@(Link ils ('#':target, title)) = do + anchorMap <- gets docxAnchorMap + return $ case M.lookup target anchorMap of + Just newTarget -> (Link ils ('#':newTarget, title)) + Nothing -> l +rewriteLink il = return il makeImagesSelfContained :: Inline -> DocxContext Inline makeImagesSelfContained i@(Image alt (uri, title)) = do @@ -429,14 +452,15 @@ makeImagesSelfContained inline = return inline bodyToBlocks :: Body -> DocxContext [Block] bodyToBlocks (Body bps) = do - blks <- concatMapM bodyPartToBlocks bps + blks <- concatMapM bodyPartToBlocks bps >>= + walkM rewriteLink return $ blocksToDefinitions $ blocksToBullets $ blks docxToBlocks :: ReaderOptions -> Docx -> [Block] docxToBlocks opts d@(Docx (Document _ body) _ _ _ _) = - let dState = DState { docxHeaderAnchors = M.empty } + let dState = DState { docxAnchorMap = M.empty } dEnv = DEnv { docxOptions = opts , docxDocument = d} in @@ -447,7 +471,6 @@ ilToCode (Str s) = s ilToCode Space = " " ilToCode _ = "" - isHeaderClass :: String -> Maybe Int isHeaderClass s | "Heading" `isPrefixOf` s = case reads (drop (length "Heading") s) :: [(Int, String)] of -- cgit v1.2.3 From c0a8d5ac7213ac01b5f12dd7dfca66e6d8301f5f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sat, 28 Jun 2014 08:40:59 -0400 Subject: Docx Reader: All headers get auto id. Previously, only those with an anchor got an auto id. Now, all do, which puts it in line with pandoc's markdown extension. --- src/Text/Pandoc/Readers/Docx.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 0607aac7f..71baa5dde 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -103,7 +103,6 @@ readDocx opts bytes = Just docx -> Pandoc nullMeta (docxToBlocks opts docx) Nothing -> error $ "couldn't parse docx file" - data DState = DState { docxAnchorMap :: M.Map String String } data DEnv = DEnv { docxOptions :: ReaderOptions @@ -321,6 +320,8 @@ dummyAnchors :: [String] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: Block -> DocxContext Block +-- If there is an anchor already there (an anchor span in the header, +-- to be exact), we rename and associate the new id with the old one. makeHeaderAnchor (Header n (_, classes, kvs) ils) | (x : xs) <- filter isAnchorSpan ils , (Span (ident, _, _) _) <- x @@ -330,6 +331,14 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) let newIdent = uniqueIdent ils (M.elems hdrIDMap) put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap} return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs)) +-- Otherwise we just give it a name, and register that name (associate +-- it with itself.) +makeHeaderAnchor (Header n (_, classes, kvs) ils) = + do + hdrIDMap <- gets docxAnchorMap + let newIdent = uniqueIdent ils (M.elems hdrIDMap) + put DState{docxAnchorMap = M.insert newIdent newIdent hdrIDMap} + return $ Header n (newIdent, classes, kvs) ils makeHeaderAnchor blk = return blk -- cgit v1.2.3