From 536b6bf538a95f6db6ed41b72257f8b09fd26886 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 11 Dec 2015 15:58:11 -0800 Subject: Implemented SoftBreak and new `--wrap` option. Added threefold wrapping option. * Command line option: deprecated `--no-wrap`, added `--wrap=[auto|none|preserve]` * Added WrapOption, exported from Text.Pandoc.Options * Changed type of writerWrapText in WriterOptions from Bool to WrapOption. * Modified Text.Pandoc.Shared functions for SoftBreak. * Supported SoftBreak in writers. * Updated tests. * Updated README. Closes #1701. --- src/Text/Pandoc/Options.hs | 11 ++++++-- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Shared.hs | 18 ++++++++----- src/Text/Pandoc/Writers/AsciiDoc.hs | 12 ++++++--- src/Text/Pandoc/Writers/CommonMark.hs | 5 ++-- src/Text/Pandoc/Writers/ConTeXt.hs | 8 +++++- src/Text/Pandoc/Writers/Custom.hs | 10 ++++--- src/Text/Pandoc/Writers/Docbook.hs | 6 +++-- src/Text/Pandoc/Writers/Docx.hs | 3 ++- src/Text/Pandoc/Writers/DokuWiki.hs | 9 ++++++- src/Text/Pandoc/Writers/EPUB.hs | 3 ++- src/Text/Pandoc/Writers/FB2.hs | 3 ++- src/Text/Pandoc/Writers/HTML.hs | 10 ++++--- src/Text/Pandoc/Writers/Haddock.hs | 7 ++++- src/Text/Pandoc/Writers/ICML.hs | 19 ++++++++++--- src/Text/Pandoc/Writers/LaTeX.hs | 9 ++++++- src/Text/Pandoc/Writers/Man.hs | 4 ++- src/Text/Pandoc/Writers/Markdown.hs | 44 +++++++++++++++++++++--------- src/Text/Pandoc/Writers/MediaWiki.hs | 7 +++++ src/Text/Pandoc/Writers/Native.hs | 4 +-- src/Text/Pandoc/Writers/ODT.hs | 4 +-- src/Text/Pandoc/Writers/OPML.hs | 2 +- src/Text/Pandoc/Writers/OpenDocument.hs | 48 ++++++++++++++++++--------------- src/Text/Pandoc/Writers/Org.hs | 8 +++++- src/Text/Pandoc/Writers/RST.hs | 10 ++++++- src/Text/Pandoc/Writers/RTF.hs | 1 + src/Text/Pandoc/Writers/Texinfo.hs | 8 +++++- src/Text/Pandoc/Writers/Textile.hs | 2 ++ 28 files changed, 199 insertions(+), 78 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index b25c47000..9f27f46f9 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -42,6 +42,7 @@ module Text.Pandoc.Options ( Extension(..) , ObfuscationMethod (..) , HTMLSlideVariant (..) , EPUBVersion (..) + , WrapOption (..) , WriterOptions (..) , TrackChanges (..) , def @@ -322,6 +323,12 @@ data TrackChanges = AcceptChanges | AllChanges deriving (Show, Read, Eq, Data, Typeable, Generic) +-- | Options for wrapping text in the output. +data WrapOption = WrapAuto -- ^ Automatically wrap to width + | WrapNone -- ^ No non-semantic newlines + | WrapPreserve -- ^ Preserve wrapping of input source + deriving (Show, Read, Eq, Data, Typeable, Generic) + -- | Options for writers data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ Include header and footer @@ -339,7 +346,7 @@ data WriterOptions = WriterOptions , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerDpi :: Int -- ^ Dpi for pixel to/from inch/cm conversions - , writerWrapText :: Bool -- ^ Wrap text to line length + , writerWrapText :: WrapOption -- ^ Option for wrapping text , writerColumns :: Int -- ^ Characters in a line (for text wrapping) , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML @@ -386,7 +393,7 @@ instance Default WriterOptions where , writerExtensions = pandocExtensions , writerReferenceLinks = False , writerDpi = 96 - , writerWrapText = True + , writerWrapText = WrapAuto , writerColumns = 72 , writerEmailObfuscation = JavascriptObfuscation , writerIdentifierPrefix = "" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2a901326f..0b7faadb7 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1679,7 +1679,7 @@ endline = try $ do (eof >> return mempty) <|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) <|> (guardEnabled Ext_ignore_line_breaks >> return mempty) - <|> (return $ return B.softbreak) + <|> (skipMany spaceChar >> return (return B.softbreak)) -- -- links diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index b7f567435..9d799fa52 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -375,17 +375,19 @@ isSpaceOrEmpty (Str "") = True isSpaceOrEmpty _ = False -- | Extract the leading and trailing spaces from inside an inline element --- and place them outside the element. - +-- and place them outside the element. SoftBreaks count as Spaces for +-- these purposes. extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines extractSpaces f is = let contents = B.unMany is left = case viewl contents of - (Space :< _) -> B.space - _ -> mempty + (Space :< _) -> B.space + (SoftBreak :< _) -> B.softbreak + _ -> mempty right = case viewr contents of - (_ :> Space) -> B.space - _ -> mempty in + (_ :> Space) -> B.space + (_ :> SoftBreak) -> B.softbreak + _ -> mempty in (left <> f (B.trimInlines . B.Many $ contents) <> right) -- | Normalize @Pandoc@ document, consolidating doubled 'Space's, @@ -452,6 +454,8 @@ normalizeInlines (Str x : ys) = isStr _ = False fromStr (Str z) = z fromStr _ = error "normalizeInlines - fromStr - not a Str" +normalizeInlines (Space : SoftBreak : ys) = + SoftBreak : normalizeInlines ys normalizeInlines (Space : ys) = if null rest then [] @@ -539,6 +543,7 @@ removeFormatting = query go . walk deNote where go :: Inline -> [Inline] go (Str xs) = [Str xs] go Space = [Space] + go SoftBreak = [SoftBreak] go (Code _ x) = [Str x] go (Math _ x) = [Str x] go LineBreak = [Space] @@ -553,6 +558,7 @@ stringify :: Walkable Inline a => a -> String stringify = query go . walk deNote where go :: Inline -> [Char] go Space = " " + go SoftBreak = " " go (Str x) = x go (Code _ x) = x go (Math _ x) = x diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index f42f84432..1038044e7 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -73,7 +73,7 @@ pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String pandocToAsciiDoc opts (Pandoc meta blocks) = do let titleblock = not $ null (docTitle meta) && null (docAuthors meta) && null (docDate meta) - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing metadata <- metaToJSON opts @@ -227,7 +227,7 @@ blockToAsciiDoc opts (Table caption aligns widths headers rows) = do rows' <- mapM makeRow rows head' <- makeRow headers let head'' = if all null headers then empty else head' - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then writerColumns opts else 100000 let maxwidth = maximum $ map offset (head':rows') @@ -335,7 +335,7 @@ inlineListToAsciiDoc opts lst = do x' <- withIntraword $ inlineToAsciiDoc opts x xs' <- go xs return (y' <> x' <> xs') - | x /= Space && x /= LineBreak = do + | not (isSpacy x) = do y' <- withIntraword $ inlineToAsciiDoc opts y xs' <- go (x:xs) return (y' <> xs') @@ -345,6 +345,7 @@ inlineListToAsciiDoc opts lst = do return (x' <> xs') isSpacy Space = True isSpacy LineBreak = True + isSpacy SoftBreak = True isSpacy _ = False setIntraword :: Bool -> State WriterState () @@ -391,6 +392,11 @@ inlineToAsciiDoc _ (RawInline f s) | otherwise = return empty inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr inlineToAsciiDoc _ Space = return space +inlineToAsciiDoc opts SoftBreak = + case writerWrapText opts of + WrapAuto -> return space + WrapPreserve -> return cr + WrapNone -> return space inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst inlineToAsciiDoc opts (Link _ txt (src, _tit)) = do -- relative: link:downloads/foo.zip[download foo.zip] diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index c2d476641..a786dfd24 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -75,7 +75,7 @@ blocksToCommonMark opts bs = return $ T.unpack $ nodeToCommonmark cmarkOpts colwidth $ node DOCUMENT (blocksToNodes bs) where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts + colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing @@ -84,7 +84,7 @@ inlinesToCommonMark opts ils = return $ T.unpack $ nodeToCommonmark cmarkOpts colwidth $ node PARAGRAPH (inlinesToNodes ils) where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] - colwidth = if writerWrapText opts + colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing @@ -138,6 +138,7 @@ inlineToNodes :: Inline -> [Node] -> [Node] inlineToNodes (Str s) = (node (TEXT (T.pack s)) [] :) inlineToNodes Space = (node (TEXT (T.pack " ")) [] :) inlineToNodes LineBreak = (node LINEBREAK [] :) +inlineToNodes SoftBreak = (node SOFTBREAK [] :) inlineToNodes (Emph xs) = (node EMPH (inlinesToNodes xs) :) inlineToNodes (Strong xs) = (node STRONG (inlinesToNodes xs) :) inlineToNodes (Strikeout xs) = diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index bbc5f7132..6680e3003 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -63,7 +63,7 @@ writeConTeXt options document = pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String pandocToConTeXt options (Pandoc meta blocks) = do - let colwidth = if writerWrapText options + let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing metadata <- metaToJSON options @@ -315,6 +315,12 @@ inlineToConTeXt (RawInline "context" str) = return $ text str inlineToConTeXt (RawInline "tex" str) = return $ text str inlineToConTeXt (RawInline _ _) = return empty inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr +inlineToConTeXt SoftBreak = do + wrapText <- gets (writerWrapText . stOptions) + return $ case wrapText of + WrapAuto -> space + WrapNone -> space + WrapPreserve -> cr inlineToConTeXt Space = return space -- Handle HTML-like internal document references to sections inlineToConTeXt (Link _ txt (('#' : ref), _)) = do diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index e89828911..9671fc05b 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -276,6 +276,8 @@ inlineToCustom lua (Str str) = callfunc lua "Str" str inlineToCustom lua Space = callfunc lua "Space" +inlineToCustom lua SoftBreak = callfunc lua "SoftBreak" + inlineToCustom lua (Emph lst) = callfunc lua "Emph" lst inlineToCustom lua (Strong lst) = callfunc lua "Strong" lst @@ -308,11 +310,11 @@ inlineToCustom lua (RawInline format str) = inlineToCustom lua (LineBreak) = callfunc lua "LineBreak" -inlineToCustom lua (Link _ txt (src,tit)) = - callfunc lua "Link" txt src tit +inlineToCustom lua (Link attr txt (src,tit)) = + callfunc lua "Link" txt src tit (attrToMap attr) -inlineToCustom lua (Image _ alt (src,tit)) = - callfunc lua "Image" alt src tit +inlineToCustom lua (Image attr alt (src,tit)) = + callfunc lua "Image" alt src tit (attrToMap attr) inlineToCustom lua (Note contents) = callfunc lua "Note" contents diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index d2c39e3b9..2aaebf99f 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -52,7 +52,7 @@ import Data.Generics (everywhere, mkT) authorToDocbook :: WriterOptions -> [Inline] -> B.Inlines authorToDocbook opts name' = let name = render Nothing $ inlinesToDocbook opts name' - colwidth = if writerWrapText opts + colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing in B.rawInline "docbook" $ render colwidth $ @@ -76,7 +76,7 @@ authorToDocbook opts name' = writeDocbook :: WriterOptions -> Pandoc -> String writeDocbook opts (Pandoc meta blocks) = let elements = hierarchicalize blocks - colwidth = if writerWrapText opts + colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing render' = render colwidth @@ -331,6 +331,8 @@ inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x | otherwise = empty inlineToDocbook _ LineBreak = text "\n" inlineToDocbook _ Space = space +-- because we use \n for LineBreak, we can't do soft breaks: +inlineToDocbook _ SoftBreak = space inlineToDocbook opts (Link attr txt (src, _)) | Just email <- stripPrefix "mailto:" src = let emailLink = inTagsSimple "email" $ text $ diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index c46a5efae..f9f5c8d69 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -245,7 +245,7 @@ writeDocx opts doc@(Pandoc meta _) = do let tocTitle = fromMaybe (stTocTitle defaultWriterState) $ metaValueToInlines <$> lookupMeta "toc-title" meta - ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc') + ((contents, footnotes), st) <- runStateT (writeOpenXML opts{writerWrapText = WrapAuto} doc') defaultWriterState{ stChangesAuthor = fromMaybe "unknown" username , stChangesDate = formatTime defaultTimeLocale "%FT%XZ" utctime , stPrintWidth = (maybe 420 (\x -> quot x 20) pgContentWidth) @@ -981,6 +981,7 @@ setFirstPara = modify $ \s -> s { stFirstPara = True } inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] inlineToOpenXML _ (Str str) = formattedString str inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") +inlineToOpenXML opts SoftBreak = inlineToOpenXML opts (Str " ") inlineToOpenXML opts (Span (_,classes,kvs) ils) | "insertion" `elem` classes = do defaultAuthor <- gets stChangesAuthor diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 730b31fe8..f1088b158 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -43,7 +43,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Options ( WriterOptions( writerTableOfContents , writerStandalone - , writerTemplate) ) + , writerTemplate + , writerWrapText), WrapOption(..) ) import Text.Pandoc.Shared ( escapeURI, removeFormatting, camelCaseToHyphenated , trimr, normalize, substitute ) import Text.Pandoc.Writers.Shared ( defField, metaToJSON ) @@ -461,6 +462,12 @@ inlineToDokuWiki _ (RawInline f str) inlineToDokuWiki _ (LineBreak) = return "\\\\\n" +inlineToDokuWiki opts SoftBreak = + case writerWrapText opts of + WrapNone -> return " " + WrapAuto -> return " " + WrapPreserve -> return "\n" + inlineToDokuWiki _ Space = return " " inlineToDokuWiki opts (Link _ txt (src, _)) = do diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index f4989c8ea..64f94f41f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -50,6 +50,7 @@ import Text.Pandoc.Shared ( renderTags', safeRead, uniqueIdent, trim import qualified Text.Pandoc.Shared as S (Element(..)) import Text.Pandoc.Builder (fromList, setMeta) import Text.Pandoc.Options ( WriterOptions(..) + , WrapOption(..) , HTMLMathMethod(..) , EPUBVersion(..) , ObfuscationMethod(NoObfuscation) ) @@ -350,7 +351,7 @@ writeEPUB opts doc@(Pandoc meta _) = do if epub3 then MathML Nothing else writerHTMLMathMethod opts - , writerWrapText = True } + , writerWrapText = WrapAuto } metadata <- getEPUBMetadata opts' meta -- cover page diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index bc936fce5..a1658eb1a 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -27,7 +27,7 @@ FictionBook is an XML-based e-book format. For more information see: -} module Text.Pandoc.Writers.FB2 (writeFB2) where -import Control.Monad.State (StateT, evalStateT, get, modify) +import Control.Monad.State (StateT, evalStateT, get, gets, modify) import Control.Monad.State (liftM, liftM2, liftIO) import Data.ByteString.Base64 (encode) import Data.Char (toLower, isSpace, isAscii, isControl) @@ -439,6 +439,7 @@ toXml (Quoted DoubleQuote ss) = do toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles toXml (Code _ s) = return [el "code" s] toXml Space = return [txt " "] +toXml SoftBreak = return [txt " "] toXml LineBreak = return [el "empty-line" ()] toXml (Math _ formula) = insertMath InlineImage formula toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 67d398a4d..73a8906c3 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -95,9 +95,9 @@ strToHtml [] = "" -- | Hard linebreak. nl :: WriterOptions -> Html -nl opts = if writerWrapText opts - then preEscapedString "\n" - else mempty +nl opts = if writerWrapText opts == WrapNone + then mempty + else preEscapedString "\n" -- | Convert Pandoc document to Html string. writeHtmlString :: WriterOptions -> Pandoc -> String @@ -697,6 +697,10 @@ inlineToHtml opts inline = case inline of (Str str) -> return $ strToHtml str (Space) -> return $ strToHtml " " + (SoftBreak) -> return $ case writerWrapText opts of + WrapNone -> preEscapedString " " + WrapAuto -> preEscapedString " " + WrapPreserve -> preEscapedString "\n" (LineBreak) -> return $ (if writerHtml5 opts then H5.br else H.br) <> strToHtml "\n" (Span (id',classes,kvs) ils) diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 118d42d7d..2e5f2dd08 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -57,7 +57,7 @@ writeHaddock opts document = -- | Return haddock representation of document. pandocToHaddock :: WriterOptions -> Pandoc -> State WriterState String pandocToHaddock opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing body <- blockListToHaddock opts blocks @@ -325,6 +325,11 @@ inlineToHaddock _ (RawInline f str) | otherwise = return empty -- no line break in haddock (see above on CodeBlock) inlineToHaddock _ (LineBreak) = return cr +inlineToHaddock opts SoftBreak = + case writerWrapText opts of + WrapAuto -> return space + WrapNone -> return space + WrapPreserve -> return cr inlineToHaddock _ Space = return space inlineToHaddock opts (Cite _ lst) = inlineListToHaddock opts lst inlineToHaddock _ (Link _ txt (src, _)) = do diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index eb6d135ca..b73fc78cf 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -122,7 +122,7 @@ citeName = "Cite" -- | Convert Pandoc document to string in ICML format. writeICML :: WriterOptions -> Pandoc -> IO String writeICML opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing render' = render colwidth @@ -414,6 +414,11 @@ inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [S inlineToICML opts style (Cite _ lst) = inlinesToICML opts (citeName:style) lst inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str inlineToICML _ style Space = charStyle style space +inlineToICML opts style SoftBreak = + case writerWrapText opts of + WrapAuto -> charStyle style space + WrapNone -> charStyle style space + WrapPreserve -> charStyle style cr inlineToICML _ style LineBreak = charStyle style $ text lineSeparator inlineToICML opts style (Math mt str) = cat <$> mapM (inlineToICML opts style) (texMathToInlines mt str) @@ -449,12 +454,18 @@ footnoteToICML opts style lst = -- | Auxiliary function to merge Space elements into the adjacent Strs. mergeSpaces :: [Inline] -> [Inline] -mergeSpaces ((Str s):(Space:((Str s'):xs))) = mergeSpaces $ Str(s++" "++s') : xs -mergeSpaces (Space:((Str s):xs)) = mergeSpaces $ Str (" "++s) : xs -mergeSpaces ((Str s):(Space:xs)) = mergeSpaces $ Str (s++" ") : xs +mergeSpaces ((Str s):(x:((Str s'):xs))) | isSp x = + mergeSpaces $ Str(s++" "++s') : xs +mergeSpaces (x:((Str s):xs)) | isSp x = mergeSpaces $ Str (" "++s) : xs +mergeSpaces ((Str s):(x:xs)) | isSp x = mergeSpaces $ Str (s++" ") : xs mergeSpaces (x:xs) = x : (mergeSpaces xs) mergeSpaces [] = [] +isSp :: Inline -> Bool +isSp Space = True +isSp SoftBreak = True +isSp _ = False + -- | Wrap a list of inline elements in an ICML Paragraph Style parStyle :: WriterOptions -> Style -> [Inline] -> WS Doc parStyle opts style lst = diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index fc069cfcf..d7e0ec530 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -105,7 +105,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do modify $ \s -> s{ stInternalLinks = query isInternalLink blocks' } let template = writerTemplate options -- set stBook depending on documentclass - let colwidth = if writerWrapText options + let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing metadata <- metaToJSON options @@ -357,6 +357,7 @@ isListBlock _ = False isLineBreakOrSpace :: Inline -> Bool isLineBreakOrSpace LineBreak = True +isLineBreakOrSpace SoftBreak = True isLineBreakOrSpace Space = True isLineBreakOrSpace _ = False @@ -896,6 +897,12 @@ inlineToLaTeX (RawInline f str) = return $ text str | otherwise = return empty inlineToLaTeX (LineBreak) = return $ "\\\\" <> cr +inlineToLaTeX SoftBreak = do + wrapText <- gets (writerWrapText . stOptions) + case wrapText of + WrapAuto -> return space + WrapNone -> return space + WrapPreserve -> return cr inlineToLaTeX Space = return space inlineToLaTeX (Link _ txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index b8b1c1fdd..5c7d760ac 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -54,7 +54,7 @@ writeMan opts document = evalState (pandocToMan opts document) (WriterState [] F -- | Return groff man representation of document. pandocToMan :: WriterOptions -> Pandoc -> State WriterState String pandocToMan opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing let render' = render colwidth @@ -146,6 +146,7 @@ breakSentence xs = [] -> (as, []) [c] -> (as ++ [c], []) (c:Space:cs) -> (as ++ [c], cs) + (c:SoftBreak:cs) -> (as ++ [c], cs) (Str ".":Str (')':ys):cs) -> (as ++ [Str ".", Str (')':ys)], cs) (x@(Str ('.':')':_)):cs) -> (as ++ [x], cs) (LineBreak:x@(Str ('.':_)):cs) -> (as ++[LineBreak], x:cs) @@ -343,6 +344,7 @@ inlineToMan _ (RawInline f str) | otherwise = return empty inlineToMan _ (LineBreak) = return $ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr +inlineToMan _ SoftBreak = return space inlineToMan _ Space = return space inlineToMan opts (Link _ txt (src, _)) = do linktext <- inlineListToMan opts txt diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index ab36832f2..79a2dddf9 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -71,8 +71,9 @@ instance Default WriterState writeMarkdown :: WriterOptions -> Pandoc -> String writeMarkdown opts document = evalState (pandocToMarkdown opts{ - writerWrapText = writerWrapText opts && - not (isEnabled Ext_hard_line_breaks opts) } + writerWrapText = if isEnabled Ext_hard_line_breaks opts + then WrapNone + else writerWrapText opts } document) def -- | Convert Pandoc to plain text (like markdown, but without links, @@ -144,7 +145,7 @@ jsonToYaml _ = empty -- | Return markdown representation of document. pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState String pandocToMarkdown opts (Pandoc meta blocks) = do - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing isPlain <- gets stPlain @@ -324,7 +325,7 @@ blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker st <- get - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing let rendered = render colwidth contents @@ -708,6 +709,10 @@ inlineListToMarkdown opts lst = do Space:(Str('[':_)):_ -> unshortcutable Space:(RawInline _ ('[':_)):_ -> unshortcutable Space:(Cite _ _):_ -> unshortcutable + SoftBreak:(Link _ _ _):_ -> unshortcutable + SoftBreak:(Str('[':_)):_ -> unshortcutable + SoftBreak:(RawInline _ ('[':_)):_ -> unshortcutable + SoftBreak:(Cite _ _):_ -> unshortcutable (Cite _ _):_ -> unshortcutable Str ('[':_):_ -> unshortcutable (RawInline _ ('[':_)):_ -> unshortcutable @@ -721,18 +726,25 @@ inlineListToMarkdown opts lst = do modify (\s -> s {stRefShortcutable = True }) fmap (iMark <>) (go is) +isSp :: Inline -> Bool +isSp Space = True +isSp SoftBreak = True +isSp _ = False + avoidBadWrapsInList :: [Inline] -> [Inline] avoidBadWrapsInList [] = [] -avoidBadWrapsInList (Space:Str ('>':cs):xs) = +avoidBadWrapsInList (s:Str ('>':cs):xs) | isSp s = Str (' ':'>':cs) : avoidBadWrapsInList xs -avoidBadWrapsInList (Space:Str [c]:[]) - | c `elem` ['-','*','+'] = Str [' ', c] : [] -avoidBadWrapsInList (Space:Str [c]:Space:xs) - | c `elem` ['-','*','+'] = Str [' ', c] : Space : avoidBadWrapsInList xs -avoidBadWrapsInList (Space:Str cs:Space:xs) - | isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList (Space:Str cs:[]) - | isOrderedListMarker cs = Str (' ':cs) : [] +avoidBadWrapsInList (s:Str [c]:[]) + | isSp s && c `elem` ['-','*','+'] = Str [' ', c] : [] +avoidBadWrapsInList (s:Str [c]:Space:xs) + | isSp s && c `elem` ['-','*','+'] = + Str [' ', c] : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str cs:Space:xs) + | isSp s && isOrderedListMarker cs = + Str (' ':cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str cs:[]) + | isSp s && isOrderedListMarker cs = Str (' ':cs) : [] avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs isOrderedListMarker :: String -> Bool @@ -747,6 +759,7 @@ isRight (Left _) = False escapeSpaces :: Inline -> Inline escapeSpaces (Str s) = Str $ substitute " " "\\ " s escapeSpaces Space = Str "\\ " +escapeSpaces SoftBreak = Str "\\ " escapeSpaces x = x -- | Convert Pandoc inline element to markdown. @@ -876,6 +889,11 @@ inlineToMarkdown opts (LineBreak) = do then "\\" <> cr else " " <> cr inlineToMarkdown _ Space = return space +inlineToMarkdown opts SoftBreak = return $ + case writerWrapText opts of + WrapNone -> space + WrapAuto -> space + WrapPreserve -> cr inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Cite (c:cs) lst) | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 1aae15354..d14865612 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -397,6 +397,13 @@ inlineToMediaWiki (RawInline f str) inlineToMediaWiki (LineBreak) = return "
\n" +inlineToMediaWiki SoftBreak = do + wrapText <- gets (writerWrapText . stOptions) + case wrapText of + WrapAuto -> return " " + WrapNone -> return " " + WrapPreserve -> return "\n" + inlineToMediaWiki Space = return " " inlineToMediaWiki (Link _ txt (src, _)) = do diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 2343ff1a8..fc96e3e3c 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -34,7 +34,7 @@ metadata. -} module Text.Pandoc.Writers.Native ( writeNative ) where -import Text.Pandoc.Options ( WriterOptions(..) ) +import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) import Data.List ( intersperse ) import Text.Pandoc.Definition import Text.Pandoc.Pretty @@ -70,7 +70,7 @@ prettyBlock block = text $ show block -- | Prettyprint Pandoc document. writeNative :: WriterOptions -> Pandoc -> String writeNative opts (Pandoc meta blocks) = - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing withHead = if writerStandalone opts diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 835e79ce7..28f8b58aa 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -37,7 +37,7 @@ import Text.TeXMath import qualified Data.ByteString.Lazy as B import Text.Pandoc.UTF8 ( fromStringLazy ) import Codec.Archive.Zip -import Text.Pandoc.Options ( WriterOptions(..) ) +import Text.Pandoc.Options ( WriterOptions(..), WrapOption(..) ) import Text.Pandoc.Shared ( stringify, fetchItem', warn, getDefaultReferenceODT ) import Text.Pandoc.ImageSize ( imageSize, desiredSizeInPoints ) @@ -67,7 +67,7 @@ writeODT opts doc@(Pandoc meta _) = do -- handle formulas and pictures picEntriesRef <- newIORef ([] :: [Entry]) doc' <- walkM (transformPicMath opts picEntriesRef) $ walk fixDisplayMath doc - let newContents = writeOpenDocument opts{writerWrapText = False} doc' + let newContents = writeOpenDocument opts{writerWrapText = WrapAuto} doc' epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime $ fromStringLazy newContents diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 519136861..5770c3c6f 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -45,7 +45,7 @@ import qualified Text.Pandoc.Builder as B writeOPML :: WriterOptions -> Pandoc -> String writeOPML opts (Pandoc meta blocks) = let elements = hierarchicalize blocks - colwidth = if writerWrapText opts + colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing meta' = B.setMeta "date" (B.str $ convertDate $ docDate meta) meta diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index dad6b431e..8e55a4016 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -175,7 +175,7 @@ handleSpaces s -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: WriterOptions -> Pandoc -> String writeOpenDocument opts (Pandoc meta blocks) = - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing render' = render colwidth @@ -374,27 +374,31 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l -- | Convert an inline element to OpenDocument. inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils - | Space <- ils = inTextStyle space - | Span _ xs <- ils = inlinesToOpenDocument o xs - | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] - | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s - | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l - | Strong l <- ils = withTextStyle Bold $ inlinesToOpenDocument o l - | Strikeout l <- ils = withTextStyle Strike $ inlinesToOpenDocument o l - | Superscript l <- ils = withTextStyle Sup $ inlinesToOpenDocument o l - | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l - | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l - | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l - | Code _ s <- ils = withTextStyle Pre $ inTextStyle $ preformatted s - | Math t s <- ils = inlinesToOpenDocument o (texMathToInlines t s) - | Cite _ l <- ils = inlinesToOpenDocument o l - | RawInline f s <- ils = if f == Format "opendocument" - then return $ text s - else return empty - | Link _ l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image attr _ (s,t) <- ils = mkImg attr s t - | Note l <- ils = mkNote l - | otherwise = return empty + = case ils of + Space -> inTextStyle space + SoftBreak + | writerWrapText o == WrapPreserve + -> inTextStyle (preformatted "\n") + | otherwise -> inTextStyle space + Span _ xs -> inlinesToOpenDocument o xs + LineBreak -> return $ selfClosingTag "text:line-break" [] + Str s -> inTextStyle $ handleSpaces $ escapeStringForXML s + Emph l -> withTextStyle Italic $ inlinesToOpenDocument o l + Strong l -> withTextStyle Bold $ inlinesToOpenDocument o l + Strikeout l -> withTextStyle Strike $ inlinesToOpenDocument o l + Superscript l -> withTextStyle Sup $ inlinesToOpenDocument o l + Subscript l -> withTextStyle Sub $ inlinesToOpenDocument o l + SmallCaps l -> withTextStyle SmallC $ inlinesToOpenDocument o l + Quoted t l -> inQuotes t <$> inlinesToOpenDocument o l + Code _ s -> withTextStyle Pre $ inTextStyle $ preformatted s + Math t s -> inlinesToOpenDocument o (texMathToInlines t s) + Cite _ l -> inlinesToOpenDocument o l + RawInline f s -> if f == Format "opendocument" + then return $ text s + else return empty + Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l + Image attr _ (s,t) -> mkImg attr s t + Note l -> mkNote l where preformatted s = handleSpaces $ escapeStringForXML s mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 75967fa2a..d843d2efd 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -61,7 +61,7 @@ writeOrg opts document = pandocToOrg :: Pandoc -> State WriterState String pandocToOrg (Pandoc meta blocks) = do opts <- liftM stOptions get - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing metadata <- metaToJSON opts @@ -275,6 +275,12 @@ inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str inlineToOrg (RawInline _ _) = return empty inlineToOrg (LineBreak) = return (text "\\\\" <> cr) inlineToOrg Space = return space +inlineToOrg SoftBreak = do + wrapText <- gets (writerWrapText . stOptions) + case wrapText of + WrapPreserve -> return cr + WrapAuto -> return space + WrapNone -> return space inlineToOrg (Link _ txt (src, _)) = do case txt of [Str x] | escapeURI x == src -> -- autolink diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 94c54c250..3b44a6cb0 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -70,7 +70,7 @@ writeRST opts document = pandocToRST :: Pandoc -> State WriterState String pandocToRST (Pandoc meta blocks) = do opts <- liftM stOptions get - let colwidth = if writerWrapText opts + let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing let subtit = case lookupMeta "subtitle" meta of @@ -378,11 +378,13 @@ inlineListToRST lst = surroundComplex _ _ = False okAfterComplex :: Inline -> Bool okAfterComplex Space = True + okAfterComplex SoftBreak = True okAfterComplex LineBreak = True okAfterComplex (Str (c:_)) = isSpace c || c `elem` ("-.,:;!?\\/'\")]}>–—" :: String) okAfterComplex _ = False okBeforeComplex :: Inline -> Bool okBeforeComplex Space = True + okBeforeComplex SoftBreak = True okBeforeComplex LineBreak = True okBeforeComplex (Str (c:_)) = isSpace c || c `elem` ("-:/'\"<([{–—" :: String) okBeforeComplex _ = False @@ -446,6 +448,12 @@ inlineToRST (RawInline f x) | otherwise = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST (see Para) inlineToRST Space = return space +inlineToRST SoftBreak = do + wrapText <- gets (writerWrapText . stOptions) + case wrapText of + WrapPreserve -> return cr + WrapAuto -> return space + WrapNone -> return space -- autolink inlineToRST (Link _ [Str str] (src, _)) | isURI src && diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index dabe5cf78..79a28c880 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -349,6 +349,7 @@ inlineToRTF (RawInline f str) | f == Format "rtf" = str | otherwise = "" inlineToRTF (LineBreak) = "\\line " +inlineToRTF SoftBreak = " " inlineToRTF Space = " " inlineToRTF (Link _ text (src, _)) = "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index cd9e2ef3d..1aefaa678 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -75,7 +75,7 @@ pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState String pandocToTexinfo options (Pandoc meta blocks) = do let titlePage = not $ all null $ docTitle meta : docDate meta : docAuthors meta - let colwidth = if writerWrapText options + let colwidth = if writerWrapText options == WrapAuto then Just $ writerColumns options else Nothing metadata <- metaToJSON options @@ -425,6 +425,12 @@ inlineToTexinfo (RawInline f str) | f == "texinfo" = return $ text str | otherwise = return empty inlineToTexinfo (LineBreak) = return $ text "@*" <> cr +inlineToTexinfo SoftBreak = do + wrapText <- gets (writerWrapText . stOptions) + case wrapText of + WrapAuto -> return space + WrapNone -> return space + WrapPreserve -> return cr inlineToTexinfo Space = return space inlineToTexinfo (Link _ txt (src@('#':_), _)) = do diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 1d5734c96..98f9157fb 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -434,6 +434,8 @@ inlineToTextile opts (RawInline f str) inlineToTextile _ (LineBreak) = return "\n" +inlineToTextile _ SoftBreak = return " " + inlineToTextile _ Space = return " " inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do -- cgit v1.2.3