diff options
-rw-r--r-- | MANUAL.txt | 2 | ||||
-rw-r--r-- | benchmark/benchmark-pandoc.hs | 60 | ||||
-rw-r--r-- | doc/lua-filters.md | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Combine.hs | 101 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Ms.hs | 10 | ||||
-rw-r--r-- | test/docx/golden/inline_formatting.docx | bin | 9989 -> 9987 bytes | |||
-rw-r--r-- | test/docx/inline_formatting.native | 2 |
8 files changed, 89 insertions, 101 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index b9ef68cd2..893b4ae2c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -902,7 +902,7 @@ header when requesting a document from a URL: downloaded). If you're behind a proxy, you also need to set the environment variable `http_proxy` to `http://...`. -`--no-check-certificate +`--no-check-certificate` : Disable the certificate verification to allow access to unsecure HTTP resources (for example when the certificate diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 967728f5d..3e7b663b4 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,6 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TupleSections #-} {- Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu> @@ -21,39 +20,49 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import Prelude import Text.Pandoc import Text.Pandoc.MIME -import Text.Pandoc.Error (PandocError(..)) -import Control.Monad.Except (throwError) +import Control.Monad.Except (throwError, liftIO) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.ByteString as B import qualified Data.Text as T import Criterion.Main import Criterion.Types (Config(..)) import Data.List (intersect) -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, catMaybes) import System.Environment (getArgs) import qualified Data.ByteString.Lazy as BL +data Input = InputText {unInputText :: T.Text} + | InputBS {unInputBS :: BL.ByteString} + readerBench :: Pandoc -> T.Text - -> Maybe Benchmark -readerBench doc name = - case res of - Right (readerFun, inp) -> - Just $ bench (T.unpack $ name <> " reader") - $ nf (\i -> either (error . show) id $ runPure (readerFun i)) - inp - Left _ -> Nothing - where res = runPure $ do - (rdr, rexts) <- getReader name + -> IO (Maybe Benchmark) +readerBench doc name = do + let (rdr, rexts) = either (error . show) id . runPure $ getReader name + res <- runIO $ do (wtr, wexts) <- getWriter name case (rdr, wtr) of (TextReader r, TextWriter w) -> do - setResourcePath ["../test"] + setResourcePath ["./test"] inp <- w def{ writerWrapText = WrapAuto , writerExtensions = wexts } doc - return $ (r def{ readerExtensions = rexts }, inp) - _ -> throwError $ PandocSomeError $ "not a text format: " + return (r def{ readerExtensions = rexts } . unInputText, InputText inp) + (ByteStringReader r, ByteStringWriter w) -> do + setResourcePath ["./test"] + tmpl <- Just <$> compileDefaultTemplate name + inp <- w def{ writerWrapText = WrapAuto + , writerExtensions = wexts + , writerTemplate = tmpl } doc + liftIO $ BL.writeFile "/tmp/test.odt" inp + return (r def{ readerExtensions = rexts } . unInputBS, InputBS inp) + _ -> throwError $ PandocSomeError $ "text/bytestring format mismatch: " <> name + return $ case res of + Right (readerFun, inp) -> + Just $ bench (T.unpack $ name <> " reader") + $ nf (\i -> either (error . show) id $ runPure (readerFun i)) + inp + Left _ -> Nothing getImages :: IO [(FilePath, MimeType, BL.ByteString)] getImages = do @@ -89,22 +98,23 @@ main :: IO () main = do args <- filter (\x -> T.take 1 x /= "-") . fmap T.pack <$> getArgs print args - let matchReader (n, TextReader _) = + let matchReader (n, _) = null args || ("reader" `elem` args && n `elem` args) - matchReader _ = False - let matchWriter (n, TextWriter _) = + matchWriter (n, TextWriter _) = null args || ("writer" `elem` args && n `elem` args) matchWriter _ = False - let matchedReaders = map fst $ (filter matchReader readers + allWriters = map fst (writers :: [(T.Text, Writer PandocPure)]) + matchedReaders = map fst (filter matchReader readers :: [(T.Text, Reader PandocPure)]) - let matchedWriters = map fst $ (filter matchWriter writers + matchedWriters = map fst (filter matchWriter writers :: [(T.Text, Writer PandocPure)]) inp <- UTF8.toText <$> B.readFile "test/testsuite.txt" let opts = def let doc = either (error . show) id $ runPure $ readMarkdown opts inp - let readerBs = mapMaybe (readerBench doc) - $ filter (/="haddock") - (matchedReaders `intersect` matchedWriters) + readerBs <- fmap catMaybes + $ mapM (readerBench doc) + $ filter (/="haddock") + (matchedReaders `intersect` allWriters) -- we need the corresponding writer to generate -- input for the reader let writerBs = mapMaybe (writerBench doc) matchedWriters diff --git a/doc/lua-filters.md b/doc/lua-filters.md index c5f7d2713..cc49da09b 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -138,7 +138,7 @@ elements. ## Filters on element sequences -For some filtering tasks, the it is necessary to know the order +For some filtering tasks, it is necessary to know the order in which elements occur in the document. It is not enough then to inspect a single element at a time. @@ -1127,7 +1127,7 @@ A [table cell]{#type-table-cell} is a list of blocks. *[Alignment]{#type-alignment}* is a string value indicating the horizontal alignment of a table column. `AlignLeft`, -`AlignRight`, and `AlignCenter` leads cell content tob be +`AlignRight`, and `AlignCenter` leads cell content to be left-aligned, right-aligned, and centered, respectively. The default alignment is `AlignDefault` (often equivalent to centered). @@ -3035,7 +3035,7 @@ methods and convenience functions. [`pandoc.List:insert ([pos], value)`]{#pandoc.list:insert} : Inserts element `value` at position `pos` in list, shifting - elements to the next-greater indix if necessary. + elements to the next-greater index if necessary. This function is identical to [`table.insert`](https://www.lua.org/manual/5.3/manual.html#6.6). @@ -3082,7 +3082,7 @@ methods and convenience functions. Parameters: `pos`: - : position of the list value that will be remove; defaults + : position of the list value that will be removed; defaults to the index of the last element Returns: the removed element diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index 54736cd0e..427a73dbe 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,10 +1,9 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE PatternGuards #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, - 2014-2020 John MacFarlane <jgm@berkeley.edu> + 2014-2020 John MacFarlane <jgm@berkeley.edu>, + 2020 Nikolay Yakimov <root@livid.pp.ru> License : GNU GPL, version 2 or above Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -59,79 +58,61 @@ module Text.Pandoc.Readers.Docx.Combine ( smushInlines where import Data.List -import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>)) -import qualified Data.Sequence as Seq (null) +import Data.Bifunctor +import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl + , (><), (|>) ) import Text.Pandoc.Builder data Modifier a = Modifier (a -> a) | AttrModifier (Attr -> a -> a) Attr - | NullModifier spaceOutInlinesL :: Inlines -> (Inlines, Inlines) spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) - where (l, m, r) = spaceOutInlines ms - (fs, m') = unstackInlines m + where (l, (fs, m'), r) = spaceOutInlines ms spaceOutInlinesR :: Inlines -> (Inlines, Inlines) spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) - where (l, m, r) = spaceOutInlines ms - (fs, m') = unstackInlines m + where (l, (fs, m'), r) = spaceOutInlines ms -spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) +spaceOutInlines :: Inlines -> (Inlines, ([Modifier Inlines], Inlines), Inlines) spaceOutInlines ils = let (fs, ils') = unstackInlines ils - contents = unMany ils' - left = case viewl contents of - (Space :< _) -> space - _ -> mempty - right = case viewr contents of - (_ :> Space) -> space - _ -> mempty in - (left, stackInlines fs $ trimInlines . Many $ contents, right) + (left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils' + -- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element + in (Many left, (fs, Many contents'), Many right) + +isSpace :: Inline -> Bool +isSpace Space = True +isSpace SoftBreak = True +isSpace _ = False stackInlines :: [Modifier Inlines] -> Inlines -> Inlines stackInlines [] ms = ms -stackInlines (NullModifier : fs) ms = stackInlines fs ms stackInlines (Modifier f : fs) ms = - if isEmpty ms + if null ms then stackInlines fs ms else f $ stackInlines fs ms stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) -unstackInlines ms = case ilModifier ms of - NullModifier -> ([], ms) - _ -> (f : fs, ms') where - f = ilModifier ms - (fs, ms') = unstackInlines $ ilInnards ms - -ilModifier :: Inlines -> Modifier Inlines -ilModifier ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph _) -> Modifier emph - (Strong _) -> Modifier strong - (SmallCaps _) -> Modifier smallcaps - (Strikeout _) -> Modifier strikeout - (Superscript _) -> Modifier superscript - (Subscript _) -> Modifier subscript - (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) - (Span attr _) -> AttrModifier spanWith attr - _ -> NullModifier - _ -> NullModifier - -ilInnards :: Inlines -> Inlines -ilInnards ils = case viewl (unMany ils) of - (x :< xs) | Seq.null xs -> case x of - (Emph lst) -> fromList lst - (Strong lst) -> fromList lst - (SmallCaps lst) -> fromList lst - (Strikeout lst) -> fromList lst - (Superscript lst) -> fromList lst - (Subscript lst) -> fromList lst - (Link _ lst _) -> fromList lst - (Span _ lst) -> fromList lst - _ -> ils - _ -> ils +unstackInlines ms = case ilModifierAndInnards ms of + Nothing -> ([], ms) + Just (f, inner) -> first (f :) $ unstackInlines inner + +ilModifierAndInnards :: Inlines -> Maybe (Modifier Inlines, Inlines) +ilModifierAndInnards ils = case viewl $ unMany ils of + x :< xs | null xs -> second fromList <$> case x of + Emph lst -> Just (Modifier emph, lst) + Strong lst -> Just (Modifier strong, lst) + SmallCaps lst -> Just (Modifier smallcaps, lst) + Strikeout lst -> Just (Modifier strikeout, lst) + Underline lst -> Just (Modifier underline, lst) + Superscript lst -> Just (Modifier superscript, lst) + Subscript lst -> Just (Modifier subscript, lst) + Link attr lst tgt -> Just (Modifier $ linkWith attr (fst tgt) (snd tgt), lst) + Span attr lst -> Just (AttrModifier spanWith attr, lst) + _ -> Nothing + _ -> Nothing inlinesL :: Inlines -> (Inlines, Inlines) inlinesL ils = case viewl $ unMany ils of @@ -161,12 +142,12 @@ combineSingletonInlines x y = y_rem_attr = filter isAttrModifier y_remaining in case null shared of - True | isEmpty xs && isEmpty ys -> - stackInlines (x_rem_attr ++ y_rem_attr) mempty - | isEmpty xs -> + True | null xs && null ys -> + stackInlines (x_rem_attr <> y_rem_attr) mempty + | null xs -> let (sp, y') = spaceOutInlinesL y in stackInlines x_rem_attr mempty <> sp <> y' - | isEmpty ys -> + | null ys -> let (x', sp) = spaceOutInlinesR x in x' <> sp <> stackInlines y_rem_attr mempty | otherwise -> @@ -193,12 +174,8 @@ combineBlocks bs cs = bs <> cs instance (Monoid a, Eq a) => Eq (Modifier a) where (Modifier f) == (Modifier g) = f mempty == g mempty (AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty - NullModifier == NullModifier = True _ == _ = False -isEmpty :: (Monoid a, Eq a) => a -> Bool -isEmpty x = x == mempty - isAttrModifier :: Modifier a -> Bool isAttrModifier (AttrModifier _ _) = True isAttrModifier _ = False diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e558836a1..7c25be486 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -223,7 +223,8 @@ rawFieldListItem minIndent = try $ do first <- anyLine rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock - let raw = (if T.null first then "" else first <> "\n") <> rest <> "\n" + let raw = (if T.null first then "" else first <> "\n") <> rest <> + (if T.null first && T.null rest then "" else "\n") return (name, raw) fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) @@ -484,7 +485,7 @@ includeDirective top fields body = do Just patt -> drop 1 . dropWhile (not . (patt `T.isInfixOf`)) Nothing -> id) $ contentLines' - let contents' = T.unlines contentLines'' <> "\n" + let contents' = T.unlines contentLines'' case lookup "code" fields of Just lang -> do let classes = maybe [] T.words (lookup "class" fields) @@ -494,7 +495,7 @@ includeDirective top fields body = do Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do setPosition $ newPos (T.unpack f) 1 1 - setInput contents' + setInput $ contents' <> "\n" bs <- optional blanklines >> (mconcat <$> many block) setInput oldInput diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index b6522b352..561053c88 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -519,11 +519,11 @@ toMacro sty toktype = msFormatter :: WriterOptions -> FormatOptions -> [SourceLine] -> Doc Text msFormatter opts _fmtopts = - vcat . map fmtLine - where fmtLine = hcat . map fmtToken - fmtToken (toktype, tok) = literal "\\*" <> - brackets (literal (tshow toktype) <> literal " \"" - <> literal (escapeStr opts tok) <> literal "\"") + literal . T.intercalate "\n" . map fmtLine + where + fmtLine = mconcat . map fmtToken + fmtToken (toktype, tok) = + "\\*[" <> (tshow toktype) <> " \"" <> (escapeStr opts tok) <> "\"]" highlightCode :: PandocMonad m => WriterOptions -> Attr -> Text -> MS m (Doc Text) highlightCode opts attr str = diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx Binary files differindex 5efe66edd..ddfd45280 100644 --- a/test/docx/golden/inline_formatting.docx +++ b/test/docx/golden/inline_formatting.docx diff --git a/test/docx/inline_formatting.native b/test/docx/inline_formatting.native index 000896df9..df749ffef 100644 --- a/test/docx/inline_formatting.native +++ b/test/docx/inline_formatting.native @@ -1,6 +1,6 @@ Pandoc (Meta {unMeta = fromList []}) [Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."] ,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."] -,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space],Emph [Underline [Str "emphasis"]],Str "."] +,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."] ,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."] ,Para [Str "A",Space,Str "line",LineBreak,Str "break."]] |