diff options
| -rw-r--r-- | .github/workflows/ci.yml | 14 | ||||
| -rw-r--r-- | MANUAL.txt | 2 | ||||
| -rw-r--r-- | benchmark/benchmark-pandoc.hs | 60 | ||||
| -rw-r--r-- | doc/lua-filters.md | 8 | ||||
| -rw-r--r-- | pandoc.cabal | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Combine.hs | 101 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 7 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/Ms.hs | 14 | ||||
| -rw-r--r-- | test/Tests/Writers/Ms.hs | 37 | ||||
| -rw-r--r-- | test/docx/golden/inline_formatting.docx | bin | 9989 -> 9987 bytes | |||
| -rw-r--r-- | test/docx/inline_formatting.native | 2 | ||||
| -rw-r--r-- | test/test-pandoc.hs | 2 | 
12 files changed, 136 insertions, 112 deletions
| diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e1c5e9165..1617672ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,16 +61,10 @@ jobs:      # need to install older cabal/ghc versions from ppa repository      - name: Install recent cabal/ghc -      run: | -         if [[ ! -d /opt/ghc/${{ matrix.versions.ghc }} ]] -         then -           sudo add-apt-repository ppa:hvr/ghc -           sudo apt-get update -           sudo apt-get install ghc-${{ matrix.versions.ghc }} cabal-install-${{ matrix.versions.cabal }} -         fi -         # Use a GitHub workflow command to add folders to PATH. -         echo "::add-path::/opt/ghc/${{ matrix.versions.ghc }}/bin" -         echo "::add-path::/opt/cabal/${{ matrix.versions.cabal }}/bin" +      uses: actions/setup-haskell@v1.1 +      with: +        ghc-version: ${{ matrix.versions.ghc }} +        cabal-version: ${{ matrix.versions.cabal }}      # declare/restore cached things      # caching doesn't work for scheduled runs yet diff --git a/MANUAL.txt b/MANUAL.txt index b9ef68cd2..893b4ae2c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -902,7 +902,7 @@ header when requesting a document from a URL:      downloaded). If you're behind a proxy, you also need to set      the environment variable `http_proxy` to `http://...`. -`--no-check-certificate +`--no-check-certificate`  :   Disable the certificate verification to allow access to       unsecure HTTP resources (for example when the certificate diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 967728f5d..3e7b663b4 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,6 +1,5 @@  {-# LANGUAGE NoImplicitPrelude #-}  {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TupleSections     #-}  {-  Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu> @@ -21,39 +20,49 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  import Prelude  import Text.Pandoc  import Text.Pandoc.MIME -import Text.Pandoc.Error (PandocError(..)) -import Control.Monad.Except (throwError) +import Control.Monad.Except (throwError, liftIO)  import qualified Text.Pandoc.UTF8 as UTF8  import qualified Data.ByteString as B  import qualified Data.Text as T  import Criterion.Main  import Criterion.Types (Config(..))  import Data.List (intersect) -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, catMaybes)  import System.Environment (getArgs)  import qualified Data.ByteString.Lazy as BL +data Input = InputText {unInputText :: T.Text} +           | InputBS {unInputBS :: BL.ByteString} +  readerBench :: Pandoc              -> T.Text -            -> Maybe Benchmark -readerBench doc name = -  case res of -       Right (readerFun, inp) -> -          Just $ bench (T.unpack $ name <> " reader") -               $ nf (\i -> either (error . show) id $ runPure (readerFun i)) -                 inp -       Left _ -> Nothing -  where res = runPure $ do -          (rdr, rexts) <- getReader name +            -> IO (Maybe Benchmark) +readerBench doc name = do +  let (rdr, rexts) = either (error . show) id . runPure $ getReader name +  res <- runIO $ do            (wtr, wexts) <- getWriter name            case (rdr, wtr) of              (TextReader r, TextWriter w) -> do -                     setResourcePath ["../test"] +                     setResourcePath ["./test"]                       inp <- w def{ writerWrapText = WrapAuto                                   , writerExtensions = wexts } doc -                     return $ (r def{ readerExtensions = rexts }, inp) -            _ -> throwError $ PandocSomeError $ "not a text format: " +                     return (r def{ readerExtensions = rexts } . unInputText, InputText inp) +            (ByteStringReader r, ByteStringWriter w) -> do +                     setResourcePath ["./test"] +                     tmpl <- Just <$> compileDefaultTemplate name +                     inp <- w def{ writerWrapText = WrapAuto +                                 , writerExtensions = wexts +                                 , writerTemplate = tmpl } doc +                     liftIO $ BL.writeFile "/tmp/test.odt" inp +                     return (r def{ readerExtensions = rexts } . unInputBS, InputBS inp) +            _ -> throwError $ PandocSomeError $ "text/bytestring format mismatch: "                                   <> name +  return $ case res of +       Right (readerFun, inp) -> +          Just $ bench (T.unpack $ name <> " reader") +               $ nf (\i -> either (error . show) id $ runPure (readerFun i)) +                 inp +       Left _ -> Nothing  getImages :: IO [(FilePath, MimeType, BL.ByteString)]  getImages = do @@ -89,22 +98,23 @@ main :: IO ()  main = do    args <- filter (\x -> T.take 1 x /= "-") . fmap T.pack <$> getArgs    print args -  let matchReader (n, TextReader _) = +  let matchReader (n, _) =           null args || ("reader" `elem` args && n `elem` args) -      matchReader _                 = False -  let matchWriter (n, TextWriter _) = +      matchWriter (n, TextWriter _) =           null args || ("writer" `elem` args && n `elem` args)        matchWriter _                 = False -  let matchedReaders = map fst $ (filter matchReader readers +      allWriters = map fst (writers :: [(T.Text, Writer PandocPure)]) +      matchedReaders = map fst (filter matchReader readers                                      :: [(T.Text, Reader PandocPure)]) -  let matchedWriters = map fst $ (filter matchWriter writers +      matchedWriters = map fst (filter matchWriter writers                                      :: [(T.Text, Writer PandocPure)])    inp <- UTF8.toText <$> B.readFile "test/testsuite.txt"    let opts = def    let doc = either (error . show) id $ runPure $ readMarkdown opts inp -  let readerBs = mapMaybe (readerBench doc) -                 $ filter (/="haddock") -                 (matchedReaders `intersect` matchedWriters) +  readerBs <- fmap catMaybes +              $ mapM (readerBench doc) +              $ filter (/="haddock") +              (matchedReaders `intersect` allWriters)                   -- we need the corresponding writer to generate                   -- input for the reader    let writerBs = mapMaybe (writerBench doc) matchedWriters diff --git a/doc/lua-filters.md b/doc/lua-filters.md index c5f7d2713..cc49da09b 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -138,7 +138,7 @@ elements.  ## Filters on element sequences -For some filtering tasks, the it is necessary to know the order +For some filtering tasks, it is necessary to know the order  in which elements occur in the document. It is not enough then to  inspect a single element at a time. @@ -1127,7 +1127,7 @@ A [table cell]{#type-table-cell} is a list of blocks.  *[Alignment]{#type-alignment}* is a string value indicating the  horizontal alignment of a table column. `AlignLeft`, -`AlignRight`, and `AlignCenter` leads cell content tob be +`AlignRight`, and `AlignCenter` leads cell content to be  left-aligned, right-aligned, and centered, respectively. The  default alignment is `AlignDefault` (often equivalent to  centered). @@ -3035,7 +3035,7 @@ methods and convenience functions.  [`pandoc.List:insert ([pos], value)`]{#pandoc.list:insert}  :   Inserts element `value` at position `pos` in list, shifting -    elements to the next-greater indix if necessary. +    elements to the next-greater index if necessary.      This function is identical to      [`table.insert`](https://www.lua.org/manual/5.3/manual.html#6.6). @@ -3082,7 +3082,7 @@ methods and convenience functions.      Parameters:      `pos`: -    :   position of the list value that will be remove; defaults +    :   position of the list value that will be removed; defaults          to the index of the last element      Returns: the removed element diff --git a/pandoc.cabal b/pandoc.cabal index a36f3942c..83c7f1805 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -816,6 +816,7 @@ test-suite test-pandoc                    Tests.Writers.FB2                    Tests.Writers.Powerpoint                    Tests.Writers.OOXML +                  Tests.Writers.Ms    if os(windows)      cpp-options:      -D_WINDOWS    default-language: Haskell2010 diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index 54736cd0e..427a73dbe 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,10 +1,9 @@ -{-# LANGUAGE FlexibleInstances    #-} -{-# LANGUAGE PatternGuards        #-}  {-# LANGUAGE OverloadedStrings    #-}  {- |     Module      : Text.Pandoc.Readers.Docx.Combine     Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, -                   2014-2020 John MacFarlane <jgm@berkeley.edu> +                   2014-2020 John MacFarlane <jgm@berkeley.edu>, +                   2020 Nikolay Yakimov <root@livid.pp.ru>     License     : GNU GPL, version 2 or above     Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu> @@ -59,79 +58,61 @@ module Text.Pandoc.Readers.Docx.Combine ( smushInlines         where  import Data.List -import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>)) -import qualified Data.Sequence as Seq (null) +import Data.Bifunctor +import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl +                     , (><), (|>) )  import Text.Pandoc.Builder  data Modifier a = Modifier (a -> a)                  | AttrModifier (Attr -> a -> a) Attr -                | NullModifier  spaceOutInlinesL :: Inlines -> (Inlines, Inlines)  spaceOutInlinesL ms = (l, stackInlines fs (m' <> r)) -  where (l, m, r) = spaceOutInlines ms -        (fs, m')  = unstackInlines m +  where (l, (fs, m'), r) = spaceOutInlines ms  spaceOutInlinesR :: Inlines -> (Inlines, Inlines)  spaceOutInlinesR ms = (stackInlines fs (l <> m'), r) -  where (l, m, r) = spaceOutInlines ms -        (fs, m')  = unstackInlines m +  where (l, (fs, m'), r) = spaceOutInlines ms -spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines) +spaceOutInlines :: Inlines -> (Inlines, ([Modifier Inlines], Inlines), Inlines)  spaceOutInlines ils =    let (fs, ils') = unstackInlines ils -      contents = unMany ils' -      left  = case viewl contents of -        (Space :< _) -> space -        _            -> mempty -      right = case viewr contents of -        (_ :> Space) -> space -        _            -> mempty in -  (left, stackInlines fs $ trimInlines . Many $ contents, right) +      (left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils' +      -- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element +  in (Many left, (fs, Many contents'), Many right) + +isSpace :: Inline -> Bool +isSpace Space = True +isSpace SoftBreak = True +isSpace _ = False  stackInlines :: [Modifier Inlines] -> Inlines -> Inlines  stackInlines [] ms = ms -stackInlines (NullModifier : fs) ms = stackInlines fs ms  stackInlines (Modifier f : fs) ms = -  if isEmpty ms +  if null ms    then stackInlines fs ms    else f $ stackInlines fs ms  stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms  unstackInlines :: Inlines -> ([Modifier Inlines], Inlines) -unstackInlines ms = case ilModifier ms of -  NullModifier -> ([], ms) -  _            -> (f : fs, ms') where -    f = ilModifier ms -    (fs, ms') = unstackInlines $ ilInnards ms - -ilModifier :: Inlines -> Modifier Inlines -ilModifier ils = case viewl (unMany ils) of -  (x :< xs) | Seq.null xs -> case x of -    (Emph _)          -> Modifier emph -    (Strong _)        -> Modifier strong -    (SmallCaps _)     -> Modifier smallcaps -    (Strikeout _)     -> Modifier strikeout -    (Superscript _)   -> Modifier superscript -    (Subscript _)     -> Modifier subscript -    (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt) -    (Span attr _)     -> AttrModifier spanWith attr -    _                 -> NullModifier -  _ -> NullModifier - -ilInnards :: Inlines -> Inlines -ilInnards ils = case viewl (unMany ils) of -  (x :< xs) | Seq.null xs -> case x of -    (Emph lst)        -> fromList lst -    (Strong lst)      -> fromList lst -    (SmallCaps lst)   -> fromList lst -    (Strikeout lst)   -> fromList lst -    (Superscript lst) -> fromList lst -    (Subscript lst)   -> fromList lst -    (Link _ lst _)    -> fromList lst -    (Span _ lst)      -> fromList lst -    _                 -> ils -  _          -> ils +unstackInlines ms = case ilModifierAndInnards ms of +  Nothing         -> ([], ms) +  Just (f, inner) -> first (f :) $ unstackInlines inner + +ilModifierAndInnards :: Inlines -> Maybe (Modifier Inlines, Inlines) +ilModifierAndInnards ils = case viewl $ unMany ils of +  x :< xs | null xs -> second fromList <$> case x of +    Emph lst          -> Just (Modifier emph, lst) +    Strong lst        -> Just (Modifier strong, lst) +    SmallCaps lst     -> Just (Modifier smallcaps, lst) +    Strikeout lst     -> Just (Modifier strikeout, lst) +    Underline lst     -> Just (Modifier underline, lst) +    Superscript lst   -> Just (Modifier superscript, lst) +    Subscript lst     -> Just (Modifier subscript, lst) +    Link attr lst tgt -> Just (Modifier $ linkWith attr (fst tgt) (snd tgt), lst) +    Span attr lst     -> Just (AttrModifier spanWith attr, lst) +    _                 -> Nothing +  _ -> Nothing  inlinesL :: Inlines -> (Inlines, Inlines)  inlinesL ils = case viewl $ unMany ils of @@ -161,12 +142,12 @@ combineSingletonInlines x y =        y_rem_attr = filter isAttrModifier y_remaining    in     case null shared of -     True | isEmpty xs && isEmpty ys -> -            stackInlines (x_rem_attr ++ y_rem_attr) mempty -          | isEmpty xs -> +     True | null xs && null ys -> +            stackInlines (x_rem_attr <> y_rem_attr) mempty +          | null xs ->              let (sp, y') = spaceOutInlinesL y in              stackInlines x_rem_attr mempty <> sp <> y' -          | isEmpty ys -> +          | null ys ->              let (x', sp) = spaceOutInlinesR x in              x' <> sp <> stackInlines y_rem_attr mempty            | otherwise -> @@ -193,12 +174,8 @@ combineBlocks bs cs = bs <> cs  instance (Monoid a, Eq a) => Eq (Modifier a) where    (Modifier f) == (Modifier g) = f mempty == g mempty    (AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty -  NullModifier == NullModifier = True    _ == _ = False -isEmpty :: (Monoid a, Eq a) => a -> Bool -isEmpty x = x == mempty -  isAttrModifier :: Modifier a -> Bool  isAttrModifier (AttrModifier _ _) = True  isAttrModifier _                  = False diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e558836a1..7c25be486 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -223,7 +223,8 @@ rawFieldListItem minIndent = try $ do    first <- anyLine    rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)                                 indentedBlock -  let raw = (if T.null first then "" else first <> "\n") <> rest <> "\n" +  let raw = (if T.null first then "" else first <> "\n") <> rest <> +            (if T.null first && T.null rest then "" else "\n")    return (name, raw)  fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) @@ -484,7 +485,7 @@ includeDirective top fields body = do                               Just patt -> drop 1 .                                              dropWhile (not . (patt `T.isInfixOf`))                               Nothing   -> id) $ contentLines' -  let contents' = T.unlines contentLines'' <> "\n" +  let contents' = T.unlines contentLines''    case lookup "code" fields of         Just lang -> do           let classes =  maybe [] T.words (lookup "class" fields) @@ -494,7 +495,7 @@ includeDirective top fields body = do                           Just _  -> return $ B.rawBlock "rst" contents'                           Nothing -> do                             setPosition $ newPos (T.unpack f) 1 1 -                           setInput contents' +                           setInput $ contents' <> "\n"                             bs <- optional blanklines >>                                    (mconcat <$> many block)                             setInput oldInput diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 81de40045..561053c88 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -204,7 +204,9 @@ blockToMs opts (CodeBlock attr str) = do      literal ".IP" $$      literal ".nf" $$      literal "\\f[C]" $$ -    hlCode $$ +    ((case T.uncons str of +      Just ('.',_) -> literal "\\&" +      _            -> mempty) <> hlCode) $$      literal "\\f[]" $$      literal ".fi"  blockToMs opts (LineBlock ls) = do @@ -517,11 +519,11 @@ toMacro sty toktype =  msFormatter :: WriterOptions -> FormatOptions -> [SourceLine] -> Doc Text  msFormatter opts _fmtopts = -  vcat . map fmtLine -  where fmtLine = hcat . map fmtToken -        fmtToken (toktype, tok) = literal "\\*" <> -           brackets (literal (tshow toktype) <> literal " \"" -             <> literal (escapeStr opts tok) <> literal "\"") +  literal . T.intercalate "\n" . map fmtLine + where +  fmtLine = mconcat . map fmtToken +  fmtToken (toktype, tok) = +    "\\*[" <> (tshow toktype) <> " \"" <> (escapeStr opts tok) <> "\"]"  highlightCode :: PandocMonad m => WriterOptions -> Attr -> Text -> MS m (Doc Text)  highlightCode opts attr str = diff --git a/test/Tests/Writers/Ms.hs b/test/Tests/Writers/Ms.hs new file mode 100644 index 000000000..d73603314 --- /dev/null +++ b/test/Tests/Writers/Ms.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +module Tests.Writers.Ms (tests) where + +import Prelude +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Builder + +infix 4 =: +(=:) :: (ToString a, ToPandoc a) +     => String -> (a, String) -> TestTree +(=:) = test (purely (writeMs def . toPandoc)) + +tests :: [TestTree] +tests = [ testGroup "code blocks" +          [ "basic" +              =: codeBlock "hello" +              =?> unlines +              [ ".IP" +              , ".nf" +              , "\\f[C]" +              , "hello" +              , "\\f[]" +              , ".fi"] +          , "escape starting ." +              =: codeBlock ". hello" +              =?> unlines +              [ ".IP" +              , ".nf" +              , "\\f[C]" +              , "\\&. hello" +              , "\\f[]" +              , ".fi"] +          ] +        ] diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docxBinary files differ index 5efe66edd..ddfd45280 100644 --- a/test/docx/golden/inline_formatting.docx +++ b/test/docx/golden/inline_formatting.docx diff --git a/test/docx/inline_formatting.native b/test/docx/inline_formatting.native index 000896df9..df749ffef 100644 --- a/test/docx/inline_formatting.native +++ b/test/docx/inline_formatting.native @@ -1,6 +1,6 @@  Pandoc (Meta {unMeta = fromList []})  [Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."]  ,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."] -,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space],Emph [Underline [Str "emphasis"]],Str "."] +,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."]  ,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."]  ,Para [Str "A",Space,Str "line",LineBreak,Str "break."]] diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index ff7661094..d0a1a6f18 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -37,6 +37,7 @@ import qualified Tests.Writers.JATS  import qualified Tests.Writers.Jira  import qualified Tests.Writers.LaTeX  import qualified Tests.Writers.Markdown +import qualified Tests.Writers.Ms  import qualified Tests.Writers.Muse  import qualified Tests.Writers.Native  import qualified Tests.Writers.Org @@ -70,6 +71,7 @@ tests pandocPath = testGroup "pandoc tests"            , testGroup "Muse" Tests.Writers.Muse.tests            , testGroup "FB2" Tests.Writers.FB2.tests            , testGroup "PowerPoint" Tests.Writers.Powerpoint.tests +          , testGroup "Ms" Tests.Writers.Ms.tests            ]          , testGroup "Readers"            [ testGroup "LaTeX" Tests.Readers.LaTeX.tests | 
