aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANUAL.txt2
-rw-r--r--benchmark/benchmark-pandoc.hs60
-rw-r--r--doc/lua-filters.md8
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs101
-rw-r--r--src/Text/Pandoc/Readers/RST.hs7
-rw-r--r--src/Text/Pandoc/Writers/Ms.hs10
-rw-r--r--test/docx/golden/inline_formatting.docxbin9989 -> 9987 bytes
-rw-r--r--test/docx/inline_formatting.native2
8 files changed, 89 insertions, 101 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index b9ef68cd2..893b4ae2c 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -902,7 +902,7 @@ header when requesting a document from a URL:
downloaded). If you're behind a proxy, you also need to set
the environment variable `http_proxy` to `http://...`.
-`--no-check-certificate
+`--no-check-certificate`
: Disable the certificate verification to allow access to
unsecure HTTP resources (for example when the certificate
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs
index 967728f5d..3e7b663b4 100644
--- a/benchmark/benchmark-pandoc.hs
+++ b/benchmark/benchmark-pandoc.hs
@@ -1,6 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu>
@@ -21,39 +20,49 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import Prelude
import Text.Pandoc
import Text.Pandoc.MIME
-import Text.Pandoc.Error (PandocError(..))
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, liftIO)
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.ByteString as B
import qualified Data.Text as T
import Criterion.Main
import Criterion.Types (Config(..))
import Data.List (intersect)
-import Data.Maybe (mapMaybe)
+import Data.Maybe (mapMaybe, catMaybes)
import System.Environment (getArgs)
import qualified Data.ByteString.Lazy as BL
+data Input = InputText {unInputText :: T.Text}
+ | InputBS {unInputBS :: BL.ByteString}
+
readerBench :: Pandoc
-> T.Text
- -> Maybe Benchmark
-readerBench doc name =
- case res of
- Right (readerFun, inp) ->
- Just $ bench (T.unpack $ name <> " reader")
- $ nf (\i -> either (error . show) id $ runPure (readerFun i))
- inp
- Left _ -> Nothing
- where res = runPure $ do
- (rdr, rexts) <- getReader name
+ -> IO (Maybe Benchmark)
+readerBench doc name = do
+ let (rdr, rexts) = either (error . show) id . runPure $ getReader name
+ res <- runIO $ do
(wtr, wexts) <- getWriter name
case (rdr, wtr) of
(TextReader r, TextWriter w) -> do
- setResourcePath ["../test"]
+ setResourcePath ["./test"]
inp <- w def{ writerWrapText = WrapAuto
, writerExtensions = wexts } doc
- return $ (r def{ readerExtensions = rexts }, inp)
- _ -> throwError $ PandocSomeError $ "not a text format: "
+ return (r def{ readerExtensions = rexts } . unInputText, InputText inp)
+ (ByteStringReader r, ByteStringWriter w) -> do
+ setResourcePath ["./test"]
+ tmpl <- Just <$> compileDefaultTemplate name
+ inp <- w def{ writerWrapText = WrapAuto
+ , writerExtensions = wexts
+ , writerTemplate = tmpl } doc
+ liftIO $ BL.writeFile "/tmp/test.odt" inp
+ return (r def{ readerExtensions = rexts } . unInputBS, InputBS inp)
+ _ -> throwError $ PandocSomeError $ "text/bytestring format mismatch: "
<> name
+ return $ case res of
+ Right (readerFun, inp) ->
+ Just $ bench (T.unpack $ name <> " reader")
+ $ nf (\i -> either (error . show) id $ runPure (readerFun i))
+ inp
+ Left _ -> Nothing
getImages :: IO [(FilePath, MimeType, BL.ByteString)]
getImages = do
@@ -89,22 +98,23 @@ main :: IO ()
main = do
args <- filter (\x -> T.take 1 x /= "-") . fmap T.pack <$> getArgs
print args
- let matchReader (n, TextReader _) =
+ let matchReader (n, _) =
null args || ("reader" `elem` args && n `elem` args)
- matchReader _ = False
- let matchWriter (n, TextWriter _) =
+ matchWriter (n, TextWriter _) =
null args || ("writer" `elem` args && n `elem` args)
matchWriter _ = False
- let matchedReaders = map fst $ (filter matchReader readers
+ allWriters = map fst (writers :: [(T.Text, Writer PandocPure)])
+ matchedReaders = map fst (filter matchReader readers
:: [(T.Text, Reader PandocPure)])
- let matchedWriters = map fst $ (filter matchWriter writers
+ matchedWriters = map fst (filter matchWriter writers
:: [(T.Text, Writer PandocPure)])
inp <- UTF8.toText <$> B.readFile "test/testsuite.txt"
let opts = def
let doc = either (error . show) id $ runPure $ readMarkdown opts inp
- let readerBs = mapMaybe (readerBench doc)
- $ filter (/="haddock")
- (matchedReaders `intersect` matchedWriters)
+ readerBs <- fmap catMaybes
+ $ mapM (readerBench doc)
+ $ filter (/="haddock")
+ (matchedReaders `intersect` allWriters)
-- we need the corresponding writer to generate
-- input for the reader
let writerBs = mapMaybe (writerBench doc) matchedWriters
diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index c5f7d2713..cc49da09b 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -138,7 +138,7 @@ elements.
## Filters on element sequences
-For some filtering tasks, the it is necessary to know the order
+For some filtering tasks, it is necessary to know the order
in which elements occur in the document. It is not enough then to
inspect a single element at a time.
@@ -1127,7 +1127,7 @@ A [table cell]{#type-table-cell} is a list of blocks.
*[Alignment]{#type-alignment}* is a string value indicating the
horizontal alignment of a table column. `AlignLeft`,
-`AlignRight`, and `AlignCenter` leads cell content tob be
+`AlignRight`, and `AlignCenter` leads cell content to be
left-aligned, right-aligned, and centered, respectively. The
default alignment is `AlignDefault` (often equivalent to
centered).
@@ -3035,7 +3035,7 @@ methods and convenience functions.
[`pandoc.List:insert ([pos], value)`]{#pandoc.list:insert}
: Inserts element `value` at position `pos` in list, shifting
- elements to the next-greater indix if necessary.
+ elements to the next-greater index if necessary.
This function is identical to
[`table.insert`](https://www.lua.org/manual/5.3/manual.html#6.6).
@@ -3082,7 +3082,7 @@ methods and convenience functions.
Parameters:
`pos`:
- : position of the list value that will be remove; defaults
+ : position of the list value that will be removed; defaults
to the index of the last element
Returns: the removed element
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 54736cd0e..427a73dbe 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -1,10 +1,9 @@
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
- 2014-2020 John MacFarlane <jgm@berkeley.edu>
+ 2014-2020 John MacFarlane <jgm@berkeley.edu>,
+ 2020 Nikolay Yakimov <root@livid.pp.ru>
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -59,79 +58,61 @@ module Text.Pandoc.Readers.Docx.Combine ( smushInlines
where
import Data.List
-import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr, (><), (|>))
-import qualified Data.Sequence as Seq (null)
+import Data.Bifunctor
+import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
+ , (><), (|>) )
import Text.Pandoc.Builder
data Modifier a = Modifier (a -> a)
| AttrModifier (Attr -> a -> a) Attr
- | NullModifier
spaceOutInlinesL :: Inlines -> (Inlines, Inlines)
spaceOutInlinesL ms = (l, stackInlines fs (m' <> r))
- where (l, m, r) = spaceOutInlines ms
- (fs, m') = unstackInlines m
+ where (l, (fs, m'), r) = spaceOutInlines ms
spaceOutInlinesR :: Inlines -> (Inlines, Inlines)
spaceOutInlinesR ms = (stackInlines fs (l <> m'), r)
- where (l, m, r) = spaceOutInlines ms
- (fs, m') = unstackInlines m
+ where (l, (fs, m'), r) = spaceOutInlines ms
-spaceOutInlines :: Inlines -> (Inlines, Inlines, Inlines)
+spaceOutInlines :: Inlines -> (Inlines, ([Modifier Inlines], Inlines), Inlines)
spaceOutInlines ils =
let (fs, ils') = unstackInlines ils
- contents = unMany ils'
- left = case viewl contents of
- (Space :< _) -> space
- _ -> mempty
- right = case viewr contents of
- (_ :> Space) -> space
- _ -> mempty in
- (left, stackInlines fs $ trimInlines . Many $ contents, right)
+ (left, (right, contents')) = second (spanr isSpace) $ spanl isSpace $ unMany ils'
+ -- NOTE: spanr counterintuitively returns suffix as the FIRST tuple element
+ in (Many left, (fs, Many contents'), Many right)
+
+isSpace :: Inline -> Bool
+isSpace Space = True
+isSpace SoftBreak = True
+isSpace _ = False
stackInlines :: [Modifier Inlines] -> Inlines -> Inlines
stackInlines [] ms = ms
-stackInlines (NullModifier : fs) ms = stackInlines fs ms
stackInlines (Modifier f : fs) ms =
- if isEmpty ms
+ if null ms
then stackInlines fs ms
else f $ stackInlines fs ms
stackInlines (AttrModifier f attr : fs) ms = f attr $ stackInlines fs ms
unstackInlines :: Inlines -> ([Modifier Inlines], Inlines)
-unstackInlines ms = case ilModifier ms of
- NullModifier -> ([], ms)
- _ -> (f : fs, ms') where
- f = ilModifier ms
- (fs, ms') = unstackInlines $ ilInnards ms
-
-ilModifier :: Inlines -> Modifier Inlines
-ilModifier ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph _) -> Modifier emph
- (Strong _) -> Modifier strong
- (SmallCaps _) -> Modifier smallcaps
- (Strikeout _) -> Modifier strikeout
- (Superscript _) -> Modifier superscript
- (Subscript _) -> Modifier subscript
- (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
- (Span attr _) -> AttrModifier spanWith attr
- _ -> NullModifier
- _ -> NullModifier
-
-ilInnards :: Inlines -> Inlines
-ilInnards ils = case viewl (unMany ils) of
- (x :< xs) | Seq.null xs -> case x of
- (Emph lst) -> fromList lst
- (Strong lst) -> fromList lst
- (SmallCaps lst) -> fromList lst
- (Strikeout lst) -> fromList lst
- (Superscript lst) -> fromList lst
- (Subscript lst) -> fromList lst
- (Link _ lst _) -> fromList lst
- (Span _ lst) -> fromList lst
- _ -> ils
- _ -> ils
+unstackInlines ms = case ilModifierAndInnards ms of
+ Nothing -> ([], ms)
+ Just (f, inner) -> first (f :) $ unstackInlines inner
+
+ilModifierAndInnards :: Inlines -> Maybe (Modifier Inlines, Inlines)
+ilModifierAndInnards ils = case viewl $ unMany ils of
+ x :< xs | null xs -> second fromList <$> case x of
+ Emph lst -> Just (Modifier emph, lst)
+ Strong lst -> Just (Modifier strong, lst)
+ SmallCaps lst -> Just (Modifier smallcaps, lst)
+ Strikeout lst -> Just (Modifier strikeout, lst)
+ Underline lst -> Just (Modifier underline, lst)
+ Superscript lst -> Just (Modifier superscript, lst)
+ Subscript lst -> Just (Modifier subscript, lst)
+ Link attr lst tgt -> Just (Modifier $ linkWith attr (fst tgt) (snd tgt), lst)
+ Span attr lst -> Just (AttrModifier spanWith attr, lst)
+ _ -> Nothing
+ _ -> Nothing
inlinesL :: Inlines -> (Inlines, Inlines)
inlinesL ils = case viewl $ unMany ils of
@@ -161,12 +142,12 @@ combineSingletonInlines x y =
y_rem_attr = filter isAttrModifier y_remaining
in
case null shared of
- True | isEmpty xs && isEmpty ys ->
- stackInlines (x_rem_attr ++ y_rem_attr) mempty
- | isEmpty xs ->
+ True | null xs && null ys ->
+ stackInlines (x_rem_attr <> y_rem_attr) mempty
+ | null xs ->
let (sp, y') = spaceOutInlinesL y in
stackInlines x_rem_attr mempty <> sp <> y'
- | isEmpty ys ->
+ | null ys ->
let (x', sp) = spaceOutInlinesR x in
x' <> sp <> stackInlines y_rem_attr mempty
| otherwise ->
@@ -193,12 +174,8 @@ combineBlocks bs cs = bs <> cs
instance (Monoid a, Eq a) => Eq (Modifier a) where
(Modifier f) == (Modifier g) = f mempty == g mempty
(AttrModifier f attr) == (AttrModifier g attr') = f attr mempty == g attr' mempty
- NullModifier == NullModifier = True
_ == _ = False
-isEmpty :: (Monoid a, Eq a) => a -> Bool
-isEmpty x = x == mempty
-
isAttrModifier :: Modifier a -> Bool
isAttrModifier (AttrModifier _ _) = True
isAttrModifier _ = False
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index e558836a1..7c25be486 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -223,7 +223,8 @@ rawFieldListItem minIndent = try $ do
first <- anyLine
rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)
indentedBlock
- let raw = (if T.null first then "" else first <> "\n") <> rest <> "\n"
+ let raw = (if T.null first then "" else first <> "\n") <> rest <>
+ (if T.null first && T.null rest then "" else "\n")
return (name, raw)
fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
@@ -484,7 +485,7 @@ includeDirective top fields body = do
Just patt -> drop 1 .
dropWhile (not . (patt `T.isInfixOf`))
Nothing -> id) $ contentLines'
- let contents' = T.unlines contentLines'' <> "\n"
+ let contents' = T.unlines contentLines''
case lookup "code" fields of
Just lang -> do
let classes = maybe [] T.words (lookup "class" fields)
@@ -494,7 +495,7 @@ includeDirective top fields body = do
Just _ -> return $ B.rawBlock "rst" contents'
Nothing -> do
setPosition $ newPos (T.unpack f) 1 1
- setInput contents'
+ setInput $ contents' <> "\n"
bs <- optional blanklines >>
(mconcat <$> many block)
setInput oldInput
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index b6522b352..561053c88 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -519,11 +519,11 @@ toMacro sty toktype =
msFormatter :: WriterOptions -> FormatOptions -> [SourceLine] -> Doc Text
msFormatter opts _fmtopts =
- vcat . map fmtLine
- where fmtLine = hcat . map fmtToken
- fmtToken (toktype, tok) = literal "\\*" <>
- brackets (literal (tshow toktype) <> literal " \""
- <> literal (escapeStr opts tok) <> literal "\"")
+ literal . T.intercalate "\n" . map fmtLine
+ where
+ fmtLine = mconcat . map fmtToken
+ fmtToken (toktype, tok) =
+ "\\*[" <> (tshow toktype) <> " \"" <> (escapeStr opts tok) <> "\"]"
highlightCode :: PandocMonad m => WriterOptions -> Attr -> Text -> MS m (Doc Text)
highlightCode opts attr str =
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 5efe66edd..ddfd45280 100644
--- a/test/docx/golden/inline_formatting.docx
+++ b/test/docx/golden/inline_formatting.docx
Binary files differ
diff --git a/test/docx/inline_formatting.native b/test/docx/inline_formatting.native
index 000896df9..df749ffef 100644
--- a/test/docx/inline_formatting.native
+++ b/test/docx/inline_formatting.native
@@ -1,6 +1,6 @@
Pandoc (Meta {unMeta = fromList []})
[Para [Str "Regular",Space,Str "text",Space,Emph [Str "italics"],Space,Strong [Str "bold",Space,Emph [Str "bold",Space,Str "italics"]],Str "."]
,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "Small",Space,Str "Caps"],Str ",",Space,Str "and",Space,Str "this",Space,Str "is",Space,Strikeout [Str "strikethrough"],Str "."]
-,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space],Emph [Underline [Str "emphasis"]],Str "."]
+,Para [Str "Some",Space,Str "people",Space,Str "use",Space,Underline [Str "single",Space,Str "underlines",Space,Str "for",Space,Emph [Str "emphasis"]],Str "."]
,Para [Str "Above",Space,Str "the",Space,Str "line",Space,Str "is",Space,Superscript [Str "superscript"],Space,Str "and",Space,Str "below",Space,Str "the",Space,Str "line",Space,Str "is",Space,Subscript [Str "subscript"],Str "."]
,Para [Str "A",Space,Str "line",LineBreak,Str "break."]]