From dc6856530c2cb6ca58ed82721ab895b86cfe0c1c Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Fri, 4 Dec 2020 09:16:56 +0100 Subject: Docbook writer: handle admonitions Similarly to https://github.com/jgm/pandoc/commit/d6fdfe6f2bba2a8ed25d6c9f11861774001f7a91, we should handle admonitions. --- test/Tests/Writers/Docbook.hs | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'test/Tests/Writers') diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index f6a047b0b..1d53dcfe7 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -70,6 +70,58 @@ tests = [ testGroup "line blocks" , "" ] ) ] + , testGroup "divs" + [ "admonition" =: divWith ("foo", ["warning"], []) (para "This is a test") + =?> unlines + [ "" + , " " + , " This is a test" + , " " + , "" + ] + , "admonition-with-title" =: + divWith ("foo", ["attention"], []) ( + divWith ("foo", ["title"], []) + (plain (text "This is title")) <> + para "This is a test" + ) + =?> unlines + [ "" + , " This is title" + , " " + , " This is a test" + , " " + , "" + ] + , "single-child" =: + divWith ("foo", [], []) (para "This is a test") + =?> unlines + [ "" + , " This is a test" + , "" + ] + , "single-literal-child" =: + divWith ("foo", [], []) lineblock + =?> unlines + [ "some text" + , "and more lines" + , "and again" + ] + , "multiple-children" =: + divWith ("foo", [], []) ( + para "This is a test" <> + para "This is an another test" + ) + =?> unlines + [ "" + , "" + , " This is a test" + , "" + , "" + , " This is an another test" + , "" + ] + ] , testGroup "compact lists" [ testGroup "bullet" [ "compact" =: bulletList [plain "a", plain "b", plain "c"] -- cgit v1.2.3 From 70c7c5703afcbd1cbf2a80c2be515e038abcd419 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Mon, 7 Dec 2020 07:28:39 +0100 Subject: Docbook writer: Handle admonition titles from Markdown reader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docbook reader produces a `Div` with `title` class for `` element within an “admonition” element. Markdown writer then turns this into a fenced div with `title` class attribute. Since fenced divs are block elements, their content is recognized as a paragraph by the Markdown reader. This is an issue for Docbook writer because it would produce an invalid DocBook document from such AST – the `<title>` element can only contain “inline” elements. Let’s handle this invalid special case separately by unwrapping the paragraph before creating the `<title>` element. --- src/Text/Pandoc/Writers/Docbook.hs | 2 ++ test/Tests/Writers/Docbook.hs | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 398920839..affa0de04 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -201,6 +201,8 @@ blockToDocbook opts (Div (ident,classes,_) bs) = do (l:_) | l `elem` admonitions -> do let (mTitleBs, bodyBs) = case bs of + -- Matches AST produced by the DocBook reader → Markdown writer → Markdown reader chain. + (Div (_,["title"],_) [Para ts] : rest) -> (Just (inlinesToDocbook opts ts), rest) -- Matches AST produced by the Docbook reader. (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest) _ -> (Nothing, bs) diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 1d53dcfe7..621c1280b 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -93,6 +93,20 @@ tests = [ testGroup "line blocks" , " </para>" , "</attention>" ] + , "admonition-with-title-in-para" =: + divWith ("foo", ["attention"], []) ( + divWith ("foo", ["title"], []) + (para "This is title") <> + para "This is a test" + ) + =?> unlines + [ "<attention id=\"foo\">" + , " <title>This is title" + , " " + , " This is a test" + , " " + , "" + ] , "single-child" =: divWith ("foo", [], []) (para "This is a test") =?> unlines -- cgit v1.2.3 From 00031fc809117cb436397aba83a41ca1d4056f61 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 13 Dec 2020 14:09:59 +0100 Subject: Docx writer: keep raw openxml strings verbatim. Closes: #6933 --- src/Text/Pandoc/Writers/Docx.hs | 7 +++++-- test/Tests/Writers/Docx.hs | 10 ++++++++++ test/docx/golden/raw-blocks.docx | Bin 0 -> 9888 bytes test/docx/golden/raw-bookmarks.docx | Bin 0 -> 10023 bytes test/docx/raw-blocks.native | 6 ++++++ test/docx/raw-bookmarks.native | 3 +++ 6 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 test/docx/golden/raw-blocks.docx create mode 100644 test/docx/golden/raw-bookmarks.docx create mode 100644 test/docx/raw-blocks.native create mode 100644 test/docx/raw-bookmarks.native (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 97048e980..0174a8501 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -978,7 +978,9 @@ blockToOpenXML' opts (Para lst) return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) - | format == Format "openxml" = return (parseXML str) + | format == Format "openxml" = return [ + Text (CData CDataRaw (T.unpack str) Nothing) + ] | otherwise = do report $ BlockNotRendered b return [] @@ -1312,7 +1314,8 @@ inlineToOpenXML' opts (Strikeout lst) = $ inlinesToOpenXML opts lst inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) - | f == Format "openxml" = return (parseXML str) + | f == Format "openxml" = return + [Text (CData CDataRaw (T.unpack str) Nothing)] | otherwise = do report $ InlineNotRendered il return [] diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 8f051b4b7..66a5c3d36 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -128,6 +128,16 @@ tests = [ testGroup "inlines" def "docx/codeblock.native" "docx/golden/codeblock.docx" + , docxTest + "raw OOXML blocks" + def + "docx/raw-blocks.native" + "docx/golden/raw-blocks.docx" + , docxTest + "raw bookmark markers" + def + "docx/raw-bookmarks.native" + "docx/golden/raw-bookmarks.docx" ] , testGroup "track changes" [ docxTest diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx new file mode 100644 index 000000000..ae7f8f1f0 Binary files /dev/null and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx new file mode 100644 index 000000000..5e433b736 Binary files /dev/null and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/raw-blocks.native b/test/docx/raw-blocks.native new file mode 100644 index 000000000..d7f985bc3 --- /dev/null +++ b/test/docx/raw-blocks.native @@ -0,0 +1,6 @@ +[Para [Str "Cell",Space,Str "compartments"] +,RawBlock (Format "openxml") "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +,Para [Str "Ribosome"] +,RawBlock (Format "openxml") "\n" +,Para [Str "Lysosome"] +,RawBlock (Format "openxml") "\n\n"] diff --git a/test/docx/raw-bookmarks.native b/test/docx/raw-bookmarks.native new file mode 100644 index 000000000..1e76655d6 --- /dev/null +++ b/test/docx/raw-bookmarks.native @@ -0,0 +1,3 @@ +[Para [Str "Manual",Space,Str "endnotes."] +,Para [Str "Nullam",Space,Str "eu",Space,Str "ante",Space,Str "vel",Space,Str "est",Space,Str "convallis",Space,Str "dignissim.",Space,Str "Nunc",Space,Str "porta",Space,Str "vulputate",Space,Str "tellus.",Space,Str "Nunc",Space,Str "rutrum",Space,Str "turpis",Space,Str "sed",Space,Str "pede.",Space,Str "Sed",Space,Str "bibendum.",RawInline (Format "openxml") "",Str "Aliquam",Space,Str "posuere."] +,Para [Str "Nunc",Space,Str "aliquet,",Space,Str "augue",Space,Str "nec",Space,Str "adipiscing",Space,Str "interdum,",Space,Str "lacus",Space,Str "tellus",Space,Str "malesuada",Space,Str "massa,",Space,Str "quis",Space,Str "varius",Space,Str "mi",Space,Str "purus",Space,Str "non",Space,Str "odio.",RawInline (Format "openxml") "",Str "Pellentesque",Space,Str "condimentum,",Space,Str "magna",Space,Str "ut",Space,Str "suscipit",Space,Str "hendrerit,",Space,Str "ipsum",Space,Str "augue",Space,Str "ornare",Space,Str "nulla,",Space,Str "non",Space,Str "luctus",Space,Str "diam",Space,Str "neque",Space,Str "sit",Space,Str "amet",Space,Str "urna.",Space,Str "Curabitur",Space,Str "vulputate",Space,Str "vestibulum",Space,Str "lorem."]] -- cgit v1.2.3 From d202f7eb77242bd2d9395b950b74fc9b22f9ae13 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 7 Feb 2021 19:02:35 +0100 Subject: Avoid unnecessary use of NoImplicitPrelude pragma (#7089) --- app/pandoc.hs | 2 -- benchmark/benchmark-pandoc.hs | 2 -- benchmark/weigh-pandoc.hs | 2 -- pandoc.cabal | 2 +- src/Text/Pandoc/UTF8.hs | 1 - test/Tests/Command.hs | 2 -- test/Tests/Helpers.hs | 2 -- test/Tests/Lua.hs | 2 -- test/Tests/Old.hs | 2 -- test/Tests/Readers/Creole.hs | 2 -- test/Tests/Readers/Docx.hs | 2 -- test/Tests/Readers/DokuWiki.hs | 2 -- test/Tests/Readers/EPUB.hs | 2 -- test/Tests/Readers/FB2.hs | 2 -- test/Tests/Readers/HTML.hs | 2 -- test/Tests/Readers/JATS.hs | 2 -- test/Tests/Readers/Jira.hs | 1 - test/Tests/Readers/LaTeX.hs | 2 -- test/Tests/Readers/Man.hs | 1 - test/Tests/Readers/Markdown.hs | 2 -- test/Tests/Readers/Muse.hs | 2 -- test/Tests/Readers/Odt.hs | 2 -- test/Tests/Readers/Org/Block.hs | 2 -- test/Tests/Readers/Org/Block/CodeBlock.hs | 2 -- test/Tests/Readers/Org/Block/Figure.hs | 2 -- test/Tests/Readers/Org/Block/Header.hs | 2 -- test/Tests/Readers/Org/Block/List.hs | 2 -- test/Tests/Readers/Org/Block/Table.hs | 2 -- test/Tests/Readers/Org/Directive.hs | 2 -- test/Tests/Readers/Org/Inline.hs | 2 -- test/Tests/Readers/Org/Inline/Citation.hs | 2 -- test/Tests/Readers/Org/Inline/Note.hs | 2 -- test/Tests/Readers/Org/Inline/Smart.hs | 2 -- test/Tests/Readers/Org/Meta.hs | 2 -- test/Tests/Readers/Org/Shared.hs | 2 -- test/Tests/Readers/RST.hs | 2 -- test/Tests/Readers/Txt2Tags.hs | 2 -- test/Tests/Shared.hs | 2 -- test/Tests/Writers/AnnotatedTable.hs | 1 - test/Tests/Writers/AsciiDoc.hs | 2 -- test/Tests/Writers/ConTeXt.hs | 2 -- test/Tests/Writers/Docbook.hs | 2 -- test/Tests/Writers/Docx.hs | 2 -- test/Tests/Writers/FB2.hs | 2 -- test/Tests/Writers/HTML.hs | 2 -- test/Tests/Writers/JATS.hs | 2 -- test/Tests/Writers/LaTeX.hs | 2 -- test/Tests/Writers/Markdown.hs | 2 -- test/Tests/Writers/Ms.hs | 2 -- test/Tests/Writers/Muse.hs | 1 - test/Tests/Writers/Native.hs | 2 -- test/Tests/Writers/OOXML.hs | 2 -- test/Tests/Writers/Org.hs | 2 -- test/Tests/Writers/Plain.hs | 2 -- test/Tests/Writers/Powerpoint.hs | 2 -- test/Tests/Writers/RST.hs | 2 -- test/Tests/Writers/TEI.hs | 2 -- test/test-pandoc.hs | 2 -- trypandoc/trypandoc.hs | 2 -- 59 files changed, 1 insertion(+), 112 deletions(-) (limited to 'test/Tests/Writers') diff --git a/app/pandoc.hs b/app/pandoc.hs index 162570f18..0e30f45d1 100644 --- a/app/pandoc.hs +++ b/app/pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Main Copyright : Copyright (C) 2006-2021 John MacFarlane @@ -12,7 +11,6 @@ Parses command-line options and calls the appropriate readers and writers. -} module Main where -import Prelude import qualified Control.Exception as E import Text.Pandoc.App (convertWithOpts, defaultOpts, options, parseOptions) import Text.Pandoc.Error (handleError) diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 045573273..496732693 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2012-2021 John MacFarlane @@ -17,7 +16,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -import Prelude import Text.Pandoc import Text.Pandoc.MIME import Control.Monad.Except (throwError, liftIO) diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs index ad4c83ad7..b77fa9ee9 100644 --- a/benchmark/weigh-pandoc.hs +++ b/benchmark/weigh-pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Main @@ -11,7 +10,6 @@ Benchmarks to determine resource use of readers and writers. -} -import Prelude import Weigh import Text.Pandoc import Data.Text (Text, unpack) diff --git a/pandoc.cabal b/pandoc.cabal index 7696e22b3..72e7c2da5 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -400,7 +400,6 @@ common common-options default-language: Haskell2010 build-depends: base >= 4.9 && < 5, text >= 1.1.1.0 && < 1.3 - other-extensions: NoImplicitPrelude ghc-options: -Wall -fno-warn-unused-do-bind -Wincomplete-record-updates -Wnoncanonical-monad-instances @@ -409,6 +408,7 @@ common common-options hs-source-dirs: prelude other-modules: Prelude build-depends: base-compat >= 0.9 + other-extensions: NoImplicitPrelude if os(windows) cpp-options: -D_WINDOWS diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index b583dbbdb..567f5abe5 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.UTF8 diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs index 8118e9759..07d825f73 100644 --- a/test/Tests/Command.hs +++ b/test/Tests/Command.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {- | Module : Tests.Command @@ -14,7 +13,6 @@ Run commands, and test results, defined in markdown files. module Tests.Command (runTest, tests) where -import Prelude import Data.Algorithm.Diff import System.Environment.Executable (getExecutablePath) import qualified Data.ByteString as BS diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs index 813c80adb..31e727a66 100644 --- a/test/Tests/Helpers.hs +++ b/test/Tests/Helpers.hs @@ -1,5 +1,4 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {- | Module : Tests.Helpers @@ -22,7 +21,6 @@ module Tests.Helpers ( test ) where -import Prelude import Data.Algorithm.Diff import qualified Data.Map as M import Data.Text (Text, unpack) diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs index 1dfbbd053..31c011900 100644 --- a/test/Tests/Lua.hs +++ b/test/Tests/Lua.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -14,7 +13,6 @@ Unit and integration tests for pandoc's Lua subsystem. -} module Tests.Lua ( runLuaTest, tests ) where -import Prelude import Control.Monad (when) import System.FilePath (()) import Test.Tasty (TestTree, localOption) diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index a031700a6..17ece49fd 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {- | Module : Tests.Old @@ -13,7 +12,6 @@ -} module Tests.Old (tests) where -import Prelude import Data.Algorithm.Diff import System.Exit import System.FilePath ((<.>), ()) diff --git a/test/Tests/Readers/Creole.hs b/test/Tests/Readers/Creole.hs index 1fc0e62d7..3320b78e8 100644 --- a/test/Tests/Readers/Creole.hs +++ b/test/Tests/Readers/Creole.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Creole @@ -14,7 +13,6 @@ Tests for the creole reader. -} module Tests.Readers.Creole (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 12007f502..263e04173 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Docx @@ -13,7 +12,6 @@ Tests for the word docx reader. -} module Tests.Readers.Docx (tests) where -import Prelude import Codec.Archive.Zip import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B diff --git a/test/Tests/Readers/DokuWiki.hs b/test/Tests/Readers/DokuWiki.hs index d5f0c45a9..84ba86d46 100644 --- a/test/Tests/Readers/DokuWiki.hs +++ b/test/Tests/Readers/DokuWiki.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -14,7 +13,6 @@ Tests for DokuWiki reader. -} module Tests.Readers.DokuWiki (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs index 3c75dd08d..13e239911 100644 --- a/test/Tests/Readers/EPUB.hs +++ b/test/Tests/Readers/EPUB.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Tests.Readers.EPUB Copyright : © 2006-2021 John MacFarlane @@ -12,7 +11,6 @@ Tests for the EPUB mediabag. -} module Tests.Readers.EPUB (tests) where -import Prelude import qualified Data.ByteString.Lazy as BL import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/FB2.hs b/test/Tests/Readers/FB2.hs index fbb2e2150..42054a235 100644 --- a/test/Tests/Readers/FB2.hs +++ b/test/Tests/Readers/FB2.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Tests.Readers.FB2 Copyright : © 2018-2020 Alexander Krotov @@ -12,7 +11,6 @@ Tests for the EPUB mediabag. -} module Tests.Readers.FB2 (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Test.Tasty.Golden (goldenVsString) diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 578c76860..f23af2cb1 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.HTML @@ -13,7 +12,6 @@ Tests for the HTML reader. -} module Tests.Readers.HTML (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index 3c61f602f..525499c86 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.JATS @@ -13,7 +12,6 @@ Tests for the JATS reader. -} module Tests.Readers.JATS (tests) where -import Prelude import Data.Text (Text) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs index bf78fe1fe..e170d2aaa 100644 --- a/test/Tests/Readers/Jira.hs +++ b/test/Tests/Readers/Jira.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index 2a52ffd18..77104c853 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.LaTeX @@ -13,7 +12,6 @@ Tests for the LaTeX reader. -} module Tests.Readers.LaTeX (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import qualified Text.Pandoc.UTF8 as UTF8 diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 4f3ab5a28..d36151d58 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -13,7 +13,6 @@ Tests for the Man reader. -} module Tests.Readers.Man (tests) where -import Prelude import Data.Text (Text) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 18f909583..0930deae6 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Markdown @@ -13,7 +12,6 @@ Tests for the Markdown reader. -} module Tests.Readers.Markdown (tests) where -import Prelude import Data.Text (Text, unpack) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 4ec1631e0..68bdc87b4 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Muse @@ -13,7 +12,6 @@ Tests for the Muse reader. -} module Tests.Readers.Muse (tests) where -import Prelude import Data.List (intersperse) import Data.Monoid (Any (..)) import Data.Text (Text) diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs index f5e427ba2..9b5ec6b9e 100644 --- a/test/Tests/Readers/Odt.hs +++ b/test/Tests/Readers/Odt.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Odt @@ -14,7 +13,6 @@ Tests for the ODT reader. -} module Tests.Readers.Odt (tests) where -import Prelude import Control.Monad (liftM) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs index 2ce07c4bb..779563794 100644 --- a/test/Tests/Readers/Org/Block.hs +++ b/test/Tests/Readers/Org/Block.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block @@ -13,7 +12,6 @@ Tests parsing of org blocks. -} module Tests.Readers.Org.Block (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs index d40c3bc1d..6b83ec6a9 100644 --- a/test/Tests/Readers/Org/Block/CodeBlock.hs +++ b/test/Tests/Readers/Org/Block/CodeBlock.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.CodeBlock @@ -13,7 +12,6 @@ Test parsing of org code blocks. -} module Tests.Readers.Org.Block.CodeBlock (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Block/Figure.hs b/test/Tests/Readers/Org/Block/Figure.hs index 8822f5b03..eb5be1c2b 100644 --- a/test/Tests/Readers/Org/Block/Figure.hs +++ b/test/Tests/Readers/Org/Block/Figure.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Figure @@ -13,7 +12,6 @@ Test parsing of org figures. -} module Tests.Readers.Org.Block.Figure (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:)) diff --git a/test/Tests/Readers/Org/Block/Header.hs b/test/Tests/Readers/Org/Block/Header.hs index 887055451..1344ad79b 100644 --- a/test/Tests/Readers/Org/Block/Header.hs +++ b/test/Tests/Readers/Org/Block/Header.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Header @@ -13,7 +12,6 @@ Test parsing of org header blocks. -} module Tests.Readers.Org.Block.Header (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep, tagSpan) diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs index ac03c583b..9686b5148 100644 --- a/test/Tests/Readers/Org/Block/List.hs +++ b/test/Tests/Readers/Org/Block/List.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Header @@ -13,7 +12,6 @@ Test parsing of org lists. -} module Tests.Readers.Org.Block.List (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Block/Table.hs b/test/Tests/Readers/Org/Block/Table.hs index cb38fcc12..ce18e6a5b 100644 --- a/test/Tests/Readers/Org/Block/Table.hs +++ b/test/Tests/Readers/Org/Block/Table.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Table @@ -13,7 +12,6 @@ Test parsing of org tables. -} module Tests.Readers.Org.Block.Table (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Directive.hs b/test/Tests/Readers/Org/Directive.hs index 00cb9762b..85d1bc088 100644 --- a/test/Tests/Readers/Org/Directive.hs +++ b/test/Tests/Readers/Org/Directive.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Directive @@ -13,7 +12,6 @@ Tests parsing of org directives (like @#+OPTIONS@). -} module Tests.Readers.Org.Directive (tests) where -import Prelude import Data.Time (UTCTime (UTCTime), secondsToDiffTime) import Data.Time.Calendar (Day (ModifiedJulianDay)) import Test.Tasty (TestTree, testGroup) diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs index 13e9fef21..111d74879 100644 --- a/test/Tests/Readers/Org/Inline.hs +++ b/test/Tests/Readers/Org/Inline.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline @@ -13,7 +12,6 @@ Tests parsing of org inlines. -} module Tests.Readers.Org.Inline (tests) where -import Prelude import Data.List (intersperse) import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs index 87bb3ca75..5538f1ec8 100644 --- a/test/Tests/Readers/Org/Inline/Citation.hs +++ b/test/Tests/Readers/Org/Inline/Citation.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Citation @@ -13,7 +12,6 @@ Test parsing of citations in org input. -} module Tests.Readers.Org.Inline.Citation (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:)) diff --git a/test/Tests/Readers/Org/Inline/Note.hs b/test/Tests/Readers/Org/Inline/Note.hs index 20157d2ae..c37133d54 100644 --- a/test/Tests/Readers/Org/Inline/Note.hs +++ b/test/Tests/Readers/Org/Inline/Note.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Note @@ -13,7 +12,6 @@ Test parsing of footnotes in org input. -} module Tests.Readers.Org.Inline.Note (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:)) diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs index 7fde380af..db96eb2ca 100644 --- a/test/Tests/Readers/Org/Inline/Smart.hs +++ b/test/Tests/Readers/Org/Inline/Smart.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Smart @@ -13,7 +12,6 @@ Test smart parsing of quotes, apostrophe, etc. -} module Tests.Readers.Org.Inline.Smart (tests) where -import Prelude import Data.Text (Text) import Test.Tasty (TestTree) import Tests.Helpers ((=?>), purely, test) diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs index 3c50f891b..6363d84b0 100644 --- a/test/Tests/Readers/Org/Meta.hs +++ b/test/Tests/Readers/Org/Meta.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Meta @@ -13,7 +12,6 @@ Tests parsing of org meta data (mostly lines starting with @#+@). -} module Tests.Readers.Org.Meta (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs index 4d0848575..c584eff19 100644 --- a/test/Tests/Readers/Org/Shared.hs +++ b/test/Tests/Readers/Org/Shared.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Shared @@ -18,7 +17,6 @@ module Tests.Readers.Org.Shared , tagSpan ) where -import Prelude import Data.List (intersperse) import Data.Text (Text) import Tests.Helpers (ToString, purely, test) diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs index 68241b7f9..a12b59fc2 100644 --- a/test/Tests/Readers/RST.hs +++ b/test/Tests/Readers/RST.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -14,7 +13,6 @@ Tests for the RST reader. -} module Tests.Readers.RST (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/Txt2Tags.hs b/test/Tests/Readers/Txt2Tags.hs index 62f336690..013f29d68 100644 --- a/test/Tests/Readers/Txt2Tags.hs +++ b/test/Tests/Readers/Txt2Tags.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Txt2Tags @@ -14,7 +13,6 @@ Tests for the Txt2Tags reader. -} module Tests.Readers.Txt2Tags (tests) where -import Prelude import Data.List (intersperse) import Data.Text (Text) import qualified Data.Text as T diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs index 72a59fec0..e415ea153 100644 --- a/test/Tests/Shared.hs +++ b/test/Tests/Shared.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Shared @@ -13,7 +12,6 @@ Tests for functions used in many parts of the library. -} module Tests.Shared (tests) where -import Prelude import System.FilePath.Posix (joinPath) import Test.Tasty import Test.Tasty.HUnit (assertBool, testCase, (@?=)) diff --git a/test/Tests/Writers/AnnotatedTable.hs b/test/Tests/Writers/AnnotatedTable.hs index 7e16cf8e0..53cca80a6 100644 --- a/test/Tests/Writers/AnnotatedTable.hs +++ b/test/Tests/Writers/AnnotatedTable.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Writers.AnnotatedTable diff --git a/test/Tests/Writers/AsciiDoc.hs b/test/Tests/Writers/AsciiDoc.hs index 75f6e5e97..04655635f 100644 --- a/test/Tests/Writers/AsciiDoc.hs +++ b/test/Tests/Writers/AsciiDoc.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.AsciiDoc (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs index c747e5d2f..5c1c98d4e 100644 --- a/test/Tests/Writers/ConTeXt.hs +++ b/test/Tests/Writers/ConTeXt.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.ConTeXt (tests) where -import Prelude import Data.Text (unpack, pack) import Test.Tasty import Test.Tasty.QuickCheck diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 621c1280b..842aed7ae 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Docbook (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 66a5c3d36..2e0f1e3fb 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -1,7 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} module Tests.Writers.Docx (tests) where -import Prelude import Text.Pandoc import Test.Tasty import Tests.Writers.OOXML diff --git a/test/Tests/Writers/FB2.hs b/test/Tests/Writers/FB2.hs index 7699c58e9..2e10636fa 100644 --- a/test/Tests/Writers/FB2.hs +++ b/test/Tests/Writers/FB2.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.FB2 (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs index 6ff0a6e1d..328801e31 100644 --- a/test/Tests/Writers/HTML.hs +++ b/test/Tests/Writers/HTML.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.HTML (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 7d98f979b..2f501c890 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.JATS (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/LaTeX.hs b/test/Tests/Writers/LaTeX.hs index 44e23d48e..ae5879099 100644 --- a/test/Tests/Writers/LaTeX.hs +++ b/test/Tests/Writers/LaTeX.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.LaTeX (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/Markdown.hs b/test/Tests/Writers/Markdown.hs index 4b819de24..d4f927ebe 100644 --- a/test/Tests/Writers/Markdown.hs +++ b/test/Tests/Writers/Markdown.hs @@ -1,9 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-name-shadowing #-} module Tests.Writers.Markdown (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/Ms.hs b/test/Tests/Writers/Ms.hs index d73603314..ad6849633 100644 --- a/test/Tests/Writers/Ms.hs +++ b/test/Tests/Writers/Ms.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Ms (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs index d0df0799f..5bddca3af 100644 --- a/test/Tests/Writers/Muse.hs +++ b/test/Tests/Writers/Muse.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Muse (tests) where diff --git a/test/Tests/Writers/Native.hs b/test/Tests/Writers/Native.hs index 905e83b1e..d7771ca19 100644 --- a/test/Tests/Writers/Native.hs +++ b/test/Tests/Writers/Native.hs @@ -1,7 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} module Tests.Writers.Native (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Test.Tasty.QuickCheck diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index 628ea9409..376f02c55 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -1,10 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.OOXML (ooxmlTest) where -import Prelude import Text.Pandoc import Test.Tasty import Test.Tasty.Golden.Advanced diff --git a/test/Tests/Writers/Org.hs b/test/Tests/Writers/Org.hs index c99f7344d..9cbe360da 100644 --- a/test/Tests/Writers/Org.hs +++ b/test/Tests/Writers/Org.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Org (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/Plain.hs b/test/Tests/Writers/Plain.hs index b8d1f6693..17edc9dbd 100644 --- a/test/Tests/Writers/Plain.hs +++ b/test/Tests/Writers/Plain.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Plain (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/Powerpoint.hs b/test/Tests/Writers/Powerpoint.hs index be98fe0e7..87ebe990c 100644 --- a/test/Tests/Writers/Powerpoint.hs +++ b/test/Tests/Writers/Powerpoint.hs @@ -1,7 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} module Tests.Writers.Powerpoint (tests) where -import Prelude import Tests.Writers.OOXML (ooxmlTest) import Text.Pandoc import Test.Tasty diff --git a/test/Tests/Writers/RST.hs b/test/Tests/Writers/RST.hs index a52423fad..94745e9a2 100644 --- a/test/Tests/Writers/RST.hs +++ b/test/Tests/Writers/RST.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.RST (tests) where -import Prelude import Control.Monad.Identity import Test.Tasty import Test.Tasty.HUnit diff --git a/test/Tests/Writers/TEI.hs b/test/Tests/Writers/TEI.hs index 31e970495..fa372909f 100644 --- a/test/Tests/Writers/TEI.hs +++ b/test/Tests/Writers/TEI.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.TEI (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 9973dffc8..4d9da525b 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -1,9 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# OPTIONS_GHC -Wall #-} module Main where -import Prelude import System.Environment (getArgs) import qualified Control.Exception as E import Text.Pandoc.App (convertWithOpts, defaultOpts, options, diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs index 9a53aa18c..810752afa 100644 --- a/trypandoc/trypandoc.hs +++ b/trypandoc/trypandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Main @@ -12,7 +11,6 @@ Provides a webservice which allows to try pandoc in the browser. -} module Main where -import Prelude import Network.Wai.Handler.CGI import Network.Wai import Control.Applicative ((<$>)) -- cgit v1.2.3 From a3beed9db874517fa57b55380658f4e019e809b2 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 13 Feb 2021 09:37:43 +0100 Subject: Org: support task_lists extension The tasks lists extension is now supported by the org reader and writer; the extension is turned on by default. Closes: #6336 --- src/Text/Pandoc/Extensions.hs | 2 ++ src/Text/Pandoc/Readers/Org/Blocks.hs | 41 +++++++++++++++++++++++-- src/Text/Pandoc/Writers/Org.hs | 16 ++++++++-- test/Tests/Readers/Org/Block/List.hs | 13 ++++++++ test/Tests/Writers/Org.hs | 57 ++++++++++++++++++++++++++++------- 5 files changed, 113 insertions(+), 16 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 82eb0e957..3b96f9e04 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -381,6 +381,7 @@ getDefaultExtensions "commonmark_x" = extensionsFromList ] getDefaultExtensions "org" = extensionsFromList [Ext_citations, + Ext_task_lists, Ext_auto_identifiers] getDefaultExtensions "html" = extensionsFromList [Ext_auto_identifiers, @@ -515,6 +516,7 @@ getAllExtensions f = universalExtensions <> getAll f extensionsFromList [ Ext_citations , Ext_smart + , Ext_task_lists ] getAll "html" = autoIdExtensions <> extensionsFromList diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 6bd046e04..d1aff701e 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2021 Albert Krewinkel @@ -850,16 +851,52 @@ definitionListItem parseIndentedMarker = try $ do definitionMarker = spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) +-- | Checkbox for tasks. +data Checkbox + = UncheckedBox + | CheckedBox + | SemicheckedBox + +-- | Parses a checkbox in a plain list. +checkbox :: PandocMonad m + => OrgParser m Checkbox +checkbox = do + guardEnabled Ext_task_lists + try (char '[' *> status <* char ']') "checkbox" + where + status = choice + [ UncheckedBox <$ char ' ' + , CheckedBox <$ char 'X' + , SemicheckedBox <$ char '-' + ] + +checkboxToInlines :: Checkbox -> Inline +checkboxToInlines = B.Str . \case + UncheckedBox -> "☐" + SemicheckedBox -> "☐" + CheckedBox -> "☒" + -- | parse raw text for one list item listItem :: PandocMonad m => OrgParser m Int -> OrgParser m (F Blocks) listItem parseIndentedMarker = try . withContext ListItemState $ do markerLength <- try parseIndentedMarker + box <- optionMaybe checkbox firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- T.concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine <> blank <> rest + contents <- parseFromString blocks $ firstLine <> blank <> rest + return (maybe id (prependInlines . checkboxToInlines) box <$> contents) + +-- | Prepend inlines to blocks, adding them to the first paragraph or +-- creating a new Plain element if necessary. +prependInlines :: Inline -> Blocks -> Blocks +prependInlines inlns = B.fromList . prepend . B.toList + where + prepend (Plain is : bs) = Plain (inlns : Space : is) : bs + prepend (Para is : bs) = Para (inlns : Space : is) : bs + prepend bs = Plain [inlns, Space] : bs -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 43ebf1807..8dfc2749c 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -213,25 +213,35 @@ blockToOrg (DefinitionList items) = do -- | Convert bullet list item (list of blocks) to Org. bulletListItemToOrg :: PandocMonad m => [Block] -> Org m (Doc Text) bulletListItemToOrg items = do - contents <- blockListToOrg items + exts <- gets $ writerExtensions . stOptions + contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang 2 "- " contents $$ if endsWithPlain items then cr else blankline - -- | Convert ordered list item (a list of blocks) to Org. orderedListItemToOrg :: PandocMonad m => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Org m (Doc Text) orderedListItemToOrg marker items = do - contents <- blockListToOrg items + exts <- gets $ writerExtensions . stOptions + contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang (T.length marker + 1) (literal marker <> space) contents $$ if endsWithPlain items then cr else blankline +-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@ +-- or @U+2612 BALLOT BOX WITH X@ to org checkbox syntax (e.g. @[X]@). +taskListItemToOrg :: Extensions -> [Block] -> [Block] +taskListItemToOrg = handleTaskListItem toOrg + where + toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is + toOrg (Str "☒" : Space : is) = Str "[X]" : Space : is + toOrg is = is + -- | Convert definition list item (label, list of blocks) to Org. definitionListItemToOrg :: PandocMonad m => ([Inline], [[Block]]) -> Org m (Doc Text) diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs index 9686b5148..2ee37081e 100644 --- a/test/Tests/Readers/Org/Block/List.hs +++ b/test/Tests/Readers/Org/Block/List.hs @@ -118,6 +118,19 @@ tests = ] =?> bulletList [ plain "", plain "" ] + , "Task list" =: + T.unlines [ "- [ ] nope" + , "- [X] yup" + , "- [-] started" + , " 1. [X] sure" + , " 2. [ ] nuh-uh" + ] =?> + bulletList [ plain "☐ nope", plain "☒ yup" + , mconcat [ plain "☐ started" + , orderedList [plain "☒ sure", plain "☐ nuh-uh"] + ] + ] + , "Simple Ordered List" =: ("1. Item1\n" <> "2. Item2\n") =?> diff --git a/test/Tests/Writers/Org.hs b/test/Tests/Writers/Org.hs index 9cbe360da..bd6c9b7ab 100644 --- a/test/Tests/Writers/Org.hs +++ b/test/Tests/Writers/Org.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Org (tests) where +import Data.Text as T import Test.Tasty import Tests.Helpers import Text.Pandoc @@ -9,17 +10,51 @@ import Text.Pandoc.Builder infix 4 =: (=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> TestTree -(=:) = test (purely (writeOrg def . toPandoc)) + => String -> (a, Text) -> TestTree +(=:) = test org + +defopts :: WriterOptions +defopts = def + { writerExtensions = getDefaultExtensions "org" + } + +org :: (ToPandoc a) => a -> Text +org = orgWithOpts defopts + +orgWithOpts :: (ToPandoc a) => WriterOptions -> a -> Text +orgWithOpts opts x = purely (writeOrg opts) $ toPandoc x + tests :: [TestTree] -tests = [ testGroup "links" - -- See http://orgmode.org/manual/Internal-links.html#Internal-links - [ "simple link" - =: link "/url" "" "foo" - =?> "[[/url][foo]]" - , "internal link to anchor" - =: link "#my-custom-id" "" "#my-custom-id" - =?> "[[#my-custom-id]]" +tests = + [ testGroup "links" + -- See http://orgmode.org/manual/Internal-links.html#Internal-links + [ "simple link" + =: link "/url" "" "foo" + =?> "[[/url][foo]]" + , "internal link to anchor" + =: link "#my-custom-id" "" "#my-custom-id" + =?> "[[#my-custom-id]]" + ] + + , testGroup "lists" + [ "bullet task list" + =: bulletList [plain "☐ a", plain "☒ b"] + =?> T.unlines + [ "- [ ] a" + , "- [X] b" + ] + , "ordered task list" + =: orderedList [plain ("☐" <> space <> "a"), plain "☒ b"] + =?> T.unlines + [ "1. [ ] a" + , "2. [X] b" + ] + , test (orgWithOpts def) "bullet without task_lists" $ + bulletList [plain "☐ a", plain "☒ b"] + =?> T.unlines + [ "- ☐ a" + , "- ☒ b" ] - ] + ] + ] -- cgit v1.2.3 From 967e7f5fb990b29de48b37be1db40fb149a8cf55 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 14 Feb 2021 22:29:21 -0800 Subject: Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light... ..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms | --- .hlint.yaml | 1 + pandoc.cabal | 2 +- src/Text/Pandoc/ImageSize.hs | 9 +- src/Text/Pandoc/Readers/DocBook.hs | 76 +-- src/Text/Pandoc/Readers/Docx/Parse.hs | 163 +++--- src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 31 +- src/Text/Pandoc/Readers/Docx/Util.hs | 27 +- src/Text/Pandoc/Readers/EPUB.hs | 65 +-- src/Text/Pandoc/Readers/FB2.hs | 93 ++-- src/Text/Pandoc/Readers/JATS.hs | 58 +- src/Text/Pandoc/Readers/OPML.hs | 29 +- src/Text/Pandoc/Readers/Odt.hs | 5 +- src/Text/Pandoc/Readers/Odt/ContentReader.hs | 13 +- src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs | 3 +- src/Text/Pandoc/Readers/Odt/Generic/Utils.hs | 33 +- .../Pandoc/Readers/Odt/Generic/XMLConverter.hs | 23 +- src/Text/Pandoc/Readers/Odt/Namespaces.hs | 11 +- src/Text/Pandoc/Readers/Odt/StyleReader.hs | 23 +- src/Text/Pandoc/Writers/Docx.hs | 263 +++++---- src/Text/Pandoc/Writers/EPUB.hs | 356 ++++++------- src/Text/Pandoc/Writers/FB2.hs | 127 ++--- src/Text/Pandoc/Writers/ODT.hs | 16 +- src/Text/Pandoc/Writers/OOXML.hs | 35 +- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 201 +++---- src/Text/Pandoc/XML/Light.hs | 586 +++++++++++++++++++++ src/Text/Pandoc/XMLParser.hs | 66 --- test/Tests/Writers/OOXML.hs | 3 +- test/docx/golden/block_quotes.docx | Bin 10098 -> 10071 bytes test/docx/golden/codeblock.docx | Bin 9950 -> 9920 bytes test/docx/golden/comments.docx | Bin 10285 -> 10258 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10048 -> 10021 bytes test/docx/golden/custom_style_preserve.docx | Bin 10673 -> 10650 bytes test/docx/golden/custom_style_reference.docx | Bin 12434 -> 12403 bytes test/docx/golden/definition_list.docx | Bin 9947 -> 9920 bytes .../golden/document-properties-short-desc.docx | Bin 9953 -> 9925 bytes test/docx/golden/document-properties.docx | Bin 10429 -> 10404 bytes test/docx/golden/headers.docx | Bin 10086 -> 10059 bytes test/docx/golden/image.docx | Bin 26764 -> 26736 bytes test/docx/golden/inline_code.docx | Bin 9886 -> 9859 bytes test/docx/golden/inline_formatting.docx | Bin 10066 -> 10038 bytes test/docx/golden/inline_images.docx | Bin 26822 -> 26793 bytes test/docx/golden/link_in_notes.docx | Bin 10107 -> 10081 bytes test/docx/golden/links.docx | Bin 10282 -> 10251 bytes test/docx/golden/lists.docx | Bin 10358 -> 10332 bytes test/docx/golden/lists_continuing.docx | Bin 10149 -> 10123 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10238 -> 10210 bytes test/docx/golden/lists_restarting.docx | Bin 10150 -> 10122 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10245 -> 10216 bytes test/docx/golden/notes.docx | Bin 10052 -> 10028 bytes test/docx/golden/raw-blocks.docx | Bin 9986 -> 9960 bytes test/docx/golden/raw-bookmarks.docx | Bin 10121 -> 10094 bytes test/docx/golden/table_one_row.docx | Bin 9938 -> 9908 bytes test/docx/golden/table_with_list_cell.docx | Bin 10255 -> 10227 bytes test/docx/golden/tables.docx | Bin 10272 -> 10244 bytes test/docx/golden/track_changes_deletion.docx | Bin 9930 -> 9903 bytes test/docx/golden/track_changes_insertion.docx | Bin 9913 -> 9886 bytes test/docx/golden/track_changes_move.docx | Bin 9947 -> 9920 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10059 -> 10032 bytes test/docx/golden/unicode.docx | Bin 9871 -> 9845 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9919 -> 9892 bytes test/pptx/code-custom.pptx | Bin 28221 -> 28184 bytes test/pptx/code-custom_templated.pptx | Bin 395516 -> 395477 bytes test/pptx/code.pptx | Bin 28220 -> 28183 bytes test/pptx/code_templated.pptx | Bin 395514 -> 395477 bytes test/pptx/document-properties-short-desc.pptx | Bin 27004 -> 26967 bytes .../document-properties-short-desc_templated.pptx | Bin 394288 -> 394253 bytes test/pptx/document-properties.pptx | Bin 27408 -> 27375 bytes test/pptx/document-properties_templated.pptx | Bin 394691 -> 394656 bytes test/pptx/endnotes.pptx | Bin 26962 -> 26928 bytes test/pptx/endnotes_templated.pptx | Bin 394253 -> 394219 bytes test/pptx/endnotes_toc.pptx | Bin 27789 -> 27747 bytes test/pptx/endnotes_toc_templated.pptx | Bin 395083 -> 395041 bytes test/pptx/images.pptx | Bin 44619 -> 44579 bytes test/pptx/images_templated.pptx | Bin 411909 -> 411870 bytes test/pptx/inline_formatting.pptx | Bin 26148 -> 26121 bytes test/pptx/inline_formatting_templated.pptx | Bin 393438 -> 393412 bytes test/pptx/lists.pptx | Bin 27049 -> 27015 bytes test/pptx/lists_templated.pptx | Bin 394340 -> 394307 bytes test/pptx/raw_ooxml.pptx | Bin 26940 -> 26908 bytes test/pptx/raw_ooxml_templated.pptx | Bin 394231 -> 394198 bytes test/pptx/remove_empty_slides.pptx | Bin 44065 -> 44025 bytes test/pptx/remove_empty_slides_templated.pptx | Bin 411352 -> 411311 bytes test/pptx/slide_breaks.pptx | Bin 28575 -> 28531 bytes test/pptx/slide_breaks_slide_level_1.pptx | Bin 27744 -> 27705 bytes .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395038 -> 395000 bytes test/pptx/slide_breaks_templated.pptx | Bin 395868 -> 395825 bytes test/pptx/slide_breaks_toc.pptx | Bin 29532 -> 29481 bytes test/pptx/slide_breaks_toc_templated.pptx | Bin 396826 -> 396776 bytes test/pptx/speaker_notes.pptx | Bin 35436 -> 35360 bytes test/pptx/speaker_notes_after_metadata.pptx | Bin 31675 -> 31636 bytes .../speaker_notes_after_metadata_templated.pptx | Bin 398955 -> 398915 bytes test/pptx/speaker_notes_afterheader.pptx | Bin 30691 -> 30657 bytes test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397979 -> 397943 bytes test/pptx/speaker_notes_afterseps.pptx | Bin 51604 -> 51548 bytes test/pptx/speaker_notes_afterseps_templated.pptx | Bin 418896 -> 418834 bytes test/pptx/speaker_notes_templated.pptx | Bin 402728 -> 402650 bytes test/pptx/start_numbering_at.pptx | Bin 27023 -> 26991 bytes test/pptx/start_numbering_at_templated.pptx | Bin 394314 -> 394283 bytes test/pptx/tables.pptx | Bin 27566 -> 27532 bytes test/pptx/tables_templated.pptx | Bin 394859 -> 394827 bytes test/pptx/two_column.pptx | Bin 26065 -> 26038 bytes test/pptx/two_column_templated.pptx | Bin 393355 -> 393327 bytes 102 files changed, 1388 insertions(+), 930 deletions(-) create mode 100644 src/Text/Pandoc/XML/Light.hs delete mode 100644 src/Text/Pandoc/XMLParser.hs (limited to 'test/Tests/Writers') diff --git a/.hlint.yaml b/.hlint.yaml index d5ebffd34..e482b2b37 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -14,6 +14,7 @@ - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained - ignore: {name: "Use &&&"} - ignore: {name: "Use String"} +- ignore: {name: "Use camelCase"} - ignore: {name: "Use fmap"} # specific for GHC 7.8 compat - ignore: {name: "Use isDigit"} diff --git a/pandoc.cabal b/pandoc.cabal index 3c7063f6c..22aebd55e 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -687,7 +687,7 @@ library Text.Pandoc.Lua.PandocLua, Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Walk, - Text.Pandoc.XMLParser, + Text.Pandoc.XML.Light, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e0a1af8e8..bb1aa6351 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -44,8 +44,7 @@ import Numeric (showFFloat) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Text.XML.Light as Xml -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light hiding (Attr) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE @@ -332,12 +331,12 @@ svgSize opts img = do doc <- either (const mzero) return $ parseXMLElement $ TL.fromStrict $ UTF8.toText img let viewboxSize = do - vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc - [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) + vb <- findAttrBy (== QName "viewBox" Nothing Nothing) doc + [_,_,w,h] <- mapM safeRead (T.words vb) return (w,h) let dpi = fromIntegral $ writerDpi opts let dirToInt dir = do - dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack + dim <- findAttrBy (== QName dir Nothing Nothing) doc >>= lengthToDim return $ inPixel opts dim w <- dirToInt "width" <|> (fst <$> viewboxSize) h <- dirToInt "height" <|> (snd <$> viewboxSize) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ad0108843..e201b54fe 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -31,8 +31,7 @@ import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light {- @@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> + convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr elt = - maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function named :: Text -> Element -> Bool -named s e = qName (elName e) == T.unpack s +named s e = qName (elName e) == s -- @@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool isBlockElement (Elem e) = qName (elName e) `elem` blockTags isBlockElement _ = False -blockTags :: [String] +blockTags :: [Text] blockTags = [ "abstract" , "ackno" @@ -721,7 +721,7 @@ blockTags = , "variablelist" ] ++ admonitionTags -admonitionTags :: [String] +admonitionTags :: [Text] admonitionTags = ["important","caution","note","tip","warning"] -- Trim leading and trailing newline characters @@ -779,10 +779,10 @@ getBlocks e = mconcat <$> parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s +parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty - else return $ plain $ trimInlines $ text $ T.pack s -parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper x parseBlock (Elem e) = case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated @@ -837,7 +837,7 @@ parseBlock (Elem e) = "refsect2" -> sect 2 "refsect3" -> sect 3 "refsection" -> gets dbSectionLevel >>= sect . (+1) - l | l `elem` admonitionTags -> parseAdmonition $ T.pack l + l | l `elem` admonitionTags -> parseAdmonition l "area" -> skip "areaset" -> skip "areaspec" -> skip @@ -899,7 +899,7 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - let qn = T.pack $ qName $ elName e + let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn then " qn <> "?>" else qn @@ -911,7 +911,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ T.pack $ strContentRecursive e + $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -965,7 +965,7 @@ parseBlock (Elem e) = w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> (x >= '0' && x <= '9') - || x == '.') (T.pack w) + || x == '.') w if n > 0 then Just n else Nothing let numrows = case bodyrows of [] -> 0 @@ -1048,12 +1048,12 @@ parseMixed container conts = do x <- parseMixed container rs return $ p <> b <> x -parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell] +parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell] parseRow cn = do let isEntry x = named "entry" x || named "td" x || named "th" x mapM (parseEntry cn) . filterChildren isEntry -parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell +parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell parseEntry cn el = do let colDistance sa ea = do let iStrt = elemIndex sa cn @@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines getInlines e' = trimInlines . mconcat <$> mapM parseInline (elContent e') -strContentRecursive :: Element -> String +strContentRecursive :: Element -> Text strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text $ T.pack s +parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = - return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref + return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref) parseInline (Elem e) = case qName (elName e) of "anchor" -> do @@ -1138,7 +1138,7 @@ parseInline (Elem e) = "userinput" -> codeWithLang "systemitem" -> codeWithLang "varargs" -> return $ code "(...)" - "keycap" -> return (str $ T.pack $ strContent e) + "keycap" -> return (str $ strContent e) "keycombo" -> keycombo <$> mapM parseInline (elContent e) "menuchoice" -> menuchoice <$> @@ -1150,17 +1150,17 @@ parseInline (Elem e) = let title = case attrValue "endterm" e of "" -> maybe "???" xrefTitleByElem (findElementById linkend content) - endterm -> maybe "???" (T.pack . strContent) + endterm -> maybe "???" strContent (findElementById endterm content) return $ link ("#" <> linkend) "" (text title) - "email" -> return $ link ("mailto:" <> T.pack (strContent e)) "" - $ str $ T.pack $ strContent e - "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e + "email" -> return $ link ("mailto:" <> strContent e) "" + $ str $ strContent e + "uri" -> return $ link (strContent e) "" $ str $ strContent e "ulink" -> innerInlines (link (attrValue "url" e) "") "link" -> do ils <- innerInlines id let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> T.pack h + Just h -> h _ -> "#" <> attrValue "linkend" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, T.words $ attrValue "role" e, []) @@ -1180,7 +1180,7 @@ parseInline (Elem e) = "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id where skip = do - let qn = T.pack $ qName $ elName e + let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn then " qn <> "?>" else qn @@ -1193,7 +1193,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -1234,10 +1234,10 @@ parseInline (Elem e) = "sect5" -> descendantContent "title" el "cmdsynopsis" -> descendantContent "command" el "funcsynopsis" -> descendantContent "function" el - _ -> T.pack $ qName (elName el) ++ "_title" + _ -> qName (elName el) <> "_title" where xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" (T.pack . strContent) + descendantContent name = maybe "???" strContent . filterElementName (\n -> qName n == name) -- | Extract a math equation from an element @@ -1258,7 +1258,7 @@ equation e constructor = mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") - (readMathML . T.pack . showElement) + (readMathML . showElement) latexEquations :: [Text] latexEquations = readMath (\x -> qName (elName x) == "mathphrase") @@ -1272,8 +1272,8 @@ equation e constructor = -- | Get the actual text stored in a CData block. 'showContent' -- returns the text still surrounded by the [[CDATA]] tags. showVerbatimCData :: Content -> Text -showVerbatimCData (Text (CData _ d _)) = T.pack d -showVerbatimCData c = T.pack $ showContent c +showVerbatimCData (Text (CData _ d _)) = d +showVerbatimCData c = showContent c -- | Set the prefix of a name to 'Nothing' diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 056dab6c2..c76f3c171 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -63,6 +63,7 @@ import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M import qualified Data.Text as T +import Data.Text (Text) import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -72,9 +73,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) -import Text.XML.Light -import qualified Text.XML.Light.Cursor as XMLC -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -128,37 +127,23 @@ mapD f xs = in concatMapM handler xs -unwrap :: NameSpaces -> Content -> [Content] -unwrap ns (Elem element) +unwrapElement :: NameSpaces -> Element -> [Element] +unwrapElement ns element | isElem ns "w" "sdt" element , Just sdtContent <- findChildByName ns "w" "sdtContent" element - = concatMap (unwrap ns . Elem) (elChildren sdtContent) + = concatMap (unwrapElement ns) (elChildren sdtContent) | isElem ns "w" "smartTag" element - = concatMap (unwrap ns . Elem) (elChildren element) -unwrap _ content = [content] + = concatMap (unwrapElement ns) (elChildren element) + | otherwise + = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }] -unwrapChild :: NameSpaces -> Content -> Content -unwrapChild ns (Elem element) = - Elem $ element { elContent = concatMap (unwrap ns) (elContent element) } -unwrapChild _ content = content +unwrapContent :: NameSpaces -> Content -> [Content] +unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element +unwrapContent _ content = [content] -walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor -walkDocument' ns cur = - let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur - in - case XMLC.nextDF modifiedCur of - Just cur' -> walkDocument' ns cur' - Nothing -> XMLC.root modifiedCur - -walkDocument :: NameSpaces -> Element -> Maybe Element +walkDocument :: NameSpaces -> Element -> Element walkDocument ns element = - let cur = XMLC.fromContent (Elem element) - cur' = walkDocument' ns cur - in - case XMLC.toTree cur' of - Elem element' -> Just element' - _ -> Nothing - + element{ elContent = concatMap (unwrapContent ns) (elContent element) } newtype Docx = Docx Document deriving Show @@ -361,9 +346,9 @@ getDocumentXmlPath zf = do fp <- findAttr (QName "Target" Nothing Nothing) rel -- sometimes there will be a leading slash, which windows seems to -- have trouble with. - return $ case fp of + return $ case T.unpack fp of '/' : fp' -> fp' - _ -> fp + fp' -> fp' archiveToDocument :: Archive -> D Document archiveToDocument zf = do @@ -372,7 +357,7 @@ archiveToDocument zf = do docElem <- maybeToD $ parseXMLFromEntry entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem - let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) + let bodyElem' = walkDocument namespaces bodyElem body <- elemToBody namespaces bodyElem' return $ Document namespaces body @@ -414,8 +399,8 @@ archiveToNotes zf = fn_namespaces = maybe [] elemToNameSpaces fnElem en_namespaces = maybe [] elemToNameSpaces enElem ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote" - en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote" + fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns + en = enElem >>= elemToNotes ns "endnote" . walkDocument ns in Notes ns fn en @@ -424,7 +409,8 @@ archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf >>= parseXMLFromEntry cmts_namespaces = maybe [] elemToNameSpaces cmtsElem - cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces) + cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$> + cmtsElem in case cmts of Just c -> Comments cmts_namespaces c @@ -443,8 +429,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttrText (QName "Id" Nothing Nothing) element - target <- findAttrText (QName "Target" Nothing Nothing) element + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -485,10 +471,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrTextByName ns "w" "ilvl" element + ilvl <- findAttrByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + >>= stringToInteger lvl = findChildByName ns "w" "lvl" element >>= levelElemToLevel ns return $ LevelOverride ilvl startOverride lvl @@ -497,9 +483,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrTextByName ns "w" "numId" element + numId <- findAttrByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -509,7 +495,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrTextByName ns "w" "abstractNumId" element + absNumId <- findAttrByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -518,14 +504,14 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrTextByName ns "w" "ilvl" element + ilvl <- findAttrByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + >>= stringToInteger return (Level ilvl fmt txt start) levelElemToLevel _ _ = Nothing @@ -546,11 +532,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) +elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrTextByName ns "w" "id" e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -562,7 +548,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrTextByName ns "w" "id" e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -622,12 +608,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element = stringToInteger , hangingParIndent = findAttrByName ns "w" "hanging" element >>= - stringToInteger} + stringToInteger } elemToParIndentation _ _ = Nothing -testBitMask :: String -> Int -> Bool +testBitMask :: Text -> Int -> Bool testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of [] -> False ((n', _) : _) -> (n' .|. n) /= 0 @@ -642,7 +628,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ T.pack $ showElement c + expsLst <- eitherToD $ readOMML $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -666,7 +652,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -705,8 +691,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -718,7 +704,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrTextByName ns "r" "embed" + >>= findAttrByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -728,7 +714,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrTextByName ns "r" "id" + >>= findAttrByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -797,7 +783,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText + info <- eitherToD $ parseFieldInfo $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -818,48 +804,48 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrTextByName ns "w" "id" element - , Just bmName <- findAttrTextByName ns "w" "name" element = + , Just bmId <- findAttrByName ns "w" "id" element + , Just bmName <- findAttrByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrTextByName ns "r" "id" element = do + , Just relId <- findAttrByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrTextByName ns "w" "anchor" element of + case findAttrByName ns "w" "anchor" element of Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrTextByName ns "w" "anchor" element = do + , Just anchor <- findAttrByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrTextByName ns "w" "id" element = do + , Just cmtId <- findAttrByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrTextByName ns "w" "id" element = + , Just cmtId <- findAttrByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrTextByName ns "w" "id" element - , Just cmtAuthor <- findAttrTextByName ns "w" "author" element - , cmtDate <- findAttrTextByName ns "w" "date" element = do + , Just cmtId <- findAttrByName ns "w" "id" element + , Just cmtAuthor <- findAttrByName ns "w" "author" element + , cmtDate <- findAttrByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem @@ -878,7 +864,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack + >>= findAttr (QName at Nothing Nothing) >>= safeRead childElemToRun :: NameSpaces -> Element -> D Run @@ -889,7 +875,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -902,7 +888,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrTextByName ns "w" "id" element = do + , Just fnId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -910,7 +896,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrTextByName ns "w" "id" element = do + , Just enId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -963,15 +949,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrTextByName ns "w" "id" element - , Just cAuthor <- findAttrTextByName ns "w" "author" element - , mcDate <- findAttrTextByName ns "w" "date" element = + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , mcDate <- findAttrByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrTextByName ns "w" "id" element - , Just cAuthor <- findAttrTextByName ns "w" "author" element - , mcDate <- findAttrTextByName ns "w" "date" element = + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , mcDate <- findAttrByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate) getTrackedChange _ _ = Nothing @@ -980,7 +966,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrTextByName ns "w" "val") + (fmap ParaStyleId . findAttrByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1012,7 +998,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrTextByName ns "w" "val" >>= + findAttrByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1022,7 +1008,7 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = T.pack $ strContent element + let str = strContent element font <- asks envFont case font of Nothing -> return $ TextRun str @@ -1044,14 +1030,14 @@ getSymChar :: NameSpaces -> Element -> RunElem getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = - case readLitChar ("\\x" ++ s) of + case readLitChar ("\\x" ++ T.unpack s) of [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element - lowerFromPrivate ('F':xs) = '0':xs - lowerFromPrivate xs = xs + getFont = textToFont =<< findAttrByName ns "w" "font" element + lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t + | otherwise = t getSymChar _ _ = TextRun "" elemToRunElems :: NameSpaces -> Element -> D [RunElem] @@ -1061,8 +1047,9 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - textToFont . T.pack =<< - foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] + textToFont =<< + foldr ((<|>) . (flip findAttr fontElem . qualName)) + Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index edade8654..0d7271d6a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -48,12 +48,13 @@ import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.Text.Read +import Data.Text (Text) import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) @@ -109,7 +110,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool , isRTL :: Maybe Bool , isForceCTL :: Maybe Bool , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String + , rUnderline :: Maybe Text , rParentStyle :: Maybe CharStyle } deriving Show @@ -159,7 +160,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrTextByName ns "w" "val" + findAttrByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -169,7 +170,7 @@ isBasedOnStyle ns element parentStyle | otherwise = False class HasStyleId a => ElemToStyle a where - cStyleType :: Maybe a -> String + cStyleType :: Maybe a -> Text elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a class FromStyleId (StyleId a) => HasStyleId a where @@ -226,8 +227,10 @@ buildBasedOnList ns element rootStyle = stys -> stys ++ concatMap (buildBasedOnList ns element . Just) stys -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) +stringToInteger :: Text -> Maybe Integer +stringToInteger s = case Data.Text.Read.decimal s of + Right (x,_) -> Just x + Left _ -> Nothing checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool checkOnOff ns rPr tag @@ -247,7 +250,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> Just (elemToRunStyle ns element parentStyle) @@ -281,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger . T.unpack =<< + , Just n <- stringToInteger =<< (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) @@ -289,8 +292,8 @@ getHeaderLevel _ _ = Nothing getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") - <|> findAttrTextByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") + <|> findAttrByName ns "w" "styleId" el) getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do @@ -298,15 +301,15 @@ getNumInfo ns element = do findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrTextByName ns "w" "val") + findAttrByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrTextByName ns "w" "val" + findAttrByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrTextByName ns "w" "styleId" element + | Just styleId <- findAttrByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f9c9a8e26..21df03d9e 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.StyleMaps Copyright : © 2014-2020 Jesse Rosenthal , @@ -18,51 +19,45 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName - , findAttrText , findAttrByName - , findAttrTextByName ) where import Data.Maybe (mapMaybe) import qualified Data.Text as T -import Text.XML.Light +import Data.Text (Text) +import Text.Pandoc.XML.Light -type NameSpaces = [(String, String)] +type NameSpaces = [(Text, Text)] elemToNameSpaces :: Element -> NameSpaces elemToNameSpaces = mapMaybe attrToNSPair . elAttribs -attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair :: Attr -> Maybe (Text, Text) attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) attrToNSPair _ = Nothing -elemName :: NameSpaces -> String -> String -> QName +elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) -isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = let ns' = ns ++ elemToNameSpaces element in qName (elName element) == name && qURI (elName element) == lookup prefix ns' -findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element +findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element findChildByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChild (elemName ns' pref name) el -findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element] +findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element] findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el -findAttrText :: QName -> Element -> Maybe T.Text -findAttrText x = fmap T.pack . findAttr x - -findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String +findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el -findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text -findAttrTextByName a b c = fmap T.pack . findAttrByName a b c diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 369c4f0c9..eb8d2405d 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -23,8 +23,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) -import Data.List (isInfixOf) import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (mapMaybe) import qualified Data.Text.Lazy as TL @@ -40,13 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension) import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) +import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow) import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light -type Items = M.Map String (FilePath, MimeType) +type Items = M.Map Text (FilePath, MimeType) readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc readEPUB opts bytes = case toArchiveOrFail bytes of @@ -126,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] -type CoverId = String +type CoverId = Text type CoverImage = FilePath -parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items) +parseManifest :: (PandocMonad m) + => Element -> Maybe CoverId -> m (Maybe CoverImage, Items) parseManifest content coverId = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest r <- mapM parseItem items let cover = findAttr (emptyName "href") =<< filterChild findCover manifest - return (cover `mplus` coverId, M.fromList r) + return (T.unpack <$> (cover `mplus` coverId), M.fromList r) where - findCover e = maybe False (isInfixOf "cover-image") + findCover e = maybe False (T.isInfixOf "cover-image") (findAttr (emptyName "properties") e) || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e) parseItem e = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e mime <- findAttrE (emptyName "media-type") e - return (uid, (href, T.pack mime)) + return (uid, (T.unpack href, mime)) parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do @@ -173,11 +173,11 @@ parseMeta content = do -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta + addMetaField (renameMeta field) (B.str $ strContent e) meta -renameMeta :: String -> T.Text +renameMeta :: Text -> Text renameMeta "creator" = "author" -renameMeta s = T.pack s +renameMeta s = s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do @@ -187,7 +187,7 @@ getManifest archive = do ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) as <- fmap (map attrToPair . elAttribs) (findElementE (QName "rootfile" (Just ns) Nothing) docElem) - manifestFile <- mkE "Root not found" (lookup "full-path" as) + manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as) let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive @@ -201,7 +201,8 @@ fixInternalReferences pathToFile = . walk (fixBlockIRs filename) . walk (fixInlineIRs filename) where - (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile + (root, T.unpack . escapeURI . T.pack -> filename) = + splitFileName pathToFile fixInlineIRs :: String -> Inline -> Inline fixInlineIRs s (Span as v) = @@ -214,7 +215,7 @@ fixInlineIRs s (Link as is t) = Link (fixAttrs s as) is t fixInlineIRs _ v = v -prependHash :: [T.Text] -> Inline -> Inline +prependHash :: [Text] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) | or [s `T.isPrefixOf` url | s <- ps] = Link attr is ("#" <> url, tit) @@ -231,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) = fixBlockIRs _ b = b fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) +fixAttrs s (ident, cs, kvs) = + (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) -addHash :: String -> T.Text -> T.Text +addHash :: FilePath -> Text -> Text addHash _ "" = "" addHash s ident = T.pack (takeFileName s) <> "#" <> ident -removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] +removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs -isEPUBAttr :: (T.Text, a) -> Bool +isEPUBAttr :: (Text, a) -> Bool isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k -- Library @@ -257,33 +259,33 @@ uncurry3 f (a, b, c) = f a b c -- Utility -stripNamespace :: QName -> String +stripNamespace :: QName -> Text stripNamespace (QName v _ _) = v -attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair :: Attr -> Maybe (Text, Text) attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val) attrToNSPair _ = Nothing -attrToPair :: Attr -> (String, String) +attrToPair :: Attr -> (Text, Text) attrToPair (Attr (QName name _ _) val) = (name, val) -defaultNameSpace :: Maybe String +defaultNameSpace :: Maybe Text defaultNameSpace = Just "http://www.idpf.org/2007/opf" -dfName :: String -> QName +dfName :: Text -> QName dfName s = QName s defaultNameSpace Nothing -emptyName :: String -> QName +emptyName :: Text -> QName emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: PandocMonad m => QName -> Element -> m String +findAttrE :: PandocMonad m => QName -> Element -> m Text findAttrE q e = mkE "findAttr" $ findAttr q e findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise . unEscapeString -> path) a = - mkE ("No entry on path: " ++ path) $ findEntryByPath path a + mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a parseXMLDocE :: PandocMonad m => Entry -> m Element parseXMLDocE entry = @@ -293,7 +295,8 @@ parseXMLDocE entry = fp = T.pack $ eRelativePath entry findElementE :: PandocMonad m => QName -> Element -> m Element -findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x +findElementE e x = + mkE ("Unable to find element: " <> tshow e) $ findElement e x -mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ T.pack s) return +mkE :: PandocMonad m => Text -> Maybe a -> m a +mkE s = maybe (throwError . PandocParseError $ s) return diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index b804eab4f..66e390bd7 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -25,7 +25,6 @@ TODO: module Text.Pandoc.Readers.FB2 ( readFB2 ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict -import Data.ByteString.Lazy.Char8 ( pack ) import Data.ByteString.Base64.Lazy import Data.Functor import Data.List (intersperse) @@ -42,8 +41,8 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light +import qualified Text.Pandoc.UTF8 as UTF8 type FB2 m = StateT FB2State m @@ -85,12 +84,12 @@ removeHash t = case T.uncons t of Just ('#', xs) -> xs _ -> t -convertEntity :: String -> Text -convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e) parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = - case T.pack $ qName $ elName e of + case qName $ elName e of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -98,12 +97,12 @@ parseInline (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ T.pack $ strContent e + "code" -> pure $ code $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement name pure mempty -parseInline (Text x) = pure $ text $ T.pack $ cdData x +parseInline (Text x) = pure $ text $ cdData x parseInline (CRef r) = pure $ str $ convertEntity r parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks @@ -113,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel < parseRootElement :: PandocMonad m => Element -> FB2 m Blocks parseRootElement e = - case T.pack $ qName $ elName e of + case qName $ elName e of "FictionBook" -> do -- Parse notes before parsing the rest of the content. case filterChild isNotesBody e of @@ -146,7 +145,7 @@ parseNote e = Just sectionId -> do content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e) oldNotes <- gets fb2Notes - modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes } + modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes } pure () where isTitle x = qName (elName x) == "title" @@ -158,7 +157,7 @@ parseNote e = -- | Parse a child of @\@ element. parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks parseFictionBookChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "stylesheet" -> pure mempty -- stylesheet is ignored "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) "body" -> if isNotesBody e @@ -170,7 +169,7 @@ parseFictionBookChild e = -- | Parse a child of @\@ element. parseDescriptionChild :: PandocMonad m => Element -> FB2 m () parseDescriptionChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title-info" -> mapM_ parseTitleInfoChild (elChildren e) "src-title-info" -> pure () -- ignore "document-info" -> pure () @@ -184,7 +183,7 @@ parseDescriptionChild e = -- | Parse a child of @\@ element. parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks parseBodyChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "image" -> parseImageElement e "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) "epigraph" -> parseEpigraph e @@ -198,7 +197,10 @@ parseBinaryElement e = (Nothing, _) -> report $ IgnoredElement "binary without id attribute" (Just _, Nothing) -> report $ IgnoredElement "binary without content-type attribute" - (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e))) + (Just filename, contentType) -> + insertMedia (T.unpack filename) contentType + (decodeLenient + (UTF8.fromTextLazy . TL.fromStrict . strContent $ e)) -- * Type parsers @@ -208,13 +210,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text) parseAuthorChild e = - case T.pack $ qName $ elName e of - "first-name" -> pure $ Just $ T.pack $ strContent e - "middle-name" -> pure $ Just $ T.pack $ strContent e - "last-name" -> pure $ Just $ T.pack $ strContent e - "nickname" -> pure $ Just $ T.pack $ strContent e - "home-page" -> pure $ Just $ T.pack $ strContent e - "email" -> pure $ Just $ T.pack $ strContent e + case qName $ elName e of + "first-name" -> pure $ Just $ strContent e + "middle-name" -> pure $ Just $ strContent e + "last-name" -> pure $ Just $ strContent e + "nickname" -> pure $ Just $ strContent e + "home-page" -> pure $ Just $ strContent e + "email" -> pure $ Just $ strContent e name -> do report $ IgnoredElement $ name <> " in author" pure Nothing @@ -238,13 +240,13 @@ parseTitleContent _ = pure Nothing parseImageElement :: PandocMonad m => Element -> FB2 m Blocks parseImageElement e = case href of - Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt Nothing -> do report $ IgnoredElement " image without href" pure mempty - where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e - title = maybe "" T.pack $ findAttr (unqual "title") e - imgId = maybe "" T.pack $ findAttr (unqual "id") e + where alt = maybe mempty str $ findAttr (unqual "alt") e + title = fromMaybe "" $ findAttr (unqual "title") e + imgId = fromMaybe "" $ findAttr (unqual "id") e href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e -- | Parse @pType@ @@ -258,7 +260,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) -- | Parse @citeType@ child parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks parseCiteChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "empty-line" -> pure horizontalRule @@ -273,13 +275,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks parsePoemChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "epigraph" -> parseEpigraph e "stanza" -> parseStanza e "text-author" -> para <$> parsePType e - "date" -> pure $ para $ text $ T.pack $ strContent e + "date" -> pure $ para $ text $ strContent e name -> report (UnexpectedXmlElement name "poem") $> mempty parseStanza :: PandocMonad m => Element -> FB2 m Blocks @@ -292,7 +294,7 @@ joinLineBlocks [] = [] parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks parseStanzaChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "v" -> lineBlock . (:[]) <$> parsePType e @@ -302,11 +304,11 @@ parseStanzaChild e = parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks parseEpigraph e = divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) - where divId = maybe "" T.pack $ findAttr (unqual "id") e + where divId = fromMaybe "" $ findAttr (unqual "id") e parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks parseEpigraphChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -320,7 +322,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks parseAnnotationChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -334,14 +336,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks parseSection e = do n <- gets fb2SectionLevel modify $ \st -> st{ fb2SectionLevel = n + 1 } - let sectionId = maybe "" T.pack $ findAttr (unqual "id") e + let sectionId = fromMaybe "" $ findAttr (unqual "id") e bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) modify $ \st -> st{ fb2SectionLevel = n } pure bs parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks parseSectionChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseBodyChild e "epigraph" -> parseEpigraph e "image" -> parseImageElement e @@ -363,16 +365,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e) parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines parseNamedStyle e = do content <- mconcat <$> mapM parseNamedStyleChild (elContent e) - let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e + let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e case findAttr (unqual "name") e of - Just name -> pure $ spanWith ("", [T.pack name], lang) content + Just name -> pure $ spanWith ("", [name], lang) content Nothing -> do report $ IgnoredElement "link without required name" pure mempty parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines parseNamedStyleChild (Elem e) = - case T.pack $ qName (elName e) of + case qName (elName e) of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -380,7 +382,7 @@ parseNamedStyleChild (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ T.pack $ strContent e + "code" -> pure $ code $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement $ name <> " in style" @@ -392,7 +394,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines parseLinkType e = do content <- mconcat <$> mapM parseStyleLinkType (elContent e) notes <- gets fb2Notes - case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just href -> case findAttr (QName "type" Nothing Nothing) e of Just "note" -> case M.lookup href notes of Nothing -> pure $ link href "" content @@ -419,15 +421,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet -- | Parse @title-infoType@ parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () parseTitleInfoChild e = - case T.pack $ qName (elName e) of + case qName (elName e) of "genre" -> pure () "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) - "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e)) + "book-title" -> modify (setMeta "title" (text $ strContent e)) "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn "," - $ T.pack $ strContent e)) - "date" -> modify (setMeta "date" (text $ T.pack $ strContent e)) + "date" -> modify (setMeta "date" (text $ strContent e)) "coverpage" -> parseCoverPage e "lang" -> pure () "src-lang" -> pure () @@ -441,7 +442,7 @@ parseCoverPage e = Just img -> case href of Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) Nothing -> pure () - where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img Nothing -> pure () -- | Parse @inlineImageType@ element @@ -454,5 +455,5 @@ parseInlineImageElement e = Nothing -> do report $ IgnoredElement "inline image without href" pure mempty - where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e - href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + where alt = maybe mempty str $ findAttr (unqual "alt") e + href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index dfd343b7a..5353f2001 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -16,7 +16,7 @@ module Text.Pandoc.Readers.JATS ( readJATS ) where import Control.Monad.State.Strict import Control.Monad.Except (throwError) import Text.Pandoc.Error (PandocError(..)) -import Data.Char (isDigit, isSpace, toUpper) +import Data.Char (isDigit, isSpace) import Data.Default import Data.Generics import Data.List (foldl', intersperse) @@ -31,8 +31,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) @@ -67,29 +66,29 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr = fromMaybe "" . maybeAttrValue attr -maybeAttrValue :: String -> Element -> Maybe Text +maybeAttrValue :: Text -> Element -> Maybe Text maybeAttrValue attr elt = - T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + lookupAttrBy (\x -> qName x == attr) (elAttribs elt) -- convenience function -named :: String -> Element -> Bool +named :: Text -> Element -> Bool named s e = qName (elName e) == s -- @@ -155,10 +154,10 @@ getBlocks e = mconcat <$> parseBlock :: PandocMonad m => Content -> JATS m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s +parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty - else return $ plain $ trimInlines $ text $ T.pack s -parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper x parseBlock (Elem e) = case qName (elName e) of "p" -> parseMixed para (elContent e) @@ -207,7 +206,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ textContentRecursive e + $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -271,7 +270,7 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = do - w <- findAttrText (unqual "colwidth") c + w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w if n > 0 then Just n else Nothing let numrows = foldl' max 0 $ map length bodyrows @@ -442,16 +441,10 @@ parseRef e = do Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty -- TODO handle mixed-citation -findAttrText :: QName -> Element -> Maybe Text -findAttrText x = fmap T.pack . findAttr x - textContent :: Element -> Text -textContent = T.pack . strContent - -textContentRecursive :: Element -> Text -textContentRecursive = T.pack . strContentRecursive +textContent = strContent -strContentRecursive :: Element -> String +strContentRecursive :: Element -> Text strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -460,9 +453,8 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines -parseInline (Text (CData _ s _)) = return $ text $ T.pack s -parseInline (CRef ref) = - return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref +parseInline (Text (CData _ s _)) = return $ text s +parseInline (CRef ref) = return . text . convertEntity $ ref parseInline (Elem e) = case qName (elName e) of "italic" -> innerInlines emph @@ -507,9 +499,9 @@ parseInline (Elem e) = else linkWith attr ("#" <> rid) "" ils "ext-link" -> do ils <- innerInlines id - let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> T.pack h + Just h -> h _ -> "#" <> attrValue "rid" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, [], []) @@ -529,7 +521,7 @@ parseInline (Elem e) = where innerInlines f = extractSpaces f . mconcat <$> mapM parseInline (elContent e) mathML x = - case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of + case readMathML . showElement $ everywhere (mkT removePrefix) x of Left _ -> mempty Right m -> writeTeX m formula constructor = do @@ -547,4 +539,4 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index bdadc4dd9..184d5a63f 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -13,7 +13,6 @@ Conversion of OPML to 'Pandoc' document. module Text.Pandoc.Readers.OPML ( readOPML ) where import Control.Monad.State.Strict -import Data.Char (toUpper) import Data.Default import Data.Generics import Data.Maybe (fromMaybe) @@ -28,8 +27,7 @@ import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Shared (crFilter, blocksToInlines') -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import Control.Monad.Except (throwError) type OPML m = StateT OPMLState m @@ -69,25 +67,22 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e)) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr elt = - maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) - -textContent :: Element -> Text -textContent = T.pack . strContent + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a -- exceptT = either throwError return @@ -111,11 +106,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ textContent e]}) + st{opmlDocAuthors = [text $ strContent e]}) "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ textContent e}) + st{opmlDocDate = text $ strContent e}) "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ textContent e}) + st{opmlDocTitle = text $ strContent e}) "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 85308deb1..c274b6fd4 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -14,8 +14,7 @@ Entry point to the odt reader. module Text.Pandoc.Readers.Odt ( readOdt ) where import Codec.Archive.Zip -import qualified Text.XML.Light as XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light import qualified Data.ByteString.Lazy as B @@ -91,7 +90,7 @@ archiveToOdt archive = do -- -entryToXmlElem :: Entry -> Either PandocError XML.Element +entryToXmlElem :: Entry -> Either PandocError Element entryToXmlElem entry = case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of Right x -> Right x diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 43c44e7e9..df90880fa 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -29,14 +29,14 @@ import Control.Monad ((<=<)) import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, stripPrefix) +import Data.List (find) import qualified Data.Map as M import qualified Data.Text as T import Data.Maybe import Data.Semigroup (First(..), Option(..)) import Text.TeXMath (readMathML, writeTeX) -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Builder hiding (underline) import Text.Pandoc.MediaBag (MediaBag, insertMedia) @@ -557,7 +557,7 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover >>?% mappend -- extractText :: XML.Content -> Fallible T.Text - extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) + extractText (XML.Text cData) = succeedWith (XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -777,14 +777,14 @@ read_frame_img = "" -> returnV mempty -< () src' -> do let exts = extensionsFromList [Ext_auto_identifiers] - resource <- lookupResource -< src' + resource <- lookupResource -< T.unpack src' _ <- updateMediaWithResource -< resource w <- findAttrText' NsSVG "width" -< () h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) @@ -804,7 +804,8 @@ read_frame_mathml = case fold src of "" -> returnV mempty -< () src' -> do - let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" + let path = T.unpack $ + fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml" (_, mathml) <- lookupResource -< path case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs index 77174c793..78a7fc0b2 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces. module Text.Pandoc.Readers.Odt.Generic.Namespaces where import qualified Data.Map as M +import Data.Text (Text) -- -type NameSpaceIRI = String +type NameSpaceIRI = Text -- type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6dc56a0d9..edefe3c70 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , reverseComposition , tryToRead , Lookupable(..) -, readLookupables , readLookupable , readPercent , findBy @@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils import Control.Category (Category, (<<<), (>>>)) import qualified Control.Category as Cat (id) -import Control.Monad (msum) - +import Data.Char (isSpace) import qualified Data.Foldable as F (Foldable, foldr) import Data.Maybe - +import Data.Text (Text) +import qualified Data.Text as T -- | Equivalent to -- > foldr (.) id @@ -76,8 +75,8 @@ swing = flip.(.flip id) -- (nobody wants that) while the latter returns "to much" for simple purposes. -- This function instead applies 'reads' and returns the first match (if any) -- in a 'Maybe'. -tryToRead :: (Read r) => String -> Maybe r -tryToRead = reads >>> listToMaybe >>> fmap fst +tryToRead :: (Read r) => Text -> Maybe r +tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst -- | A version of 'reads' that requires a '%' sign after the number readPercent :: ReadS Int @@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s -- | Data that can be looked up. -- This is mostly a utility to read data with kind *. class Lookupable a where - lookupTable :: [(String, a)] - --- | The idea is to use this function as if there was a declaration like --- --- > instance (Lookupable a) => (Read a) where --- > readsPrec _ = readLookupables --- . --- But including this code in this form would need UndecideableInstances. --- That is a bad idea. Luckily 'readLookupable' (without the s at the end) --- can be used directly in almost any case. -readLookupables :: (Lookupable a) => String -> [(a,String)] -readLookupables s = [ (a,rest) | (word,rest) <- lex s, - a <- maybeToList (lookup word lookupTable) - ] + lookupTable :: [(Text, a)] -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. -readLookupable :: (Lookupable a) => String -> Maybe a -readLookupable s = msum - $ map ((`lookup` lookupTable).fst) - $ lex s +readLookupable :: (Lookupable a) => Text -> Maybe a +readLookupable s = + lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 00c636a0d..0d921e23b 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} @@ -60,11 +61,11 @@ import Control.Arrow import Data.Bool ( bool ) import Data.Either ( rights ) import qualified Data.Map as M -import qualified Data.Text as T +import Data.Text (Text) import Data.Default import Data.Maybe -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible -------------------------------------------------------------------------------- -- -type ElementName = String -type AttributeName = String -type AttributeValue = String -type TextAttributeValue = T.Text +type ElementName = Text +type AttributeName = Text +type AttributeValue = Text +type TextAttributeValue = Text -- -type NameSpacePrefix = String +type NameSpacePrefix = Text -- type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix @@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) lookupDefaultingAttr nsID attrName = lookupAttrWithDefault nsID attrName def --- | Return value as a (Maybe String) +-- | Return value as a (Maybe Text) findAttr' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe AttributeValue) @@ -477,7 +478,6 @@ findAttrText' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr - >>^ fmap T.pack -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID) -> FallibleXMLConverter nsID extraState x TextAttributeValue findAttrText nsID attrName = findAttr' nsID attrName - >>^ fmap T.pack >>> maybeToChoice -- | Return value as string or return provided default value @@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID) -> XMLConverter nsID extraState x TextAttributeValue findAttrTextWithDefault nsID attrName deflt = findAttr' nsID attrName - >>^ maybe deflt T.pack + >>^ fromMaybe deflt -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) @@ -748,7 +747,7 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool' :: String -> Maybe Bool +stringToBool' :: Text -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False | otherwise = Nothing diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index 3a24a1162..70741c28d 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt.Namespaces Copyright : Copyright (C) 2015 Martin Linnemann @@ -13,10 +14,10 @@ Namespaces used in odt files. module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) ) where -import Data.List (isPrefixOf) import qualified Data.Map as M (empty, insert) import Data.Maybe (fromMaybe, listToMaybe) - +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Odt.Generic.Namespaces @@ -30,7 +31,7 @@ instance NameSpaceID Namespace where findID :: NameSpaceIRI -> Maybe Namespace -findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri] +findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri] nsIDmap :: NameSpaceIRIs Namespace nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs @@ -54,12 +55,12 @@ data Namespace = -- Open Document core -- Core XML (basically only for the 'id'-attribute) | NsXML -- Fallback - | NsOther String + | NsOther Text deriving ( Eq, Ord, Show ) -- | Not the actual iri's, but large prefixes of them - this way there are -- less versioning problems and the like. -nsIDs :: [(String,Namespace)] +nsIDs :: [(Text, Namespace)] nsIDs = [ ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 46a777df1..5e10f896c 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -2,6 +2,7 @@ {-# LANGUAGE Arrows #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.StyleReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -46,11 +47,13 @@ import qualified Data.Foldable as F import Data.List (unfoldr) import qualified Data.Map as M import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T import qualified Data.Set as S -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, tshow) import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -90,7 +93,7 @@ instance Default FontPitch where -- -- Thus, we want -type FontFaceName = String +type FontFaceName = Text type FontPitches = M.Map FontFaceName FontPitch @@ -151,7 +154,7 @@ findPitch = ( lookupAttr NsStyle "font-pitch" -- Definitions of main data -------------------------------------------------------------------------------- -type StyleName = String +type StyleName = Text -- | There are two types of styles: named styles with a style family and an -- optional style parent, and default styles for each style family, @@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} = -- \^ simpler, but in general less efficient data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType - , listItemPrefix :: Maybe String - , listItemSuffix :: Maybe String + , listItemPrefix :: Maybe Text + , listItemSuffix :: Maybe Text , listItemFormat :: ListItemNumberFormat , listItemStart :: Int } @@ -366,9 +369,9 @@ instance Show ListLevelStyle where show ListLevelStyle{..} = " listItemPrefix) ++ show listItemFormat - ++ maybeToString listItemSuffix + ++ maybeToString (T.unpack <$> listItemSuffix) ++ ">" where maybeToString = fromMaybe "" @@ -471,7 +474,7 @@ readTextProperties = ) where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] isFontBold = ("normal",False):("bold",True) - :map ((,True).show) ([100,200..900]::[Int]) + :map ((,True) . tshow) ([100,200..900]::[Int]) readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) readUnderlineMode = readLineMode "text-underline-mode" @@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) readStrikeThroughMode = readLineMode "text-line-through-mode" "text-line-through-style" -readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode) +readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode) readLineMode modeAttr styleAttr = proc x -> do isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x mode <- lookupAttr' NsStyle modeAttr -< x diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index da990e4d3..89c71d773 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -31,6 +31,7 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) @@ -57,19 +58,19 @@ import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.Printf (printf) import Text.TeXMath -import Text.XML.Light as XML -import Text.XML.Light.Cursor as XMLC import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML +import Data.Generics (mkT, everywhere) data ListMarker = NoMarker | BulletMarker | NumberMarker ListNumberStyle ListNumberDelim Int deriving (Show, Read, Eq, Ord) -listMarkerToId :: ListMarker -> String +listMarkerToId :: ListMarker -> Text listMarkerToId NoMarker = "990" listMarkerToId BulletMarker = "991" -listMarkerToId (NumberMarker sty delim n) = +listMarkerToId (NumberMarker sty delim n) = T.pack $ '9' : '9' : styNum : delimNum : show n where styNum = case sty of DefaultStyle -> '2' @@ -106,8 +107,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps , envListLevel :: Int , envListNumId :: Int , envInDel :: Bool - , envChangesAuthor :: T.Text - , envChangesDate :: T.Text + , envChangesAuthor :: Text + , envChangesDate :: Text , envPrintWidth :: Integer } @@ -125,9 +126,9 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty data WriterState = WriterState{ stFootnotes :: [Element] - , stComments :: [([(T.Text, T.Text)], [Inline])] - , stSectionIds :: Set.Set T.Text - , stExternalLinks :: M.Map String String + , stComments :: [([(Text, Text)], [Inline])] + , stSectionIds :: Set.Set Text + , stExternalLinks :: M.Map Text Text , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) , stLists :: [ListMarker] , stInsId :: Int @@ -164,18 +165,18 @@ defaultWriterState = WriterState{ type WS m = ReaderT WriterEnv (StateT WriterState m) -renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap :: Int -> [Element] -> M.Map Text Text renumIdMap _ [] = M.empty renumIdMap n (e:es) | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = - M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es) + M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es) | otherwise = renumIdMap n es -replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr] replaceAttr f val = map $ \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a -renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element +renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element renumId f renumMap e | Just oldId <- findAttrBy f e , Just newId <- M.lookup oldId renumMap = @@ -184,18 +185,12 @@ renumId f renumMap e e { elAttribs = attrs' } | otherwise = e -renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] +renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) -findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text -findAttrTextBy x = fmap T.pack . findAttrBy x - -lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text -lookupAttrTextBy x = fmap T.pack . lookupAttrBy x - -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: T.Text -> T.Text +stripInvalidChars :: Text -> Text stripInvalidChars = T.filter isValidChar -- | See XML reference @@ -234,11 +229,11 @@ writeDocx opts doc = do -- Gets the template size let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) - let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs + let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) - let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs - let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs + let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs + let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs -- Get the available area (converting the size and the margins to int and -- doing the difference @@ -250,24 +245,21 @@ writeDocx opts doc = do -- styles mblang <- toLang $ getLang opts meta + -- TODO FIXME avoid this generic traversal! + -- lang is in w:docDefaults / w:rPr / w:lang let addLang :: Element -> Element - addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $ - XMLC.fromElement e) <$> mblang of - Just (Elem e') -> e' - _ -> e -- return original - where go :: String -> Cursor -> Cursor - go l cursor = case XMLC.findRec (isLangElt . current) cursor of - Nothing -> cursor - Just t -> XMLC.modifyContent (setval l) t - setval :: String -> Content -> Content - setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $ - elAttribs e' } - setval _ x = x - setvalattr :: String -> XML.Attr -> XML.Attr - setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l - setvalattr _ x = x - isLangElt (Elem e') = qName (elName e') == "lang" - isLangElt _ = False + addLang = case mblang of + Nothing -> id + Just l -> everywhere (mkT (go (renderLang l))) + where + go :: Text -> Element -> Element + go l e' + | qName (elName e') == "lang" + = e'{ elAttribs = map (setvalattr l) $ elAttribs e' } + | otherwise = e' + + setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l + setvalattr _ x = x let stylepath = "word/styles.xml" styledoc <- addLang <$> parseXml refArchive distArchive stylepath @@ -337,12 +329,13 @@ writeDocx opts doc = do -- [Content_Types].xml let mkOverrideNode (part', contentType') = mknode "Override" - [("PartName",part'),("ContentType",contentType')] () + [("PartName", T.pack part') + ,("ContentType", contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _) = - mkOverrideNode ("/word/" ++ imgpath, - maybe "application/octet-stream" T.unpack mbMimeType) + mkOverrideNode ("/word/" <> imgpath, + fromMaybe "application/octet-stream" mbMimeType) let mkMediaOverride imgpath = - mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath) + mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -369,13 +362,14 @@ writeDocx opts doc = do ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") ] ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ map mkImageOverride imgs ++ - [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive - , "word/media/" `isPrefixOf` eRelativePath e ] + [ mkMediaOverride (eRelativePath e) + | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), @@ -421,7 +415,7 @@ writeDocx opts doc = do let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers let renumFooters = renumIds (\q -> qName q == "Id") idMap footers let baserels = baserels' ++ renumHeaders ++ renumFooters - let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () + let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () let linkrels = map toLinkRel $ M.toList $ stExternalLinks st @@ -489,10 +483,10 @@ writeDocx opts doc = do numbering <- parseXml refArchive distArchive numpath let newNumElts = mkNumbering (stLists st) let pandocAdded e = - case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of + case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of Just numid -> numid >= (990 :: Int) Nothing -> - case findAttrTextBy ((== "numId") . qName) e >>= safeRead of + case findAttrBy ((== "numId") . qName) e >>= safeRead of Just numid -> numid >= (1000 :: Int) Nothing -> False let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering) @@ -514,7 +508,7 @@ writeDocx opts doc = do let extraCoreProps = ["subject","lang","category","description"] let extraCorePropsMap = M.fromList $ zip extraCoreProps ["dc:subject","dc:language","cp:category","dc:description"] - let lookupMetaString' :: T.Text -> Meta -> T.Text + let lookupMetaString' :: Text -> Meta -> Text lookupMetaString' key' meta' = case key' of "description" -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') @@ -530,21 +524,21 @@ writeDocx opts doc = do : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta)) : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) | k <- M.keys (unMeta meta), k `elem` extraCoreProps] - ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords) + ++ mknode "cp:keywords" [] (T.intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps -- docProps/custom.xml - let customProperties :: [(String, String)] - customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta) + let customProperties :: [(Text, Text)] + customProperties = [ (k, lookupMetaString k meta) | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords"] ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) + ,("pid", tshow pid) ,("name", k)] $ mknode "vt:lpwstr" [] v let customPropsPath = "docProps/custom.xml" let customProps = mknode "Properties" @@ -594,7 +588,8 @@ writeDocx opts doc = do fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $ - mapMaybe extractTarget (headers ++ footers) + mapMaybe (fmap T.unpack . extractTarget) + (headers ++ footers) let miscRelEntries = [ e | e <- zEntries refArchive , "word/_rels/" `isPrefixOf` eRelativePath e , ".xml.rels" `isSuffixOf` eRelativePath e @@ -620,8 +615,8 @@ newParaPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "paragraph") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyText")] () , mknode "w:qFormat" [] () ] @@ -631,8 +626,8 @@ newTextPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "character") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyTextChar")] () ] @@ -643,13 +638,14 @@ styleToOpenXml sm style = toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), - ("w:customStyle","1"),("w:styleId",show toktype)] - [ mknode "w:name" [("w:val",show toktype)] () + ("w:customStyle","1"),("w:styleId", tshow toktype)] + [ mknode "w:name" [("w:val", tshow toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () , mknode "w:rPr" [] $ - [ mknode "w:color" [("w:val",tokCol toktype)] () + [ mknode "w:color" [("w:val", tokCol toktype)] () | tokCol toktype /= "auto" ] ++ - [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] () + [ mknode "w:shd" [("w:val","clear") + ,("w:fill",tokBg toktype)] () | tokBg toktype /= "auto" ] ++ [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++ [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++ @@ -657,10 +653,10 @@ styleToOpenXml sm style = ] tokStyles = tokenStyles style tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles - tokCol toktype = maybe "auto" (drop 1 . fromColor) + tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenColor =<< M.lookup toktype tokStyles) `mplus` defaultColor style - tokBg toktype = maybe "auto" (drop 1 . fromColor) + tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenBackground =<< M.lookup toktype tokStyles) `mplus` backgroundColor style parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing @@ -673,10 +669,11 @@ styleToOpenXml sm style = , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () : - maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style) + maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style) ] -copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry +copyChildren :: (PandocMonad m) + => Archive -> Archive -> String -> Integer -> [Text] -> m Entry copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path @@ -685,7 +682,7 @@ copyChildren refArchive distArchive path timestamp elNames = do } where strName QName{qName=name, qPrefix=prefix} - | Just p <- prefix = p++":"++name + | Just p <- prefix = p <> ":" <> name | otherwise = name shouldCopy = (`elem` elNames) . strName cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} @@ -706,35 +703,35 @@ maxListLevel = 8 mkNum :: ListMarker -> Int -> Element mkNum marker numid = - mknode "w:num" [("w:numId",show numid)] + mknode "w:num" [("w:numId",tshow numid)] $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> - map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) + map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))] + $ mknode "w:startOverride" [("w:val",tshow start)] ()) [0..maxListLevel] mkAbstractNum :: ListMarker -> Integer -> Element mkAbstractNum marker nsid = mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] - $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () + $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = - mknode "w:lvl" [("w:ilvl",show lvl)] $ + mknode "w:lvl" [("w:ilvl",tshow lvl)] $ [ mknode "w:start" [("w:val",start)] () | marker /= NoMarker && marker /= BulletMarker ] ++ [ mknode "w:numFmt" [("w:val",fmt)] () - , mknode "w:lvlText" [("w:val",lvltxt)] () + , mknode "w:lvlText" [("w:val", lvltxt)] () , mknode "w:lvlJc" [("w:val","left")] () , mknode "w:pPr" [] - [ mknode "w:ind" [ ("w:left",show $ lvl * step + step) - , ("w:hanging",show (hang :: Int)) + [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step) + , ("w:hanging",tshow (hang :: Int)) ] () ] ] @@ -743,8 +740,8 @@ mkLvl marker lvl = NoMarker -> ("bullet"," ","1") BulletMarker -> ("bullet",bulletFor lvl,"1") NumberMarker st de n -> (styleFor st lvl - ,patternFor de ("%" ++ show (lvl + 1)) - ,show n) + ,patternFor de ("%" <> tshow (lvl + 1)) + ,tshow n) step = 720 hang = 480 bulletFor 0 = "\x2022" -- filled circle @@ -767,9 +764,9 @@ mkLvl marker lvl = styleFor DefaultStyle 5 = "lowerRoman" styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6) styleFor _ _ = "decimal" - patternFor OneParen s = s ++ ")" - patternFor TwoParens s = "(" ++ s ++ ")" - patternFor _ s = s ++ "." + patternFor OneParen s = s <> ")" + patternFor TwoParens s = "(" <> s <> ")" + patternFor _ s = s <> "." getNumId :: (PandocMonad m) => WS m Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists @@ -777,8 +774,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] makeTOC opts = do - let depth = "1-"++show (writerTOCDepth opts) - let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + let depth = "1-" <> tshow (writerTOCDepth opts) + let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u" tocTitle <- gets stTocTitle title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) return @@ -831,7 +828,7 @@ writeOpenXML opts (Pandoc meta blocks) = do let toComment (kvs, ils) = do annotation <- inlinesToOpenXML opts ils return $ - mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs] + mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs] [ mknode "w:p" [] $ map Elem [ mknode "w:pPr" [] @@ -867,24 +864,24 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element pStyleM styleName = do pStyleMap <- gets (smParaStyle . stStyleMaps) let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] () + return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) let sty' = getStyleIdFromName styleName cStyleMap - return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] () + return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () -getUniqueId :: (PandocMonad m) => WS m String +getUniqueId :: (PandocMonad m) => WS m Text -- the + 20 is to ensure that there are no clashes with the rIds -- already in word/document.xml.rel getUniqueId = do n <- gets stCurId modify $ \st -> st{stCurId = n + 1} - return $ show n + return $ tshow n -- | Key for specifying user-defined docx styles. -dynamicStyleKey :: T.Text +dynamicStyleKey :: Text dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. @@ -979,7 +976,7 @@ blockToOpenXML' opts (Para lst) blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) | format == Format "openxml" = return [ - Text (CData CDataRaw (T.unpack str) Nothing) + Text (CData CDataRaw str Nothing) ] | otherwise = do report $ BlockNotRendered b @@ -1036,7 +1033,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" - [("w:w", show (floor (textwidth * w) :: Integer))] () + [("w:w", tshow (floor (textwidth * w) :: Integer))] () let hasHeader = not $ all null headers modify $ \s -> s { stInTable = False } -- for compatibility with Word <= 2007, we include a val with a bitmask @@ -1054,16 +1051,16 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ,("w:lastRow","0") ,("w:firstColumn","0") ,("w:lastColumn","0") ,("w:noHBand","0") ,("w:noVBand","0") - ,("w:val", printf "%04x" tblLookVal) + ,("w:val", T.pack $ printf "%04x" tblLookVal) ] () : - [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () + [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths @@ -1126,7 +1123,7 @@ listItemToOpenXML opts numid (first:rest) = do modify $ \st -> st{ stInList = oldInList } return $ first'' ++ rest'' -alignmentToString :: Alignment -> [Char] +alignmentToString :: Alignment -> Text alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" @@ -1169,8 +1166,8 @@ getParaProps displayMathPara = do listLevel <- asks envListLevel numid <- asks envListNumId let listPr = [mknode "w:numPr" [] - [ mknode "w:ilvl" [("w:val",show listLevel)] () - , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara] + [ mknode "w:ilvl" [("w:val",tshow listLevel)] () + , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara] return $ case listPr ++ squashProps props of [] -> [] ps -> [mknode "w:pPr" [] ps] @@ -1185,7 +1182,7 @@ withParaPropM md p = do d <- md withParaProp d p -formattedString :: PandocMonad m => T.Text -> WS m [Element] +formattedString :: PandocMonad m => Text -> WS m [Element] formattedString str = -- properly handle soft hyphens case splitTextBy (=='\173') str of @@ -1194,7 +1191,7 @@ formattedString str = sh <- formattedRun [mknode "w:softHyphen" [] ()] intercalate sh <$> mapM formattedString' ws -formattedString' :: PandocMonad m => T.Text -> WS m [Element] +formattedString' :: PandocMonad m => Text -> WS m [Element] formattedString' str = do inDel <- asks envInDel formattedRun [ mktnode (if inDel then "w:delText" else "w:t") @@ -1226,7 +1223,7 @@ inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) = mknode "w:r" [] (mknode "w:t" [("xml:space","preserve")] - ("\t" :: String))] ++) + ("\t" :: Text))] ++) <$> inlinesToOpenXML opts ils inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) = inlinesToOpenXML opts ils @@ -1236,17 +1233,17 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do let ident' = fromMaybe ident (lookup "id" kvs) kvs' = filter (("id" /=) . fst) kvs modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } - return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] + return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ] inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = -- prefer the "id" in kvs, since that is the one produced by the docx -- reader. let ident' = fromMaybe ident (lookup "id" kvs) in return . map Elem $ - [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () + [ mknode "w:commentRangeEnd" [("w:id", ident')] () , mknode "w:r" [] [ mknode "w:rPr" [] [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] - , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] + , mknode "w:commentReference" [("w:id", ident')] () ] ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of @@ -1270,8 +1267,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do defaultAuthor <- asks envChangesAuthor let author = fromMaybe defaultAuthor (lookup "author" kvs) let mdate = lookup "date" kvs - return $ ("w:author", T.unpack author) : - maybe [] (\date -> [("w:date", T.unpack date)]) mdate + return $ ("w:author", author) : + maybe [] (\date -> [("w:date", date)]) mdate insmod <- if "insertion" `elem` classes then do changeAuthorDate <- getChangeAuthorDate @@ -1281,7 +1278,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do x <- f return [Elem $ mknode "w:ins" - (("w:id", show insId) : changeAuthorDate) x] + (("w:id", tshow insId) : changeAuthorDate) x] else return id delmod <- if "deletion" `elem` classes then do @@ -1291,7 +1288,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do return $ \f -> local (\env->env{envInDel=True}) $ do x <- f return [Elem $ mknode "w:del" - (("w:id", show delId) : changeAuthorDate) x] + (("w:id", tshow delId) : changeAuthorDate) x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils @@ -1322,7 +1319,7 @@ inlineToOpenXML' opts (Strikeout lst) = inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) | f == Format "openxml" = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] | otherwise = do report $ InlineNotRendered il return [] @@ -1335,7 +1332,7 @@ inlineToOpenXML' opts (Math mathType str) = do when (mathType == DisplayMath) setFirstPara res <- (lift . lift) (convertMath writeOMML mathType str) case res of - Right r -> return [Elem r] + Right r -> return [Elem $ fromXLElement r] Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do @@ -1348,7 +1345,7 @@ inlineToOpenXML' opts (Code attrs str) = do mknode "w:r" [] [ mknode "w:rPr" [] $ maybeToList (lookup toktype tokTypesMap) - , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] + , mknode "w:t" [("xml:space","preserve")] tok ] withTextPropM (rStyleM "Verbatim Char") $ if isNothing (writerHighlightStyle opts) then unhighlighted @@ -1365,7 +1362,7 @@ inlineToOpenXML' opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs @@ -1384,17 +1381,17 @@ inlineToOpenXML' opts (Note bs) = do inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return - [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] + [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ] -- external link: inlineToOpenXML' opts (Link _ txt (src,_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks - id' <- case M.lookup (T.unpack src) extlinks of + id' <- case M.lookup src extlinks of Just i -> return i Nothing -> do - i <- ("rId"++) `fmap` getUniqueId + i <- ("rId" <>) <$> getUniqueId modify $ \st -> st{ stExternalLinks = - M.insert (T.unpack src) i extlinks } + M.insert src i extlinks } return i return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do @@ -1414,17 +1411,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do ,("noChangeAspect","1")] () nvPicPr = mknode "pic:nvPicPr" [] [ mknode "pic:cNvPr" - [("descr",T.unpack src),("id","0"),("name","Picture")] () + [("descr",src),("id","0"),("name","Picture")] () , cNvPicPr ] blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",ident)] () + [ mknode "a:blip" [("r:embed",T.pack ident)] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] xfrm = mknode "a:xfrm" [] [ mknode "a:off" [("x","0"),("y","0")] () - , mknode "a:ext" [("cx",show xemu) - ,("cy",show yemu)] () ] + , mknode "a:ext" [("cx",tshow xemu) + ,("cy",tshow yemu)] () ] prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () ln = mknode "a:ln" [("w","9525")] @@ -1445,12 +1442,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do imgElt = mknode "w:r" [] $ mknode "w:drawing" [] $ mknode "wp:inline" [] - [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] () , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" - [ ("descr", T.unpack $ stringify alt) - , ("title", T.unpack title) + [ ("descr", stringify alt) + , ("title", title) , ("id","1") , ("name","Picture") ] () @@ -1463,7 +1460,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just imgData -> return [Elem $ generateImgElt imgData] Nothing -> ( do --try (img, mt) <- P.fetchItem src - ident <- ("rId"++) `fmap` getUniqueId + ident <- ("rId" <>) <$> getUniqueId let imgext = case mt >>= extensionFromMimeType of @@ -1477,10 +1474,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just Svg -> ".svg" Just Emf -> ".emf" Nothing -> "" - imgpath = "media/" <> ident <> T.unpack imgext - mbMimeType = mt <|> getMimeType imgpath + imgpath = "media/" <> ident <> imgext + mbMimeType = mt <|> getMimeType (T.unpack imgpath) - imgData = (ident, imgpath, mbMimeType, img) + imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img) if T.null imgext then -- without an extension there is no rule for content type @@ -1538,20 +1535,20 @@ withDirection x = do , envTextProperties = EnvProps textStyle textProps' } -wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content] +wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content] wrapBookmark "" contents = return contents wrapBookmark ident contents = do id' <- getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') - ,("w:name", T.unpack $ toBookmarkName ident)] () + ,("w:name", toBookmarkName ident)] () bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd] -- Word imposes a 40 character limit on bookmark names and requires -- that they begin with a letter. So we just use a hash of the -- identifier when otherwise we'd have an illegal bookmark name. -toBookmarkName :: T.Text -> T.Text +toBookmarkName :: Text -> Text toBookmarkName s | Just (c, _) <- T.uncons s , isLetter c diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 171ffe582..3f10cb437 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get, gets, lift, modify) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Data.Char (isAlphaNum, isAscii, isDigit, toLower) +import Data.Char (isAlphaNum, isAscii, isDigit) import Data.List (isInfixOf, isPrefixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) import qualified Data.Set as Set -import qualified Data.Text as TS +import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) import System.FilePath (takeExtension, takeFileName, makeRelative) @@ -48,16 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags', - safeRead, stringify, trim, uniqueIdent, tshow) + stringify, uniqueIdent, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) import Text.Printf (printf) -import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), - add_attrs, lookupAttr, node, onlyElems, - ppElement, showElement, strContent, unode, unqual) -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import Text.Pandoc.XML (escapeStringForXML) import Text.DocTemplates (FromContext(lookupContext), Context(..), ToContext(toVal), Val(..)) @@ -69,7 +67,7 @@ newtype Chapter = Chapter [Block] data EPUBState = EPUBState { stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] , stMediaNextId :: Int - , stEpubSubdir :: String + , stEpubSubdir :: FilePath } type E m = StateT EPUBState m @@ -78,62 +76,63 @@ data EPUBMetadata = EPUBMetadata{ epubIdentifier :: [Identifier] , epubTitle :: [Title] , epubDate :: [Date] - , epubLanguage :: String + , epubLanguage :: Text , epubCreator :: [Creator] , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubBelongsToCollection :: Maybe String - , epubGroupPosition :: Maybe String - , epubCoverImage :: Maybe String + , epubSubject :: [Text] + , epubDescription :: Maybe Text + , epubType :: Maybe Text + , epubFormat :: Maybe Text + , epubPublisher :: Maybe Text + , epubSource :: Maybe Text + , epubRelation :: Maybe Text + , epubCoverage :: Maybe Text + , epubRights :: Maybe Text + , epubBelongsToCollection :: Maybe Text + , epubGroupPosition :: Maybe Text + , epubCoverImage :: Maybe FilePath , epubStylesheets :: [FilePath] , epubPageDirection :: Maybe ProgressionDirection - , epubIbooksFields :: [(String, String)] - , epubCalibreFields :: [(String, String)] + , epubIbooksFields :: [(Text, Text)] + , epubCalibreFields :: [(Text, Text)] } deriving Show data Date = Date{ - dateText :: String - , dateEvent :: Maybe String + dateText :: Text + , dateEvent :: Maybe Text } deriving Show data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String + creatorText :: Text + , creatorRole :: Maybe Text + , creatorFileAs :: Maybe Text } deriving Show data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String + identifierText :: Text + , identifierScheme :: Maybe Text } deriving Show data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String + titleText :: Text + , titleFileAs :: Maybe Text + , titleType :: Maybe Text } deriving Show data ProgressionDirection = LTR | RTL deriving Show -dcName :: String -> QName +dcName :: Text -> QName dcName n = QName n Nothing (Just "dc") -dcNode :: Node t => String -> t -> Element +dcNode :: Node t => Text -> t -> Element dcNode = node . dcName -opfName :: String -> QName +opfName :: Text -> QName opfName n = QName n Nothing (Just "opf") -toId :: FilePath -> String -toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' +toId :: FilePath -> Text +toId = T.pack . + map (\x -> if isAlphaNum x || x == '-' || x == '_' then x else '_') . takeFileName @@ -141,8 +140,8 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -toVal' :: String -> Val TS.Text -toVal' = toVal . TS.pack +toVal' :: Text -> Val T.Text +toVal' = toVal mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry mkEntry path content = do @@ -172,21 +171,21 @@ getEPUBMetadata opts meta = do if null (epubIdentifier m) then do randomId <- getRandomUUID - return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] } + return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] } else return m let addLanguage m = - if null (epubLanguage m) + if T.null (epubLanguage m) then case lookupContext "lang" (writerVariables opts) of - Just x -> return m{ epubLanguage = TS.unpack x } + Just x -> return m{ epubLanguage = x } Nothing -> do mLang <- lift $ P.lookupEnv "LANG" let localeLang = case mLang of Just lang -> - TS.map (\c -> if c == '_' then '-' else c) $ - TS.takeWhile (/='.') lang + T.map (\c -> if c == '_' then '-' else c) $ + T.takeWhile (/='.') lang Nothing -> "en-US" - return m{ epubLanguage = TS.unpack localeLang } + return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) @@ -201,7 +200,7 @@ getEPUBMetadata opts meta = do then return m else do let authors' = map stringify $ docAuthors meta - let toAuthor name = Creator{ creatorText = TS.unpack name + let toAuthor name = Creator{ creatorText = name , creatorRole = Just "aut" , creatorFileAs = Nothing } return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } @@ -249,31 +248,31 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md where getAttr n = lookupAttr (opfName n) attrs addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = case getAttr "property" of - Just s | "ibooks:" `isPrefixOf` s -> - md{ epubIbooksFields = (drop 7 s, strContent e) : + Just s | "ibooks:" `T.isPrefixOf` s -> + md{ epubIbooksFields = (T.drop 7 s, strContent e) : epubIbooksFields md } _ -> case getAttr "name" of - Just s | "calibre:" `isPrefixOf` s -> + Just s | "calibre:" `T.isPrefixOf` s -> md{ epubCalibreFields = - (drop 8 s, fromMaybe "" $ getAttr "content") : + (T.drop 8 s, fromMaybe "" $ getAttr "content") : epubCalibreFields md } _ -> md where getAttr n = lookupAttr (unqual n) attrs addMetadataFromXML _ md = md -metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = TS.unpack s -metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils -metaValueToString (MetaBlocks bs) = TS.unpack $ stringify bs +metaValueToString :: MetaValue -> Text +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = stringify ils +metaValueToString (MetaBlocks bs) = stringify bs metaValueToString (MetaBool True) = "true" metaValueToString (MetaBool False) = "false" metaValueToString _ = "" metaValueToPaths :: MetaValue -> [FilePath] -metaValueToPaths (MetaList xs) = map metaValueToString xs -metaValueToPaths x = [metaValueToString x] +metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs +metaValueToPaths x = [T.unpack $ metaValueToString x] -getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a] +getList :: T.Text -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = case lookupMeta s meta of Just (MetaList xs) -> map handleMetaValue xs @@ -297,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue , titleType = metaValueToString <$> M.lookup "type" m } handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing -getCreator :: TS.Text -> Meta -> [Creator] +getCreator :: T.Text -> Meta -> [Creator] getCreator s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m @@ -305,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing -getDate :: TS.Text -> Meta -> [Date] +getDate :: T.Text -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Date{ dateText = fromMaybe "" $ @@ -314,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: TS.Text -> Meta -> [String] +simpleList :: T.Text -> Meta -> [Text] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs @@ -339,7 +338,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverage = coverage , epubRights = rights , epubBelongsToCollection = belongsToCollection - , epubGroupPosition = groupPosition + , epubGroupPosition = groupPosition , epubCoverImage = coverImage , epubStylesheets = stylesheets , epubPageDirection = pageDirection @@ -363,31 +362,30 @@ metadataFromMeta opts meta = EPUBMetadata{ coverage = metaValueToString <$> lookupMeta "coverage" meta rights = metaValueToString <$> lookupMeta "rights" meta belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta - groupPosition = metaValueToString <$> lookupMeta "group-position" meta - coverImage = - (TS.unpack <$> lookupContext "epub-cover-image" - (writerVariables opts)) + groupPosition = metaValueToString <$> lookupMeta "group-position" meta + coverImage = T.unpack <$> + lookupContext "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta stylesheets = maybe [] metaValueToPaths mCss ++ case lookupContext "css" (writerVariables opts) of - Just xs -> map TS.unpack xs + Just xs -> map T.unpack xs Nothing -> case lookupContext "css" (writerVariables opts) of - Just x -> [TS.unpack x] + Just x -> [T.unpack x] Nothing -> [] - pageDirection = case map toLower . metaValueToString <$> + pageDirection = case T.toLower . metaValueToString <$> lookupMeta "page-progression-direction" meta of Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing ibooksFields = case lookupMeta "ibooks" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] calibreFields = case lookupMeta "calibre" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] -- | Produce an EPUB2 file from a Pandoc document. @@ -413,9 +411,11 @@ writeEPUB :: PandocMonad m writeEPUB epubVersion opts doc = do let epubSubdir = writerEpubSubdirectory opts -- sanity check on epubSubdir - unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ throwError $ PandocEpubSubdirectoryError epubSubdir - let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir } + let initState = EPUBState { stMediaPaths = [] + , stMediaNextId = 0 + , stEpubSubdir = T.unpack epubSubdir } evalStateT (pandocToEPUB epubVersion opts doc) initState pandocToEPUB :: PandocMonad m @@ -439,7 +439,7 @@ pandocToEPUB version opts doc = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> TS.unpack $ stringify x + x -> stringify x -- stylesheet stylesheets <- case epubStylesheets metadata of @@ -461,7 +461,8 @@ pandocToEPUB version opts doc = do (ListVal $ map (\e -> toVal' $ (if useprefix then "../" else "") <> - makeRelative epubSubdir (eRelativePath e)) + T.pack + (makeRelative epubSubdir (eRelativePath e))) stylesheetEntries) mempty @@ -490,18 +491,19 @@ pandocToEPUB version opts doc = do case imageSize opts' (B.toStrict imgContent) of Right sz -> return $ sizeInPixels sz Left err' -> (0, 0) <$ report - (CouldNotDetermineImageSize (TS.pack img) err') + (CouldNotDetermineImageSize (T.pack img) err') cpContent <- lift $ writeHtml opts'{ writerVariables = Context (M.fromList [ ("coverpage", toVal' "true"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle), - ("cover-image", toVal' coverImageName), + escapeStringForXML plainTitle), + ("cover-image", + toVal' $ T.pack coverImageName), ("cover-image-width", toVal' $ - show coverImageWidth), + tshow coverImageWidth), ("cover-image-height", toVal' $ - show coverImageHeight)]) <> + tshow coverImageHeight)]) <> cssvars True <> vars } (Pandoc meta []) coverEntry <- mkEntry "text/cover.xhtml" cpContent @@ -517,7 +519,7 @@ pandocToEPUB version opts doc = do ("titlepage", toVal' "true"), ("body-type", toVal' "frontmatter"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> cssvars True <> vars } (Pandoc meta []) tpEntry <- mkEntry "text/title_page.xhtml" tpContent @@ -526,7 +528,7 @@ pandocToEPUB version opts doc = do let matchingGlob f = do xs <- lift $ P.glob f when (null xs) $ - report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files" + report $ CouldNotFetchResource (T.pack f) "glob did not match any font files" return xs let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< lift (P.readFileLazy f) @@ -573,13 +575,13 @@ pandocToEPUB version opts doc = do let chapters' = secsToChapters secs - let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] + let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)] extractLinkURL' num (Span (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (Link (ident, _, _) _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (Image (ident, _, _) _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (RawInline fmt raw) | isHtmlFormat fmt = foldr (\tag -> @@ -587,18 +589,18 @@ pandocToEPUB version opts doc = do TagOpen{} -> case fromAttrib "id" tag of "" -> id - x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + x -> ((x, showChapter num <> "#" <> x):) _ -> id) [] (parseTags raw) extractLinkURL' _ _ = [] - let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] + let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)] extractLinkURL num (Div (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (Header _ (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (Table (ident,_,_) _ _ _ _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (RawBlock fmt raw) | isHtmlFormat fmt = foldr (\tag -> @@ -606,7 +608,7 @@ pandocToEPUB version opts doc = do TagOpen{} -> case fromAttrib "id" tag of "" -> id - x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + x -> ((x, showChapter num <> "#" <> x):) _ -> id) [] (parseTags raw) extractLinkURL num b = query (extractLinkURL' num) b @@ -617,7 +619,7 @@ pandocToEPUB version opts doc = do let fixInternalReferences :: Inline -> Inline fixInternalReferences (Link attr lab (src, tit)) - | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of + | Just ('#', xs) <- T.uncons src = case lookup xs reftable of Just ys -> Link attr lab (ys, tit) Nothing -> Link attr lab (src, tit) fixInternalReferences x = x @@ -630,7 +632,7 @@ pandocToEPUB version opts doc = do chapters' let chapToEntry num (Chapter bs) = - mkEntry ("text/" ++ showChapter num) =<< + mkEntry ("text/" ++ T.unpack (showChapter num)) =<< writeHtml opts'{ writerVariables = Context (M.fromList [("body-type", toVal' bodyType), @@ -677,12 +679,12 @@ pandocToEPUB version opts doc = do let chapterNode ent = unode "item" ! ([("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of [] -> [] - xs -> [("properties", unwords xs)]) + xs -> [("properties", T.unwords xs)]) $ () let chapterRefNode ent = unode "itemref" ! @@ -691,17 +693,17 @@ pandocToEPUB version opts doc = do let pictureNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", - maybe "application/octet-stream" TS.unpack + fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), - ("media-type", maybe "" TS.unpack $ + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () let tocTitle = maybe plainTitle @@ -710,7 +712,7 @@ pandocToEPUB version opts doc = do (x:_) -> return $ identifierText x -- use first identifier as UUID [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen currentTime <- lift P.getTimestamp - let contentsData = UTF8.fromStringLazy $ ppTopElement $ + let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $ unode "package" ! ([("version", case version of EPUB2 -> "2.0" @@ -728,7 +730,8 @@ pandocToEPUB version opts doc = do ,("media-type","application/xhtml+xml")] ++ [("properties","nav") | epub3 ]) $ () ] ++ - [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp) + [ unode "item" ! [("id","stylesheet" <> tshow n) + , ("href", T.pack fp) ,("media-type","text/css")] $ () | (n :: Int, fp) <- zip [1..] (map (makeRelative epubSubdir . eRelativePath) @@ -773,7 +776,7 @@ pandocToEPUB version opts doc = do let tocLevel = writerTOCDepth opts let navPointNode :: PandocMonad m - => (Int -> [Inline] -> TS.Text -> [Element] -> Element) + => (Int -> [Inline] -> T.Text -> [Element] -> Element) -> Block -> StateT Int m [Element] navPointNode formatter (Div (ident,_,_) (Header lvl (_,_,kvs) ils : children)) = @@ -783,7 +786,7 @@ pandocToEPUB version opts doc = do n <- get modify (+1) let num = fromMaybe "" $ lookup "number" kvs - let tit = if writerNumberSections opts && not (TS.null num) + let tit = if writerNumberSections opts && not (T.null num) then Span ("", ["section-header-number"], []) [Str num] : Space : ils else ils @@ -797,21 +800,21 @@ pandocToEPUB version opts doc = do concat <$> mapM (navPointNode formatter) bs navPointNode _ _ = return [] - let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit - , unode "content" ! [("src", "text/" <> TS.unpack src)] $ () + [("id", "navPoint-" <> tshow n)] $ + [ unode "navLabel" $ unode "text" $ stringify tit + , unode "content" ! [("src", "text/" <> src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta) + [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) , unode "content" ! [("src", "text/title_page.xhtml")] $ () ] navMap <- lift $ evalStateT (concat <$> mapM (navPointNode navMapFormatter) secs) 1 - let tocData = UTF8.fromStringLazy $ ppTopElement $ + let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ @@ -833,11 +836,11 @@ pandocToEPUB version opts doc = do ] tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! - [("id", "toc-li-" ++ show n)] $ + [("id", "toc-li-" <> tshow n)] $ (unode "a" ! - [("href", "text/" <> TS.unpack src)] + [("href", "text/" <> src)] $ titElements) : case subs of [] -> [] @@ -850,7 +853,7 @@ pandocToEPUB version opts doc = do , writerVariables = Context (M.fromList [("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> writerVariables opts} (Pandoc nullMeta [Plain $ walk clean tit])) of @@ -865,7 +868,7 @@ pandocToEPUB version opts doc = do tocBlocks <- lift $ evalStateT (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 let navBlocks = [RawBlock (Format "html") - $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces + $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle @@ -875,21 +878,21 @@ pandocToEPUB version opts doc = do [ unode "a" ! [("href", "text/title_page.xhtml") ,("epub:type", "titlepage")] $ - ("Title Page" :: String) ] : + ("Title Page" :: Text) ] : [ unode "li" [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ - ("Cover" :: String)] | + ("Cover" :: Text)] | isJust (epubCoverImage metadata) ] ++ [ unode "li" [ unode "a" ! [("href", "#toc") ,("epub:type", "toc")] $ - ("Table of Contents" :: String) + ("Table of Contents" :: Text) ] | writerTableOfContents opts ] else [] - let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $ + let landmarks = [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("id","landmarks") ,("hidden","hidden")] $ @@ -910,22 +913,22 @@ pandocToEPUB version opts doc = do UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = UTF8.fromStringLazy $ ppTopElement $ + let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ unode "rootfile" ! [("full-path", (if null epubSubdir then "" - else epubSubdir ++ "/") ++ "content.opf") + else T.pack epubSubdir <> "/") <> "content.opf") ,("media-type","application/oebps-package+xml")] $ () containerEntry <- mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = UTF8.fromStringLazy $ ppTopElement $ + let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ - unode "option" ! [("name","specified-fonts")] $ ("true" :: String) + unode "option" ! [("name","specified-fonts")] $ ("true" :: Text) appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- construct archive @@ -947,7 +950,8 @@ metadataElement version md currentTime = ++ publisherNodes ++ sourceNodes ++ relationNodes ++ coverageNodes ++ rightsNodes ++ coverImageNodes ++ modifiedNodes ++ belongsToCollectionNodes - withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + withIds base f = concat . zipWith f (map (\x -> base <> + T.cons '-' (tshow x)) ([1..] :: [Int])) identifierNodes = withIds "epub-id" toIdentifierNode $ epubIdentifier md @@ -961,9 +965,9 @@ metadataElement version md currentTime = (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] ibooksNodes = map ibooksNode (epubIbooksFields md) - ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v calibreNodes = map calibreNode (epubCalibreFields md) - calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k), + calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k), ("content", v)] $ () languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ @@ -989,12 +993,12 @@ metadataElement version md currentTime = maybe [] (\belongsToCollection -> (unode "meta" ! [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection ) : - [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ]) + [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ]) (epubBelongsToCollection md)++ maybe [] (\groupPosition -> [unode "meta" ! [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ]) (epubGroupPosition md) - dcTag n s = unode ("dc:" ++ n) s + dcTag n s = unode ("dc:" <> n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) | version == EPUB2 = [dcNode "identifier" ! @@ -1002,7 +1006,7 @@ metadataElement version md currentTime = txt] | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) : maybe [] ((\x -> [unode "meta" ! - [ ("refines",'#':id') + [ ("refines","#" <> id') , ("property","identifier-type") , ("scheme","onix:codelist5") ] @@ -1018,10 +1022,10 @@ metadataElement version md currentTime = (creatorRole creator >>= toRelator)) $ creatorText creator] | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (creatorFileAs creator) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","role"), + [("refines","#" <> id'),("property","role"), ("scheme","marc:relators")] $ x]) (creatorRole creator >>= toRelator) toTitleNode id' title @@ -1033,16 +1037,16 @@ metadataElement version md currentTime = | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (titleFileAs title) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","title-type")] $ x]) + [("refines","#" <> id'),("property","title-type")] $ x]) (titleType title) toDateNode id' date = [dcNode "date" ! (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] - schemeToOnix :: String -> String + schemeToOnix :: Text -> Text schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03" schemeToOnix "UPC" = "04" @@ -1060,48 +1064,48 @@ metadataElement version md currentTime = schemeToOnix "OLCC" = "28" schemeToOnix _ = "01" -showDateTimeISO8601 :: UTCTime -> String -showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" +showDateTimeISO8601 :: UTCTime -> Text +showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ" transformTag :: PandocMonad m - => Tag TS.Text - -> E m (Tag TS.Text) + => Tag T.Text + -> E m (Tag T.Text) transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef $ TS.unpack src - newposter <- modifyMediaRef $ TS.unpack poster + newsrc <- modifyMediaRef $ T.unpack src + newposter <- modifyMediaRef $ T.unpack poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", "../" <> newsrc) | not (TS.null newsrc)] ++ - [("poster", "../" <> newposter) | not (TS.null newposter)] + [("src", "../" <> newsrc) | not (T.null newsrc)] ++ + [("poster", "../" <> newposter) | not (T.null newposter)] return $ TagOpen name attr' transformTag tag = return tag modifyMediaRef :: PandocMonad m => FilePath - -> E m TS.Text + -> E m T.Text modifyMediaRef "" = return "" modifyMediaRef oldsrc = do media <- gets stMediaPaths case lookup oldsrc media of - Just (n,_) -> return $ TS.pack n + Just (n,_) -> return $ T.pack n Nothing -> catchError - (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc - let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack + (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc + let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack (("." <>) <$> (mbMime >>= extensionFromMimeType)) newName <- getMediaNextNewName ext let newPath = "media/" ++ newName entry <- mkEntry newPath (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (newPath, Just entry)):media} - return $ TS.pack newPath) + return $ T.pack newPath) (\e -> do - report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e) - return $ TS.pack oldsrc) + report $ CouldNotFetchResource (T.pack oldsrc) (tshow e) + return $ T.pack oldsrc) -getMediaNextNewName :: PandocMonad m => String -> E m String +getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath getMediaNextNewName ext = do nextId <- gets stMediaNextId modify $ \st -> st { stMediaNextId = nextId + 1 } @@ -1128,11 +1132,11 @@ transformInline :: PandocMonad m -> Inline -> E m Inline transformInline _opts (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef $ TS.unpack src + newsrc <- modifyMediaRef $ T.unpack src return $ Image attr lab ("../" <> newsrc, tit) transformInline opts x@(Math t m) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m)) + newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] @@ -1143,40 +1147,26 @@ transformInline _opts (RawInline fmt raw) return $ RawInline fmt (renderTags' tags') transformInline _ x = return x -(!) :: (t -> Element) -> [(String, String)] -> t -> Element +(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) --- | Version of 'ppTopElement' that specifies UTF-8 encoding. -ppTopElement :: Element -> String -ppTopElement = ("\n" ++) . unEntity . ppElement - -- unEntity removes numeric entities introduced by ppElement - -- (kindlegen seems to choke on these). - where unEntity [] = "" - unEntity ('&':'#':xs) = - let (ds,ys) = break (==';') xs - rest = drop 1 ys - in case safeRead (TS.pack $ "'\\" <> ds <> "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs - unEntity (x:xs) = x : unEntity xs - mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of - Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y + Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y _ -> Nothing -- Returns filename for chapter number. -showChapter :: Int -> String -showChapter = printf "ch%03d.xhtml" +showChapter :: Int -> Text +showChapter = T.pack . printf "ch%03d.xhtml" -- Add identifiers to any headers without them. addIdentifiers :: WriterOptions -> [Block] -> [Block] addIdentifiers opts bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get - let ident' = if TS.null ident + let ident' = if T.null ident then uniqueIdent (writerExtensions opts) ils ids else ident modify $ Set.insert ident' @@ -1184,27 +1174,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty go x = return x -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM -normalizeDate' :: String -> Maybe String -normalizeDate' = fmap TS.unpack . go . trim . TS.pack +normalizeDate' :: Text -> Maybe Text +normalizeDate' = go . T.strip where go xs - | TS.length xs == 4 -- YYY - , TS.all isDigit xs = Just xs - | (y, s) <- TS.splitAt 4 xs -- YYY-MM - , Just ('-', m) <- TS.uncons s - , TS.length m == 2 - , TS.all isDigit y && TS.all isDigit m = Just xs + | T.length xs == 4 -- YYY + , T.all isDigit xs = Just xs + | (y, s) <- T.splitAt 4 xs -- YYY-MM + , Just ('-', m) <- T.uncons s + , T.length m == 2 + , T.all isDigit y && T.all isDigit m = Just xs | otherwise = normalizeDate xs -toRelator :: String -> Maybe String +toRelator :: Text -> Maybe Text toRelator x | x `elem` relators = Just x - | otherwise = lookup (map toLower x) relatorMap + | otherwise = lookup (T.toLower x) relatorMap -relators :: [String] +relators :: [Text] relators = map snd relatorMap -relatorMap :: [(String, String)] +relatorMap :: [(Text, Text)] relatorMap = [("abridger", "abr") ,("actor", "act") diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 9334d6e9a..3b5d04427 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -25,15 +25,12 @@ import Data.ByteString.Base64 (encode) import Data.Char (isAscii, isControl, isSpace) import Data.Either (lefts, rights) import Data.List (intercalate) -import Data.Text (Text, pack) +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) -import Text.XML.Light -import qualified Text.XML.Light as X -import qualified Text.XML.Light.Cursor as XC -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light as X import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import qualified Text.Pandoc.Class.PandocMonad as P @@ -44,6 +41,7 @@ import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, makeSections, tshow, stringify) import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable) +import Data.Generics (everywhere, mkT) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -88,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ pack $ xml_head ++ showContent fb2_xml ++ "\n" + return $ xml_head <> showContent fb2_xml <> "\n" where xml_head = "\n" fb2_attrs = @@ -100,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do description :: PandocMonad m => Meta -> FBM m Content description meta' = do let genre = case lookupMetaString "genre" meta' of - "" -> el "genre" ("unrecognised" :: String) - s -> el "genre" (T.unpack s) + "" -> el "genre" ("unrecognised" :: Text) + s -> el "genre" s bt <- booktitle meta' let as = authors meta' dd <- docdate meta' @@ -112,7 +110,7 @@ description meta' = do Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] Just (MetaString s) -> [el "lang" $ iso639 s] _ -> [] - where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 let coverimage url = do let img = Image nullAttr mempty (url, "") im <- insertImage InlineImage img @@ -124,7 +122,7 @@ description meta' = do return $ el "description" [ el "title-info" (genre : (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang)) - , el "document-info" [el "program-used" ("pandoc" :: String)] + , el "document-info" [el "program-used" ("pandoc" :: Text)] ] booktitle :: PandocMonad m => Meta -> FBM m [Content] @@ -137,15 +135,15 @@ authors meta' = cMap author (docAuthors meta') author :: [Inline] -> [Content] author ss = - let ws = words . cMap plain $ ss - email = el "email" <$> take 1 (filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws + let ws = T.words $ mconcat $ map plain ss + email = el "email" <$> take 1 (filter (T.any (=='@')) ws) + ws' = filter (not . T.any (== '@')) ws names = case ws' of [nickname] -> [ el "nickname" nickname ] [fname, lname] -> [ el "first-name" fname , el "last-name" lname ] (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) + , el "middle-name" (T.concat . init $ rest) , el "last-name" (last rest) ] [] -> [] in list $ el "author" (names ++ email) @@ -206,7 +204,7 @@ renderFootnotes = do el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) where renderFN (n, idstr, cs) = - let fn_texts = el "title" (el "p" (show n)) : cs + let fn_texts = el "title" (el "p" (tshow n)) : cs in el "section" ([uattr "id" idstr], fn_texts) -- | Fetch images and encode them for the FictionBook XML. @@ -282,7 +280,7 @@ isMimeType s = where types = ["text","image","audio","video","application","message","multipart"] valid c = isAscii c && not (isControl c) && not (isSpace c) && - c `notElem` ("()<>@,;:\\\"/[]?=" :: String) + c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char]) footnoteID :: Int -> Text footnoteID i = "n" <> tshow i @@ -306,7 +304,7 @@ blockToXml (Para [Image atr alt (src,tgt)]) = insertImage NormalImage (Image atr alt (src,tit)) blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code" . T.unpack) . T.lines $ s + map (el "p" . el "code") . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" then @@ -346,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do c <- el "emphasis" <$> cMapM toXml caption return [el "table" (hd <> bd), el "p" c] where - mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content + mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content mkrow tag cells aligns' = el "tr" <$> mapM (mkcell tag) (zip cells aligns') -- - mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content + mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content mkcell tag (cell, align) = do cblocks <- cMapM blockToXml cell return $ el tag ([align_attr align], cblocks) @@ -424,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do inner <- cMapM toXml ss return $ [txt "“"] ++ inner ++ [txt "”"] toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles -toXml (Code _ s) = return [el "code" $ T.unpack s] +toXml (Code _ s) = return [el "code" s] toXml Space = return [txt " "] toXml SoftBreak = return [txt "\n"] toXml LineBreak = return [txt "\n"] @@ -456,7 +454,7 @@ insertMath immode formula = do let imgurl = url <> T.pack (urlEncode $ T.unpack formula) let img = Image nullAttr alt (imgurl, "") insertImage immode img - _ -> return [el "code" $ T.unpack formula] + _ -> return [el "code" formula] insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do @@ -471,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do el "image" $ [ attr ("l","href") ("#" <> fname) , attr ("l","type") (tshow immode) - , uattr "alt" (T.pack $ cMap plain alt) ] + , uattr "alt" (mconcat $ map plain alt) ] ++ ttlattr insertImage _ _ = error "unexpected inline instead of image" replaceImagesWithAlt :: [Text] -> Content -> Content -replaceImagesWithAlt missingHrefs body = - let cur = XC.fromContent body - cur' = replaceAll cur - in XC.toTree . XC.root $ cur' +replaceImagesWithAlt missingHrefs = everywhere (mkT go) where - -- - replaceAll :: XC.Cursor -> XC.Cursor - replaceAll c = - let n = XC.current c - c' = if isImage n && isMissing n - then XC.modifyContent replaceNode c - else c - in case XC.nextDF c' of - (Just cnext) -> replaceAll cnext - Nothing -> c' -- end of document - -- - isImage :: Content -> Bool - isImage (Elem e) = elName e == uname "image" - isImage _ = False - -- + go c = if isMissing c + then replaceNode c + else c isMissing (Elem img@Element{}) = let imgAttrs = elAttribs img badAttrs = map (attr ("l","href")) missingHrefs @@ -505,18 +488,18 @@ replaceImagesWithAlt missingHrefs body = replaceNode :: Content -> Content replaceNode n@(Elem img@Element{}) = let attrs = elAttribs img - alt = getAttrVal attrs (uname "alt") + alt = getAttrVal attrs (unqual "alt") imtype = getAttrVal attrs (qname "l" "type") in case (alt, imtype) of (Just alt', Just imtype') -> - if imtype' == show NormalImage + if imtype' == tshow NormalImage then el "p" alt' - else txt $ T.pack alt' - (Just alt', Nothing) -> txt $ T.pack alt' -- no type attribute + else txt alt' + (Just alt', Nothing) -> txt alt' -- no type attribute _ -> n -- don't replace if alt text is not found replaceNode n = n -- - getAttrVal :: [X.Attr] -> QName -> Maybe String + getAttrVal :: [X.Attr] -> QName -> Maybe Text getAttrVal attrs name = case filter ((name ==) . attrKey) attrs of (a:_) -> Just (attrVal a) @@ -524,7 +507,7 @@ replaceImagesWithAlt missingHrefs body = -- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: PandocMonad m => String -> [Inline] -> FBM m Content +wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content wrap tagname inlines = el tagname `liftM` cMapM toXml inlines -- " Create a singleton list. @@ -532,31 +515,31 @@ list :: a -> [a] list = (:[]) -- | Convert an 'Inline' to plaintext. -plain :: Inline -> String -plain (Str s) = T.unpack s -plain (Emph ss) = cMap plain ss -plain (Underline ss) = cMap plain ss -plain (Span _ ss) = cMap plain ss -plain (Strong ss) = cMap plain ss -plain (Strikeout ss) = cMap plain ss -plain (Superscript ss) = cMap plain ss -plain (Subscript ss) = cMap plain ss -plain (SmallCaps ss) = cMap plain ss -plain (Quoted _ ss) = cMap plain ss -plain (Cite _ ss) = cMap plain ss -- FIXME -plain (Code _ s) = T.unpack s +plain :: Inline -> Text +plain (Str s) = s +plain (Emph ss) = mconcat $ map plain ss +plain (Underline ss) = mconcat $ map plain ss +plain (Span _ ss) = mconcat $ map plain ss +plain (Strong ss) = mconcat $ map plain ss +plain (Strikeout ss) = mconcat $ map plain ss +plain (Superscript ss) = mconcat $ map plain ss +plain (Subscript ss) = mconcat $ map plain ss +plain (SmallCaps ss) = mconcat $ map plain ss +plain (Quoted _ ss) = mconcat $ map plain ss +plain (Cite _ ss) = mconcat $ map plain ss -- FIXME +plain (Code _ s) = s plain Space = " " plain SoftBreak = " " plain LineBreak = "\n" -plain (Math _ s) = T.unpack s +plain (Math _ s) = s plain (RawInline _ _) = "" -plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"]) -plain (Image _ alt _) = cMap plain alt +plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"]) +plain (Image _ alt _) = mconcat $ map plain alt plain (Note _) = "" -- FIXME -- | Create an XML element. el :: (Node t) - => String -- ^ unqualified element name + => Text -- ^ unqualified element name -> t -- ^ node contents -> Content -- ^ XML content el name cs = Elem $ unode name cs @@ -569,22 +552,18 @@ spaceBeforeAfter cs = -- | Create a plain-text XML content. txt :: Text -> Content -txt s = Text $ CData CDataText (T.unpack s) Nothing +txt s = Text $ CData CDataText s Nothing -- | Create an XML attribute with an unqualified name. -uattr :: String -> Text -> Text.XML.Light.Attr -uattr name = Attr (uname name) . T.unpack +uattr :: Text -> Text -> X.Attr +uattr name = Attr (unqual name) -- | Create an XML attribute with a qualified name from given namespace. -attr :: (String, String) -> Text -> Text.XML.Light.Attr -attr (ns, name) = Attr (qname ns name) . T.unpack - --- | Unqualified name -uname :: String -> QName -uname name = QName name Nothing Nothing +attr :: (Text, Text) -> Text -> X.Attr +attr (ns, name) = Attr (qname ns name) -- | Qualified name -qname :: String -> String -> QName +qname :: Text -> Text -> QName qname ns name = QName name Nothing (Just ns) -- | Abbreviation for 'concatMap'. diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 06369b4db..101b236aa 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -40,9 +40,9 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light import Text.TeXMath -import Text.XML.Light +import qualified Text.XML.Light as XL newtype ODTState = ODTState { stEntries :: [Entry] } @@ -181,18 +181,20 @@ updateStyleWithLang (Just lang) arch = do PandocXMLError "styles.xml" msg Right d -> return $ toEntry "styles.xml" epochtime - ( fromStringLazy + ( fromTextLazy + . TL.fromStrict . ppTopElement . addLang lang $ d ) else return e) (zEntries arch) return arch{ zEntries = entries } +-- TODO FIXME avoid this generic traversal! addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) - = Attr n (T.unpack $ langLanguage lang) + = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (T.unpack $ langRegion lang) + = Attr n (langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements @@ -238,8 +240,8 @@ transformPicMath _ (Math t math) = do case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do - let conf = useShortEmptyTags (const False) defaultConfigPP - let mathml = ppcTopElement conf r + let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP + let mathml = XL.ppcTopElement conf r epochtime <- floor `fmap` lift P.getPOSIXTime let dirname = "Formula-" ++ show (length entries) ++ "/" let fname = dirname ++ "content.xml" diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 8f60e70d5..0533d6c12 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -29,33 +29,32 @@ import Control.Monad.Except (throwError) import Text.Pandoc.Error import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Lazy.Char8 as BL8 import Data.Maybe (mapMaybe) import qualified Data.Text as T +import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.XML.Light as XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light -mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode :: Node t => Text -> [(Text,Text)] -> t -> Element mknode s attrs = add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) -mktnode :: String -> [(String,String)] -> T.Text -> Element -mktnode s attrs = mknode s attrs . T.unpack +mktnode :: Text -> [(Text,Text)] -> T.Text -> Element +mktnode s attrs = mknode s attrs -nodename :: String -> QName +nodename :: Text -> QName nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } - where (name, prefix) = case break (==':') s of - (xs,[]) -> (xs, Nothing) - (ys, _:zs) -> (zs, Just ys) + where (name, prefix) = case T.break (==':') s of + (xs,ys) -> case T.uncons ys of + Nothing -> (xs, Nothing) + Just (_,zs) -> (zs, Just xs) toLazy :: B.ByteString -> BL.ByteString toLazy = BL.fromChunks . (:[]) renderXml :: Element -> BL.ByteString -renderXml elt = BL8.pack "\n" <> - UTF8.fromStringLazy (showElement elt) +renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt)) parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element parseXml refArchive distArchive relpath = @@ -70,25 +69,25 @@ parseXml refArchive distArchive relpath = -- Copied from Util -attrToNSPair :: XML.Attr -> Maybe (String, String) -attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair :: Attr -> Maybe (Text, Text) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) attrToNSPair _ = Nothing elemToNameSpaces :: Element -> NameSpaces elemToNameSpaces = mapMaybe attrToNSPair . elAttribs -elemName :: NameSpaces -> String -> String -> QName +elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) -isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = let ns' = ns ++ elemToNameSpaces element in qName (elName element) == name && qURI (elName element) == lookup prefix ns' -type NameSpaces = [(String, String)] +type NameSpaces = [(Text, Text)] -- | Scales the image to fit the page -- sizes are passed in emu diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 0a7060895..5caeb0753 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -20,16 +20,16 @@ import Control.Monad.Except (throwError, catchError) import Control.Monad.Reader import Control.Monad.State import Codec.Archive.Zip -import Data.Char (toUpper) import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) import Data.Default +import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Read import Data.Time (formatTime, defaultTimeLocale) import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light as XML import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -48,6 +48,7 @@ import Text.DocTemplates (FromContext(lookupContext)) import Text.TeXMath import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation +import Text.Pandoc.Shared (tshow) import Skylighting (fromColor) -- |The 'EMU' type is used to specify sizes in English Metric Units. @@ -84,10 +85,13 @@ getPresentationSize refArchive distArchive = do sldSize <- findChild (elemName ns "p" "sldSz") presElement cxS <- findAttr (QName "cx" Nothing Nothing) sldSize cyS <- findAttr (QName "cy" Nothing Nothing) sldSize - (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String) - (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String) + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return (cx `div` 12700, cy `div` 12700) +readTextAsInteger :: Text -> Maybe Integer +readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal + data WriterEnv = WriterEnv { envRefArchive :: Archive , envDistArchive :: Archive , envUTCTime :: UTCTime @@ -161,9 +165,6 @@ runP env st p = evalStateT (runReaderT p env) st -------------------------------------------------------------------- -findAttrText :: QName -> Element -> Maybe T.Text -findAttrText n = fmap T.pack . findAttr n - monospaceFont :: Monad m => P m T.Text monospaceFont = do vars <- writerVariables <$> asks envOpts @@ -171,10 +172,9 @@ monospaceFont = do Just s -> return s Nothing -> return "Courier" --- Kept as string for XML.Light -fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)] +fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)] fontSizeAttributes RunProps { rPropForceSize = Just sz } = - return [("sz", show $ sz * 100)] + return [("sz", tshow $ sz * 100)] fontSizeAttributes _ = return [] copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive @@ -365,7 +365,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool shapeHasId ns ident element | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr - , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr = + , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = nm == ident | otherwise = False @@ -396,10 +396,10 @@ getShapeDimensions ns element ext <- findChild (elemName ns "a" "ext") xfrm cxS <- findAttr (QName "cx" Nothing Nothing) ext cyS <- findAttr (QName "cy" Nothing Nothing) ext - (x, _) <- listToMaybe $ reads xS - (y, _) <- listToMaybe $ reads yS - (cx, _) <- listToMaybe $ reads cxS - (cy, _) <- listToMaybe $ reads cyS + x <- readTextAsInteger xS + y <- readTextAsInteger yS + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return ((x `div` 12700, y `div` 12700), (cx `div` 12700, cy `div` 12700)) | otherwise = Nothing @@ -430,7 +430,7 @@ getContentShapeSize ns layout master Nothing -> do let mbSz = findChild (elemName ns "p" "nvSpPr") sp >>= findChild (elemName ns "p" "cNvPr") >>= - findAttrText (QName "id" Nothing Nothing) >>= + findAttr (QName "id" Nothing Nothing) >>= flip getMasterShapeDimensionsById master case mbSz of Just sz' -> return sz' @@ -450,8 +450,8 @@ buildSpTree ns spTreeElem newShapes = fn _ = True replaceNamedChildren :: NameSpaces - -> String - -> String + -> Text + -> Text -> [Element] -> Element -> Element @@ -654,10 +654,10 @@ createCaption contentShapeDimensions paraElements = do ] , mknode "p:spPr" [] [ mknode "a:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), - ("y", show $ 12700 * (y + cy - captionHeight))] () - , mknode "a:ext" [("cx", show $ 12700 * cx), - ("cy", show $ 12700 * captionHeight)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * (y + cy - captionHeight))] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * captionHeight)] () ] , mknode "a:prstGeom" [("prst", "rect")] [ mknode "a:avLst" [] () @@ -706,11 +706,13 @@ makePicElements layout picProps mInfo alt = do ,("noChangeAspect","1")] () -- cNvPr will contain the link information so we do that separately, -- and register the link if necessary. - let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")] + let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo), + ("id","0"), + ("name","Picture 1")] cNvPr <- case picPropLink picProps of Just link -> do idNum <- registerLink link return $ mknode "p:cNvPr" cNvPrAttr $ - mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] () + mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] () Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () let nvPicPr = mknode "p:nvPicPr" [] [ cNvPr @@ -718,13 +720,13 @@ makePicElements layout picProps mInfo alt = do , mknode "p:nvPr" [] ()] let blipFill = mknode "p:blipFill" [] [ mknode "a:blip" [("r:embed", "rId" <> - show (mInfoLocalId mInfo))] () + tshow (mInfoLocalId mInfo))] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] let xfrm = mknode "a:xfrm" [] - [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] () - , mknode "a:ext" [("cx",show dimX') - ,("cy",show dimY')] () ] + [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] () + , mknode "a:ext" [("cx", tshow dimX') + ,("cy", tshow dimY')] () ] let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () let ln = mknode "a:ln" [("w","9525")] @@ -763,7 +765,7 @@ paraElemToElements (Run rpr s) = do Just DoubleStrike -> [("strike", "dblStrike")] Nothing -> []) <> (case rBaseline rpr of - Just n -> [("baseline", show n)] + Just n -> [("baseline", tshow n)] Nothing -> []) <> (case rCap rpr of Just NoCapitals -> [("cap", "none")] @@ -780,43 +782,44 @@ paraElemToElements (Run rpr s) = do return $ case link of InternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) , ("action", "ppaction://hlinksldjump") ] in [mknode "a:hlinkClick" linkAttrs ()] -- external ExternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) ] in [mknode "a:hlinkClick" linkAttrs ()] Nothing -> return [] let colorContents = case rSolidFill rpr of Just color -> case fromColor color of - '#':hx -> [mknode "a:solidFill" [] - [mknode "a:srgbClr" [("val", map toUpper hx)] ()] - ] + '#':hx -> + [mknode "a:solidFill" [] + [mknode "a:srgbClr" + [("val", T.toUpper $ T.pack hx)] ()]] _ -> [] Nothing -> [] codeFont <- monospaceFont let codeContents = - [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr] + [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr] let propContents = linkProps <> colorContents <> codeContents return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents - , mknode "a:t" [] $ T.unpack s + , mknode "a:t" [] s ]] paraElemToElements (MathElem mathType texStr) = do isInSpkrNotes <- asks envInSpeakerNotes if isInSpkrNotes then paraElemToElements $ Run def $ unTeXString texStr else do res <- convertMath writeOMML mathType (unTeXString texStr) - case res of + case fromXLElement <$> res of Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r] Left (Str s) -> paraElemToElements (Run def s) Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback" paraElemToElements (RawOOXMLParaElem str) = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] -- This is a bit of a kludge -- really requires adding an option to @@ -824,9 +827,10 @@ paraElemToElements (RawOOXMLParaElem str) = return -- step at a time. addMathInfo :: Element -> Element addMathInfo element = - let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns") - , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" - } + let mathspace = + Attr { attrKey = QName "m" Nothing (Just "xmlns") + , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" + } in add_attr mathspace element -- We look through the element to see if it contains an a14:m @@ -849,13 +853,13 @@ surroundWithMathAlternate element = paragraphToElement :: PandocMonad m => Paragraph -> P m Element paragraphToElement par = do let - attrs = [("lvl", show $ pPropLevel $ paraProps par)] <> + attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <> (case pPropMarginLeft (paraProps par) of - Just px -> [("marL", show $ pixelsToEmu px)] + Just px -> [("marL", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropIndent (paraProps par) of - Just px -> [("indent", show $ pixelsToEmu px)] + Just px -> [("indent", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropAlign (paraProps par) of @@ -867,7 +871,7 @@ paragraphToElement par = do props = [] <> (case pPropSpaceBefore $ paraProps par of Just px -> [mknode "a:spcBef" [] [ - mknode "a:spcPts" [("val", show $ 100 * px)] () + mknode "a:spcPts" [("val", tshow $ 100 * px)] () ] ] Nothing -> [] @@ -910,7 +914,7 @@ shapeToElements layout (Pic picProps fp alt) = do shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$> graphicFrameToElements layout tbls cptn shapeToElements _ (RawOOXMLShape str) = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] shapeToElements layout shp = do element <- shapeToElement layout shp return [Elem element] @@ -942,8 +946,10 @@ graphicFrameToElements layout tbls caption = do [mknode "p:ph" [("idx", "1")] ()] ] , mknode "p:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () - , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * y)] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * cy)] () ] ] <> elements @@ -957,7 +963,7 @@ getDefaultTableStyle = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" - return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst + return $ findAttr (QName "def" Nothing Nothing) tblStyleLst graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do @@ -995,7 +1001,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells let mkgridcol w = mknode "a:gridCol" - [("w", show ((12700 * w) :: Integer))] () + [("w", tshow ((12700 * w) :: Integer))] () let hasHeader = not (all null hdrCells) mbDefTblStyle <- getDefaultTableStyle @@ -1004,7 +1010,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do , ("bandRow", if tblPrBandRow tblPr then "1" else "0") ] (case mbDefTblStyle of Nothing -> [] - Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty]) + Just sty -> [mknode "a:tableStyleId" [] sty]) return $ mknode "a:graphic" [] [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] @@ -1037,7 +1043,7 @@ findPHType ns spElem phType -- if it's a named PHType, we want to check that the attribute -- value matches. Just phElem | (PHType tp) <- phType -> - case findAttrText (QName "type" Nothing Nothing) phElem of + case findAttr (QName "type" Nothing Nothing) phElem of Just tp' -> tp == tp' Nothing -> False -- if it's an ObjType, we want to check that there is NO @@ -1204,7 +1210,7 @@ getSlideNumberFieldId notesMaster , Just txBody <- findChild (elemName ns "p" "txBody") sp , Just p <- findChild (elemName ns "a" "p") txBody , Just fld <- findChild (elemName ns "a" "fld") p - , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld = + , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = return fldId | otherwise = throwError $ PandocSomeError @@ -1283,11 +1289,11 @@ speakerNotesSlideNumber pgNum fieldId = [ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] () , mknode "a:p" [] - [ mknode "a:fld" [ ("id", T.unpack fieldId) + [ mknode "a:fld" [ ("id", fieldId) , ("type", "slidenum") ] [ mknode "a:rPr" [("lang", "en-US")] () - , mknode "a:t" [] (show pgNum) + , mknode "a:t" [] (tshow pgNum) ] , mknode "a:endParaRPr" [("lang", "en-US")] () ] @@ -1339,7 +1345,7 @@ getSlideIdNum sldId = do Just n -> return n Nothing -> throwError $ PandocShouldNeverHappenError $ - "Slide Id " <> T.pack (show sldId) <> " not found." + "Slide Id " <> tshow sldId <> " not found." slideNum :: PandocMonad m => Slide -> P m Int slideNum slide = getSlideIdNum $ slideId slide @@ -1356,7 +1362,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text slideToRelId slide = do n <- slideNum slide offset <- asks envSlideIdOffset - return $ "rId" <> T.pack (show $ n + offset) + return $ "rId" <> tshow (n + offset) data Relationship = Relationship { relId :: Int @@ -1368,13 +1374,11 @@ elementToRel :: Element -> Maybe Relationship elementToRel element | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing = do rId <- findAttr (QName "Id" Nothing Nothing) element - numStr <- stripPrefix "rId" rId - num <- case reads numStr :: [(Int, String)] of - (n, _) : _ -> Just n - [] -> Nothing - type' <- findAttrText (QName "Type" Nothing Nothing) element + numStr <- T.stripPrefix "rId" rId + num <- fromIntegral <$> readTextAsInteger numStr + type' <- findAttr (QName "Type" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship num type' target + return $ Relationship num type' (T.unpack target) | otherwise = Nothing slideToPresRel :: PandocMonad m => Slide -> P m Relationship @@ -1463,10 +1467,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels relToElement :: Relationship -> Element -relToElement rel = mknode "Relationship" [ ("Id", "rId" <> - show (relId rel)) - , ("Type", T.unpack $ relType rel) - , ("Target", relTarget rel) ] () +relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel)) + , ("Type", relType rel) + , ("Target", T.pack $ relTarget rel) ] () relsToElement :: [Relationship] -> Element relsToElement rels = mknode "Relationships" @@ -1501,7 +1504,8 @@ slideToSpeakerNotesEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml") + ("ppt/notesSlides/notesSlide" <> show notesIdNum <> + ".xml") element _ -> return Nothing @@ -1514,7 +1518,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] [ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "../slides/slide" <> show idNum <> ".xml") + , ("Target", "../slides/slide" <> tshow idNum <> ".xml") ] () , mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") @@ -1547,15 +1551,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element linkRelElement (rIdNum, InternalTarget targetId) = do targetIdNum <- getSlideIdNum targetId return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "slide" <> show targetIdNum <> ".xml") + , ("Target", "slide" <> tshow targetIdNum <> ".xml") ] () linkRelElement (rIdNum, ExternalTarget (url, _)) = return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") - , ("Target", T.unpack url) + , ("Target", url) , ("TargetMode", "External") ] () @@ -1567,10 +1571,10 @@ mediaRelElement mInfo = let ext = fromMaybe "" (mInfoExt mInfo) in mknode "Relationship" [ ("Id", "rId" <> - show (mInfoLocalId mInfo)) + tshow (mInfoLocalId mInfo)) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") , ("Target", "../media/image" <> - show (mInfoGlobalId mInfo) <> T.unpack ext) + tshow (mInfoGlobalId mInfo) <> ext) ] () speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) @@ -1580,7 +1584,7 @@ speakerNotesSlideRelElement slide = do return $ case M.lookup idNum mp of Nothing -> Nothing Just n -> - let target = "../notesSlides/notesSlide" <> show n <> ".xml" + let target = "../notesSlides/notesSlide" <> tshow n <> ".xml" in Just $ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") @@ -1619,9 +1623,9 @@ slideToSlideRelElement slide = do slideToSldIdElement :: PandocMonad m => Slide -> P m Element slideToSldIdElement slide = do n <- slideNum slide - let id' = show $ n + 255 + let id' = tshow $ n + 255 rId <- slideToRelId slide - return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] () + return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element presentationToSldIdLst (Presentation _ slides) = do @@ -1646,7 +1650,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do notesMasterElem = mknode "p:notesMasterIdLst" [] [ mknode "p:NotesMasterId" - [("r:id", "rId" <> show notesMasterRId)] + [("r:id", "rId" <> tshow notesMasterRId)] () ] @@ -1702,17 +1706,17 @@ docPropsElement docProps = do ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ - mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps) + mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps) : - mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps) + mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps) : - mknode "cp:keywords" [] (T.unpack keywords) - : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)]) - <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)]) - <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)]) + mknode "cp:keywords" [] keywords + : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)]) + <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)]) + <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)]) <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x - , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) docPropsToEntry :: PandocMonad m => DocProps -> P m Entry docPropsToEntry docProps = docPropsElement docProps >>= @@ -1723,8 +1727,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element docCustomPropsElement docProps = do let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) - ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v) + ,("pid", tshow pid) + ,("name", k)] $ mknode "vt:lpwstr" [] v return $ mknode "Properties" [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") @@ -1743,7 +1747,7 @@ viewPropsElement = do distArchive <- asks envDistArchive viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml" -- remove "lastView" if it exists: - let notLastView :: Text.XML.Light.Attr -> Bool + let notLastView :: XML.Attr -> Bool notLastView attr = qName (attrKey attr) /= "lastView" return $ @@ -1755,15 +1759,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml" defaultContentTypeToElem :: DefaultContentType -> Element defaultContentTypeToElem dct = mknode "Default" - [("Extension", T.unpack $ defContentTypesExt dct), - ("ContentType", T.unpack $ defContentTypesType dct)] + [("Extension", defContentTypesExt dct), + ("ContentType", defContentTypesType dct)] () overrideContentTypeToElem :: OverrideContentType -> Element overrideContentTypeToElem oct = mknode "Override" - [("PartName", overrideContentTypesPart oct), - ("ContentType", T.unpack $ overrideContentTypesType oct)] + [("PartName", T.pack $ overrideContentTypesPart oct), + ("ContentType", overrideContentTypesType oct)] () contentTypesToElement :: ContentTypes -> Element @@ -1821,7 +1825,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] getSpeakerNotesFilePaths = do mp <- asks envSpeakerNotesIdMap let notesIdNums = M.elems mp - return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums + return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") + notesIdNums presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes presentationToContentTypes p@(Presentation _ slides) = do @@ -1885,11 +1890,11 @@ getContentType fp | otherwise = Nothing -- Kept as String for XML.Light -autoNumAttrs :: ListAttributes -> [(String, String)] +autoNumAttrs :: ListAttributes -> [(Text, Text)] autoNumAttrs (startNum, numStyle, numDelim) = numAttr <> typeAttr where - numAttr = [("startAt", show startNum) | startNum /= 1] + numAttr = [("startAt", tshow startNum) | startNum /= 1] typeAttr = [("type", typeString <> delimString)] typeString = case numStyle of Decimal -> "arabic" diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs new file mode 100644 index 000000000..38e4df218 --- /dev/null +++ b/src/Text/Pandoc/XML/Light.hs @@ -0,0 +1,586 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XML.Light + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +xml-light, which we used in pandoc's the XML-based readers, has +some limitations: in particular, it produces nodes with String +instead of Text, and the parser falls over on processing instructions +(see #7091). + +This module exports much of the API of xml-light, but using Text instead +of String. In addition, the xml-light parsers are replaced by xml-conduit's +well-tested parser. (The xml-conduit types are mapped to types +isomorphic to xml-light's, to avoid the need for massive code modifications +elsewhere.) Bridge functions to map xml-light types to this module's +types are also provided (since libraries like texmath still use xml-light). + +Another advantage of the xml-conduit parser is that it gives us +detailed information on xml parse errors. + +In the future we may want to move to using xml-conduit or another +xml library in the code base, but this change gives us +better performance and accuracy without much change in the +code that used xml-light. +-} +module Text.Pandoc.XML.Light + ( -- * Basic types, duplicating those from xml-light but with Text + -- instead of String + Line + , Content(..) + , Element(..) + , Attr(..) + , CData(..) + , CDataKind(..) + , QName(..) + , Node(..) + , unode + , unqual + , add_attr + , add_attrs + -- * Conversion functions from xml-light types + , fromXLQName + , fromXLCData + , fromXLElement + , fromXLAttr + , fromXLContent + -- * Replacement for xml-light's Text.XML.Proc + , strContent + , onlyElems + , elChildren + , onlyText + , findChildren + , filterChildren + , filterChildrenName + , findChild + , filterChild + , filterChildName + , findElement + , filterElement + , filterElementName + , findElements + , filterElements + , filterElementsName + , findAttr + , lookupAttr + , lookupAttrBy + , findAttrBy + -- * Replacement for xml-light's Text.XML.Output + , ppTopElement + , ppElement + , ppContent + , ppcElement + , ppcContent + , showTopElement + , showElement + , showContent + , useShortEmptyTags + , defaultConfigPP + , ConfigPP(..) + -- * Replacement for xml-light's Text.XML.Input + , parseXMLElement + , parseXMLContents + ) where + +import qualified Control.Exception as E +import qualified Text.XML as Conduit +import Text.XML.Unresolved (InvalidEventStream(..)) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText) +import qualified Data.Map as M +import Data.Data (Data) +import Data.Typeable (Typeable) +import Data.Maybe (mapMaybe, listToMaybe) +import Data.List(find) +import qualified Text.XML.Light as XL + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElement :: TL.Text -> Either T.Text Element +parseXMLElement t = + elementToElement . Conduit.documentRoot <$> + either (Left . T.pack . E.displayException) Right + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + +parseXMLContents :: TL.Text -> Either T.Text [Content] +parseXMLContents t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of + Left e -> + case E.fromException e of + Just (ContentAfterRoot _) -> + elContent <$> parseXMLElement ("" <> t <> "") + _ -> Left . T.pack . E.displayException $ e + Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x] + +elementToElement :: Conduit.Element -> Element +elementToElement (Conduit.Element name attribMap nodes) = + Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing + where + attrs = map (\(n,v) -> Attr (nameToQname n) v) $ + M.toList attribMap + nameToQname (Conduit.Name localName mbns mbpref) = + case mbpref of + Nothing -> + case T.stripPrefix "xmlns:" localName of + Just rest -> QName rest mbns (Just "xmlns") + Nothing -> QName localName mbns mbpref + _ -> QName localName mbns mbpref + +nodeToContent :: Conduit.Node -> Maybe Content +nodeToContent (Conduit.NodeElement el) = + Just (Elem (elementToElement el)) +nodeToContent (Conduit.NodeContent t) = + Just (Text (CData CDataText t Nothing)) +nodeToContent _ = Nothing + +unqual :: Text -> QName +unqual x = QName x Nothing Nothing + +-- | Add an attribute to an element. +add_attr :: Attr -> Element -> Element +add_attr a e = add_attrs [a] e + +-- | Add some attributes to an element. +add_attrs :: [Attr] -> Element -> Element +add_attrs as e = e { elAttribs = as ++ elAttribs e } + +-- +-- type definitions lightly modified from xml-light +-- + +-- | A line is an Integer +type Line = Integer + +-- | XML content +data Content = Elem Element + | Text CData + | CRef Text + deriving (Show, Typeable, Data) + +-- | XML elements +data Element = Element { + elName :: QName, + elAttribs :: [Attr], + elContent :: [Content], + elLine :: Maybe Line + } deriving (Show, Typeable, Data) + +-- | XML attributes +data Attr = Attr { + attrKey :: QName, + attrVal :: Text + } deriving (Eq, Ord, Show, Typeable, Data) + +-- | XML CData +data CData = CData { + cdVerbatim :: CDataKind, + cdData :: Text, + cdLine :: Maybe Line + } deriving (Show, Typeable, Data) + +data CDataKind + = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. + | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in case (qURI q1, qURI q2) of + (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) + (u1,u2) -> compare u1 u2 + x -> x + +class Node t where + node :: QName -> t -> Element + +instance Node ([Attr],[Content]) where + node n (attrs,cont) = Element { elName = n + , elAttribs = attrs + , elContent = cont + , elLine = Nothing + } + +instance Node [Attr] where node n as = node n (as,[]::[Content]) +instance Node Attr where node n a = node n [a] +instance Node () where node n () = node n ([]::[Attr]) + +instance Node [Content] where node n cs = node n ([]::[Attr],cs) +instance Node Content where node n c = node n [c] +instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) + +instance Node ([Attr],[Element]) where + node n (as,cs) = node n (as,map Elem cs) + +instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Element) where node n (a,c) = node n ([a],c) +instance Node [Element] where node n es = node n ([]::[Attr],es) +instance Node Element where node n e = node n [e] + +instance Node ([Attr],[CData]) where + node n (as,cs) = node n (as,map Text cs) + +instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) +instance Node (Attr,CData) where node n (a,c) = node n ([a],c) +instance Node [CData] where node n es = node n ([]::[Attr],es) +instance Node CData where node n e = node n [e] + +instance Node ([Attr],Text) where + node n (as,t) = node n (as, CData { cdVerbatim = CDataText + , cdData = t + , cdLine = Nothing }) + +instance Node (Attr,Text ) where node n (a,t) = node n ([a],t) +instance Node Text where node n t = node n ([]::[Attr],t) + +-- | Create node with unqualified name +unode :: Node t => Text -> t -> Element +unode = node . unqual + +-- +-- conversion from xml-light +-- + +fromXLQName :: XL.QName -> QName +fromXLQName qn = QName { qName = T.pack $ XL.qName qn + , qURI = T.pack <$> XL.qURI qn + , qPrefix = T.pack <$> XL.qPrefix qn } + +fromXLCData :: XL.CData -> CData +fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of + XL.CDataText -> CDataText + XL.CDataVerbatim -> CDataVerbatim + XL.CDataRaw -> CDataRaw + , cdData = T.pack $ XL.cdData cd + , cdLine = XL.cdLine cd } + +fromXLElement :: XL.Element -> Element +fromXLElement el = Element { elName = fromXLQName $ XL.elName el + , elAttribs = map fromXLAttr $ XL.elAttribs el + , elContent = map fromXLContent $ XL.elContent el + , elLine = XL.elLine el } + +fromXLAttr :: XL.Attr -> Attr +fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s) + +fromXLContent :: XL.Content -> Content +fromXLContent (XL.Elem el) = Elem $ fromXLElement el +fromXLContent (XL.Text cd) = Text $ fromXLCData cd +fromXLContent (XL.CRef s) = CRef (T.pack s) + +-- +-- copied from xml-light Text.XML.Proc +-- + +-- | Get the text value of an XML element. This function +-- ignores non-text elements, and concatenates all text elements. +strContent :: Element -> Text +strContent = mconcat . map cdData . onlyText . elContent + +-- | Select only the elements from a list of XML content. +onlyElems :: [Content] -> [Element] +onlyElems xs = [ x | Elem x <- xs ] + +-- | Select only the elements from a parent. +elChildren :: Element -> [Element] +elChildren e = [ x | Elem x <- elContent e ] + +-- | Select only the text from a list of XML content. +onlyText :: [Content] -> [CData] +onlyText xs = [ x | Text x <- xs ] + +-- | Find all immediate children with the given name. +findChildren :: QName -> Element -> [Element] +findChildren q e = filterChildren ((q ==) . elName) e + +-- | Filter all immediate children wrt a given predicate. +filterChildren :: (Element -> Bool) -> Element -> [Element] +filterChildren p e = filter p (onlyElems (elContent e)) + + +-- | Filter all immediate children wrt a given predicate over their names. +filterChildrenName :: (QName -> Bool) -> Element -> [Element] +filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) + + +-- | Find an immediate child with the given name. +findChild :: QName -> Element -> Maybe Element +findChild q e = listToMaybe (findChildren q e) + +-- | Find an immediate child with the given name. +filterChild :: (Element -> Bool) -> Element -> Maybe Element +filterChild p e = listToMaybe (filterChildren p e) + +-- | Find an immediate child with name matching a predicate. +filterChildName :: (QName -> Bool) -> Element -> Maybe Element +filterChildName p e = listToMaybe (filterChildrenName p e) + +-- | Find the left-most occurrence of an element matching given name. +findElement :: QName -> Element -> Maybe Element +findElement q e = listToMaybe (findElements q e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElement :: (Element -> Bool) -> Element -> Maybe Element +filterElement p e = listToMaybe (filterElements p e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElementName :: (QName -> Bool) -> Element -> Maybe Element +filterElementName p e = listToMaybe (filterElementsName p e) + +-- | Find all non-nested occurances of an element. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +findElements :: QName -> Element -> [Element] +findElements qn e = filterElementsName (qn==) e + +-- | Find all non-nested occurrences of an element wrt. given predicate. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElements :: (Element -> Bool) -> Element -> [Element] +filterElements p e + | p e = [e] + | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e + +-- | Find all non-nested occurences of an element wrt a predicate over element names. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElementsName :: (QName -> Bool) -> Element -> [Element] +filterElementsName p e = filterElements (p.elName) e + +-- | Lookup the value of an attribute. +findAttr :: QName -> Element -> Maybe Text +findAttr x e = lookupAttr x (elAttribs e) + +-- | Lookup attribute name from list. +lookupAttr :: QName -> [Attr] -> Maybe Text +lookupAttr x = lookupAttrBy (x ==) + +-- | Lookup the first attribute whose name satisfies the given predicate. +lookupAttrBy :: (QName -> Bool) -> [Attr] -> Maybe Text +lookupAttrBy p as = attrVal `fmap` find (p . attrKey) as + +-- | Lookup the value of the first attribute whose name +-- satisfies the given predicate. +findAttrBy :: (QName -> Bool) -> Element -> Maybe Text +findAttrBy p e = lookupAttrBy p (elAttribs e) + + +-- +-- duplicates functinos from Text.XML.Output +-- + +-- | The XML 1.0 header +xmlHeader :: Text +xmlHeader = "" + + +-------------------------------------------------------------------------------- +data ConfigPP = ConfigPP + { shortEmptyTag :: QName -> Bool + , prettify :: Bool + } + +-- | Default pretty orinting configuration. +-- * Always use abbreviate empty tags. +defaultConfigPP :: ConfigPP +defaultConfigPP = ConfigPP { shortEmptyTag = const True + , prettify = False + } + +-- | The predicate specifies for which empty tags we should use XML's +-- abbreviated notation . This is useful if we are working with +-- some XML-ish standards (such as certain versions of HTML) where some +-- empty tags should always be displayed in the form. +useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP +useShortEmptyTags p c = c { shortEmptyTag = p } + + +-- | Specify if we should use extra white-space to make document more readable. +-- WARNING: This adds additional white-space to text elements, +-- and so it may change the meaning of the document. +useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP +useExtraWhiteSpace p c = c { prettify = p } + +-- | A configuration that tries to make things pretty +-- (possibly at the cost of changing the semantics a bit +-- through adding white space.) +prettyConfigPP :: ConfigPP +prettyConfigPP = useExtraWhiteSpace True defaultConfigPP + + +-------------------------------------------------------------------------------- + + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppTopElement :: Element -> Text +ppTopElement = ppcTopElement prettyConfigPP + +-- | Pretty printing elements +ppElement :: Element -> Text +ppElement = ppcElement prettyConfigPP + +-- | Pretty printing content +ppContent :: Content -> Text +ppContent = ppcContent prettyConfigPP + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppcTopElement :: ConfigPP -> Element -> Text +ppcTopElement c e = T.unlines [xmlHeader,ppcElement c e] + +-- | Pretty printing elements +ppcElement :: ConfigPP -> Element -> Text +ppcElement c = TL.toStrict . toLazyText . ppElementS c mempty + +-- | Pretty printing content +ppcContent :: ConfigPP -> Content -> Text +ppcContent c = TL.toStrict . toLazyText . ppContentS c mempty + +ppcCData :: ConfigPP -> CData -> Text +ppcCData c = TL.toStrict . toLazyText . ppCDataS c mempty + +type Indent = Builder + +-- | Pretty printing content using ShowT +ppContentS :: ConfigPP -> Indent -> Content -> Builder +ppContentS c i x = case x of + Elem e -> ppElementS c i e + Text t -> ppCDataS c i t + CRef r -> showCRefS r + +ppElementS :: ConfigPP -> Indent -> Element -> Builder +ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <> + (case elContent e of + [] | "?" `T.isPrefixOf` qName name -> fromText " ?>" + | shortEmptyTag c name -> fromText " />" + [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name + cs -> singleton '>' <> nl <> + mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <> + i <> tagEnd name + where (nl,sp) = if prettify c then ("\n"," ") else ("","") + ) + where name = elName e + +ppCDataS :: ConfigPP -> Indent -> CData -> Builder +ppCDataS c i t = i <> if cdVerbatim t /= CDataText || not (prettify c) + then showCDataS t + else foldr cons mempty (T.unpack (showCData t)) + where cons :: Char -> Builder -> Builder + cons '\n' ys = singleton '\n' <> i <> ys + cons y ys = singleton y <> ys + + + +-------------------------------------------------------------------------------- + +-- | Adds the header. +showTopElement :: Element -> Text +showTopElement c = xmlHeader <> showElement c + +showContent :: Content -> Text +showContent = ppcContent defaultConfigPP + +showElement :: Element -> Text +showElement = ppcElement defaultConfigPP + +showCData :: CData -> Text +showCData = ppcCData defaultConfigPP + +-- Note: crefs should not contain '&', ';', etc. +showCRefS :: Text -> Builder +showCRefS r = singleton '&' <> fromText r <> singleton ';' + +-- | Convert a text element to characters. +showCDataS :: CData -> Builder +showCDataS cd = + case cdVerbatim cd of + CDataText -> escStr (cdData cd) + CDataVerbatim -> fromText " escCData (cdData cd) <> + fromText "]]>" + CDataRaw -> fromText (cdData cd) + +-------------------------------------------------------------------------------- +escCData :: Text -> Builder +escCData t + | "]]>" `T.isPrefixOf` t = + fromText "]]]]>" <> fromText (T.drop 3 t) +escCData t + = case T.uncons t of + Nothing -> mempty + Just (c,t') -> singleton c <> escCData t' + +escChar :: Char -> Builder +escChar c = case c of + '<' -> fromText "<" + '>' -> fromText ">" + '&' -> fromText "&" + '"' -> fromText """ + -- we use ' instead of ' because IE apparently has difficulties + -- rendering ' in xhtml. + -- Reported by Rohan Drape . + '\'' -> fromText "'" + _ -> singleton c + + {- original xml-light version: + -- NOTE: We escape '\r' explicitly because otherwise they get lost + -- when parsed back in because of then end-of-line normalization rules. + _ | isPrint c || c == '\n' -> singleton c + | otherwise -> showText "&#" . showsT oc . singleton ';' + where oc = ord c + -} + +escStr :: Text -> Builder +escStr cs = if T.any needsEscape cs + then mconcat (map escChar (T.unpack cs)) + else fromText cs + where + needsEscape '<' = True + needsEscape '>' = True + needsEscape '&' = True + needsEscape '"' = True + needsEscape '\'' = True + needsEscape _ = False + +tagEnd :: QName -> Builder +tagEnd qn = fromText " showQName qn <> singleton '>' + +tagStart :: QName -> [Attr] -> Builder +tagStart qn as = singleton '<' <> showQName qn <> as_str + where as_str = if null as + then mempty + else mconcat (map showAttr as) + +showAttr :: Attr -> Builder +showAttr (Attr qn v) = singleton ' ' <> showQName qn <> + singleton '=' <> + singleton '"' <> escStr v <> singleton '"' + +showQName :: QName -> Builder +showQName q = + case qPrefix q of + Nothing -> fromText (qName q) + Just p -> fromText p <> singleton ':' <> fromText (qName q) diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs deleted file mode 100644 index 8ad22a66a..000000000 --- a/src/Text/Pandoc/XMLParser.hs +++ /dev/null @@ -1,66 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.XMLParser - Copyright : Copyright (C) 2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Bridge to allow using xml-conduit's parser with xml-light's types. --} -module Text.Pandoc.XMLParser - ( parseXMLElement - , parseXMLContents - , module Text.XML.Light.Types - ) where - -import qualified Control.Exception as E -import qualified Text.XML as Conduit -import Text.XML.Unresolved (InvalidEventStream(..)) -import qualified Text.XML.Light as Light -import Text.XML.Light.Types -import qualified Data.Text as T -import qualified Data.Text.Lazy as TL -import qualified Data.Map as M -import Data.Maybe (mapMaybe) - --- Drop in replacement for parseXMLDoc in xml-light. -parseXMLElement :: TL.Text -> Either T.Text Light.Element -parseXMLElement t = - elementToElement . Conduit.documentRoot <$> - either (Left . T.pack . E.displayException) Right - (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) - -parseXMLContents :: TL.Text -> Either T.Text [Light.Content] -parseXMLContents t = - case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of - Left e -> - case E.fromException e of - Just (ContentAfterRoot _) -> - elContent <$> parseXMLElement ("" <> t <> "") - _ -> Left . T.pack . E.displayException $ e - Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x] - -elementToElement :: Conduit.Element -> Light.Element -elementToElement (Conduit.Element name attribMap nodes) = - Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing - where - attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $ - M.toList attribMap - nameToQname (Conduit.Name localName mbns mbpref) = - case mbpref of - Nothing | "xmlns:" `T.isPrefixOf` localName -> - Light.QName (T.unpack $ T.drop 6 localName) (T.unpack <$> mbns) - (Just "xmlns") - _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns) - (T.unpack <$> mbpref) - -nodeToContent :: Conduit.Node -> Maybe Light.Content -nodeToContent (Conduit.NodeElement el) = - Just (Light.Elem (elementToElement el)) -nodeToContent (Conduit.NodeContent t) = - Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing)) -nodeToContent _ = Nothing - diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index 376f02c55..c1e47622d 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -43,7 +43,8 @@ compareXMLBool _ _ = False displayDiff :: Content -> Content -> String displayDiff elemA elemB = - showDiff (1,1) $ getDiff (lines $ ppContent elemA) (lines $ ppContent elemB) + showDiff (1,1) + (getDiff (lines $ showContent elemA) (lines $ showContent elemB)) goldenArchive :: FilePath -> IO Archive goldenArchive fp = toArchive . BL.fromStrict <$> BS.readFile fp diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index d3b16d0f2..ed7d1165c 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 6293ef493..07ae75676 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index 4205a1516..e5f034378 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index adb3f23db..174942135 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 92c8137fe..b5c31a851 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index f53470617..c42ca1b05 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index d6af90a72..1cb4c1fd7 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index e18dbe853..7122456ea 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 820299043..616ba0f81 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index ae0f41d12..c30dcdee9 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 94cd35dfa..8a704b41e 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 879f2a25b..b1906c8c4 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 93f86478f..8adf1cf75 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 967d297f2..584117503 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index c5614e2fa..8859fe55c 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 0f39a831f..b80f3b3ba 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 07046f223..35beed68a 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 3656618e6..2c29fd674 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 8798253d5..10a948886 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 0a24d1840..5b90e74a0 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index 52bb7a217..cc81b46d1 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 182c06c64..1394dc442 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 7b69a56a3..0d1688694 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 3d3a35701..be1caef2d 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 5ae37b406..a1d2323c2 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index c29aa6716..2f3a831a7 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 664493246..af066107c 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index b6d15340e..9cc7a075f 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index f8e1092d2..f8b8dcfde 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index b4cda82f2..1c3baf0bf 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index ee222efa0..28686970d 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index c6f8d9c96..7051cefbd 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index ea8146690..9df631640 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx index 58070eb3f..5e9c2c630 100644 Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx index db9b7e371..15232c32d 100644 Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx index c7b1ed7d5..aab0cc6f5 100644 Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx index 6944d92bf..fe5b675f3 100644 Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx index ae0d28429..de5e68151 100644 Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx index 37c74c69a..89e6fbdf2 100644 Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx index 324e443a1..6bcbd1b9c 100644 Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx index c81b983e3..79d48560b 100644 Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx index 30ce33db6..9d46036fe 100644 Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx index d6c604968..54ec7f305 100644 Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx index 000e17ecd..a028b346f 100644 Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx index fdcd2e29b..1158c16fc 100644 Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx index e73126376..670a825de 100644 Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx index e3f968e9e..6d297ef11 100644 Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx index eadb9372e..473b9498d 100644 Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx index 8ca6bab2b..2cdf54474 100644 Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx index ae188ee68..ffc2eb9f7 100644 Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx index 60301fa50..676954cb8 100644 Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx index 17124a50d..29164af15 100644 Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx index 19ae7dd4e..1742b3296 100644 Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx index b650b7585..c6df8e18e 100644 Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx index 0ab029614..cf6e52eef 100644 Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx index 2a6e35080..e06d9079d 100644 Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx index a7bcf6a4b..449339778 100644 Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx index 21b018c25..258098082 100644 Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx index 4ec4772a4..2f0213919 100644 Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx index 5983657b6..9dbfa41a0 100644 Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx index dd54c7082..f288dde14 100644 Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx index b3e5ed5b9..0ab1302da 100644 Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx index 1078854bb..6343bffe4 100644 Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx index 5116c6c4e..5d4465f64 100644 Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx index 0c8e49bd9..d581681aa 100644 Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx index 68695939d..c922df3a8 100644 Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx index 7ed9b946d..13f564bf0 100644 Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx index 79fc82345..f0b302738 100644 Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx index 9f943c279..2d8d00242 100644 Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx index ac72d8ced..4320128b3 100644 Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx index 15c7b5469..0a3f6e56d 100644 Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx index 926c5e699..e41219844 100644 Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx index a37e72d2c..82b8aa13d 100644 Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx index 7f86533fe..270a7eeac 100644 Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx index 89e3db0ab..44985d701 100644 Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ -- cgit v1.2.3 From e1454fe0d0e2f1cb4e9c5753f095a1f0a8580ffe Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 1 Mar 2021 14:14:58 +0100 Subject: Jira writer: use Span identifiers as anchors Closes: tarleb/jira-wiki-markup#3. --- src/Text/Pandoc/Writers/Jira.hs | 4 +++- test/Tests/Writers/Jira.hs | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index c21085a4f..131896201 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -292,7 +292,9 @@ quotedToJira qtype xs = do spanToJira :: PandocMonad m => Attr -> [Inline] -> JiraConverter m [Jira.Inline] -spanToJira (_, _classes, _) = toJiraInlines +spanToJira (ident, _classes, _attribs) inls = case ident of + "" -> toJiraInlines inls + _ -> (Jira.Anchor ident :) <$> toJiraInlines inls registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline] registerNotes contents = do diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index 93d830c94..aff8348d4 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -3,6 +3,7 @@ module Tests.Writers.Jira (tests) where import Data.Text (unpack) import Test.Tasty +import Test.Tasty.HUnit (HasCallStack) import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () @@ -12,7 +13,7 @@ jira :: (ToPandoc a) => a -> String jira = unpack . purely (writeJira def) . toPandoc infix 4 =: -(=:) :: (ToString a, ToPandoc a) +(=:) :: (ToString a, ToPandoc a, HasCallStack) => String -> (a, String) -> TestTree (=:) = test jira @@ -61,5 +62,11 @@ tests = linkWith ("", ["user-account"], []) "~johndoe" "" "~johndoe" =?> "[~johndoe]" ] + + , testGroup "spans" + [ "id is used as anchor" =: + spanWith ("unicorn", [], []) (str "Unicorn") =?> + "{anchor:unicorn}Unicorn" + ] ] ] -- cgit v1.2.3 From eb184d9148e8a3e8c896a71550b1f0bee8da9a21 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 8 Mar 2021 12:40:25 +0100 Subject: Jira writer: use noformat instead of code for unknown languages. Code blocks that are not marked as a language supported by Jira are rendered as preformatted text with `{noformat}` blocks. Fixes: tarleb/jira-wiki-markup#4 --- src/Text/Pandoc/Writers/Jira.hs | 10 ++++---- test/Tests/Writers/Jira.hs | 10 ++++++++ test/writer.jira | 55 +++++++++++++++++------------------------ 3 files changed, 37 insertions(+), 38 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index 131896201..a714dac2e 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -126,13 +126,13 @@ toJiraCode :: PandocMonad m -> Text -> JiraConverter m [Jira.Block] toJiraCode (ident, classes, _attribs) code = do - let lang = case find (\c -> T.toLower c `elem` knownLanguages) classes of - Nothing -> Jira.Language "java" - Just l -> Jira.Language l let addAnchor b = if T.null ident then b else [Jira.Para (singleton (Jira.Anchor ident))] <> b - return . addAnchor . singleton $ Jira.Code lang mempty code + return . addAnchor . singleton $ + case find (\c -> T.toLower c `elem` knownLanguages) classes of + Nothing -> Jira.NoFormat mempty code + Just l -> Jira.Code (Jira.Language l) mempty code -- | Creates a Jira definition list toJiraDefinitionList :: PandocMonad m @@ -310,7 +310,7 @@ registerNotes contents = do knownLanguages :: [Text] knownLanguages = [ "actionscript", "ada", "applescript", "bash", "c", "c#", "c++" - , "css", "erlang", "go", "groovy", "haskell", "html", "javascript" + , "css", "erlang", "go", "groovy", "haskell", "html", "java", "javascript" , "json", "lua", "nyan", "objc", "perl", "php", "python", "r", "ruby" , "scala", "sql", "swift", "visualbasic", "xml", "yaml" ] diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index aff8348d4..b618c3970 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -68,5 +68,15 @@ tests = spanWith ("unicorn", [], []) (str "Unicorn") =?> "{anchor:unicorn}Unicorn" ] + + , testGroup "code" + [ "code block with known language" =: + codeBlockWith ("", ["java"], []) "Book book = new Book(\"Algebra\")" =?> + "{code:java}\nBook book = new Book(\"Algebra\")\n{code}" + + , "code block without language" =: + codeBlockWith ("", [], []) "preformatted\n text.\n" =?> + "{noformat}\npreformatted\n text.\n{noformat}" + ] ] ] diff --git a/test/writer.jira b/test/writer.jira index aff0dc320..71b720d65 100644 --- a/test/writer.jira +++ b/test/writer.jira @@ -33,11 +33,10 @@ bq. This is a block quote. It is pretty short. {quote} Code in a block quote: -{code:java} +{noformat} sub status { print "working"; -} -{code} +}{noformat} A list: # item one @@ -56,22 +55,20 @@ And a following paragraph. h1. {anchor:code-blocks}Code Blocks Code: -{code:java} +{noformat} ---- (should be four hyphens) sub status { print "working"; } -this code block is indented by one tab -{code} +this code block is indented by one tab{noformat} And: -{code:java} +{noformat} this code block is indented by two tabs -These should not be escaped: \$ \\ \> \[ \{ -{code} +These should not be escaped: \$ \\ \> \[ \{{noformat} ---- h1. {anchor:lists}Lists h2. {anchor:unordered}Unordered @@ -236,9 +233,8 @@ red fruit contains seeds, crisp, pleasant to taste * *_orange_* orange fruit -{code:java} -{ orange code block } -{code} +{noformat} +{ orange code block }{noformat} bq. orange block quote Multiple definitions, tight: @@ -292,16 +288,14 @@ foo This should be a code block, though: -{code:java} +{noformat}
foo -
-{code} +{noformat} As should this: -{code:java} -
foo
-{code} +{noformat} +
foo
{noformat} Now, nested: foo @@ -312,16 +306,14 @@ Multiline: Code block: -{code:java} - -{code} +{noformat} +{noformat} Just plain comment, with trailing spaces on the line: Code: -{code:java} -
-{code} +{noformat} +
{noformat} Hr’s: ---- @@ -478,9 +470,8 @@ Indented [thrice|/url]. This should \[not\]\[\] be a link. -{code:java} -[not]: /url -{code} +{noformat} +[not]: /url{noformat} Foo [bar|/url/]. Foo [biz|/url/]. @@ -506,9 +497,8 @@ An e-mail address: [mailto:nobody@nowhere.net] bq. Blockquoted: [http://example.com/] Auto-links should not occur here: {{}} -{code:java} -or here: -{code} +{noformat} +or here: {noformat} ---- h1. {anchor:images}Images From "Voyage dans la Lune" by Georges Melies \(1902): @@ -534,9 +524,8 @@ This paragraph should not be part of the note, as it is not indented. Subsequent blocks are indented to show that they belong to the footnote \(as with list items). -{code:java} - { } -{code} +{noformat} + { }{noformat} If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. -- cgit v1.2.3 From a8aa301428752d96cdd58d7f4ecaa7d054f3505d Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 13 Mar 2021 12:10:02 +0100 Subject: Jira writer: improve div/panel handling Include div attributes in panels, always render divs with class `panel` as panels, and avoid nesting of panels. --- src/Text/Pandoc/Writers/Jira.hs | 39 ++++++++++++++++++++++++++++----------- test/Tests/Writers/Jira.hs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 11 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index a714dac2e..aa78d9419 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -39,11 +39,17 @@ writeJira :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeJira opts = runDefaultConverter (writerWrapText opts) (pandocToJira opts) -- | State to keep track of footnotes. -newtype ConverterState = ConverterState { stNotes :: [Text] } +data ConverterState = ConverterState + { stNotes :: [Text] -- ^ Footnotes to be appended to the end of the text + , stInPanel :: Bool -- ^ whether we are in a @{panel}@ block + } -- | Initial converter state. startState :: ConverterState -startState = ConverterState { stNotes = [] } +startState = ConverterState + { stNotes = [] + , stInPanel = False + } -- | Converter monad type JiraConverter m = ReaderT WrapOption (StateT ConverterState m) @@ -126,14 +132,20 @@ toJiraCode :: PandocMonad m -> Text -> JiraConverter m [Jira.Block] toJiraCode (ident, classes, _attribs) code = do - let addAnchor b = if T.null ident - then b - else [Jira.Para (singleton (Jira.Anchor ident))] <> b - return . addAnchor . singleton $ + return . addAnchor ident . singleton $ case find (\c -> T.toLower c `elem` knownLanguages) classes of Nothing -> Jira.NoFormat mempty code Just l -> Jira.Code (Jira.Language l) mempty code +-- | Prepends an anchor with the given identifier. +addAnchor :: Text -> [Jira.Block] -> [Jira.Block] +addAnchor ident = + if T.null ident + then id + else \case + Jira.Para xs : bs -> (Jira.Para (Jira.Anchor ident : xs) : bs) + bs -> (Jira.Para (singleton (Jira.Anchor ident)) : bs) + -- | Creates a Jira definition list toJiraDefinitionList :: PandocMonad m => [([Inline], [[Block]])] @@ -149,11 +161,16 @@ toJiraDefinitionList defItems = do toJiraPanel :: PandocMonad m => Attr -> [Block] -> JiraConverter m [Jira.Block] -toJiraPanel attr blocks = do - jiraBlocks <- toJiraBlocks blocks - return $ if attr == nullAttr - then jiraBlocks - else singleton (Jira.Panel [] jiraBlocks) +toJiraPanel (ident, classes, attribs) blocks = do + inPanel <- gets stInPanel + if inPanel || ("panel" `notElem` classes && null attribs) + then addAnchor ident <$> toJiraBlocks blocks + else do + modify $ \st -> st{ stInPanel = True } + jiraBlocks <- toJiraBlocks blocks + modify $ \st -> st{ stInPanel = inPanel } + let params = map (uncurry Jira.Parameter) attribs + return $ singleton (Jira.Panel params $ addAnchor ident jiraBlocks) -- | Creates a Jira header toJiraHeader :: PandocMonad m diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index b618c3970..0c6f48853 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -79,4 +79,34 @@ tests = "{noformat}\npreformatted\n text.\n{noformat}" ] ] + + , testGroup "blocks" + [ testGroup "div" + [ "empty attributes" =: + divWith nullAttr (para "interesting text") =?> + "interesting text" + + , "just identifier" =: + divWith ("a", [], []) (para "interesting text") =?> + "{anchor:a}interesting text" + + , "with class 'panel'" =: + divWith ("", ["panel"], []) (para "Contents!") =?> + "{panel}\nContents\\!\n{panel}\n" + + , "panel with id" =: + divWith ("b", ["panel"], []) (para "text") =?> + "{panel}\n{anchor:b}text\n{panel}\n" + + , "title attribute" =: + divWith ("", [], [("title", "Gimme!")]) (para "Contents!") =?> + "{panel:title=Gimme!}\nContents\\!\n{panel}\n" + + , "nested panels" =: + let panelAttr = ("", ["panel"], []) + in divWith panelAttr (para "hi" <> + divWith panelAttr (para "wassup?")) =?> + "{panel}\nhi\n\nwassup?\n{panel}\n" + ] + ] ] -- cgit v1.2.3 From 82e8c29cb0a89d7129f459bef6696254ec56e0c6 Mon Sep 17 00:00:00 2001 From: Erik Rask Date: Fri, 19 Feb 2021 13:05:35 +0100 Subject: Include Header.Attr.attributes as XML attributes on section Add key-value pairs found in the attributes list of Header.Attr as XML attributes on the corresponding section element. Any key name not allowed as an XML attribute name is dropped, as are keys with invalid values where they are defined as enums in DocBook, and xml:id (for DocBook 5)/id (for DocBook 4) to not intervene with computed identifiers. --- src/Text/Pandoc/Writers/Docbook.hs | 47 ++++++++++++++++++++++++++++++++++++-- test/Tests/Writers/Docbook.hs | 37 ++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index a6776608d..1f10c9d04 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -168,7 +168,7 @@ blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m (Doc Text) blockToDocbook _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: -blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do +blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) = do version <- ask -- Docbook doesn't allow sections with no content, so insert some if needed let bs = if null xs @@ -188,7 +188,10 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do -- standalone documents will include them in the template. then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] - attribs = nsAttr <> idAttr + + -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id + miscAttr = filter (isSectionAttr version) attrs + attribs = nsAttr <> idAttr <> miscAttr title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents @@ -451,3 +454,43 @@ idAndRole (id',cls,_) = ident <> role where ident = [("id", id') | not (T.null id')] role = [("role", T.unwords cls) | not (null cls)] + +isSectionAttr :: DocBookVersion -> (Text, Text) -> Bool +isSectionAttr _ ("label",_) = True +isSectionAttr _ ("status",_) = True +isSectionAttr DocBook5 ("annotations",_) = True +isSectionAttr DocBook5 ("dir","ltr") = True +isSectionAttr DocBook5 ("dir","rtl") = True +isSectionAttr DocBook5 ("dir","lro") = True +isSectionAttr DocBook5 ("dir","rlo") = True +isSectionAttr _ ("remap",_) = True +isSectionAttr _ ("revisionflag","changed") = True +isSectionAttr _ ("revisionflag","added") = True +isSectionAttr _ ("revisionflag","deleted") = True +isSectionAttr _ ("revisionflag","off") = True +isSectionAttr _ ("role",_) = True +isSectionAttr DocBook5 ("version",_) = True +isSectionAttr DocBook5 ("xml:base",_) = True +isSectionAttr DocBook5 ("xml:lang",_) = True +isSectionAttr _ ("xreflabel",_) = True +isSectionAttr DocBook5 ("linkend",_) = True +isSectionAttr DocBook5 ("linkends",_) = True +isSectionAttr DocBook5 ("xlink:actuate",_) = True +isSectionAttr DocBook5 ("xlink:arcrole",_) = True +isSectionAttr DocBook5 ("xlink:from",_) = True +isSectionAttr DocBook5 ("xlink:href",_) = True +isSectionAttr DocBook5 ("xlink:label",_) = True +isSectionAttr DocBook5 ("xlink:role",_) = True +isSectionAttr DocBook5 ("xlink:show",_) = True +isSectionAttr DocBook5 ("xlink:title",_) = True +isSectionAttr DocBook5 ("xlink:to",_) = True +isSectionAttr DocBook5 ("xlink:type",_) = True +isSectionAttr DocBook4 ("arch",_) = True +isSectionAttr DocBook4 ("condition",_) = True +isSectionAttr DocBook4 ("conformance",_) = True +isSectionAttr DocBook4 ("lang",_) = True +isSectionAttr DocBook4 ("os",_) = True +isSectionAttr DocBook4 ("revision",_) = True +isSectionAttr DocBook4 ("security",_) = True +isSectionAttr DocBook4 ("vendor",_) = True +isSectionAttr _ (_,_) = False \ No newline at end of file diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 842aed7ae..46203eeae 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -11,9 +11,14 @@ import Text.Pandoc.Builder docbook :: (ToPandoc a) => a -> String docbook = docbookWithOpts def{ writerWrapText = WrapNone } +docbook5 :: (ToPandoc a) => a -> String +docbook5 = docbook5WithOpts def{ writerWrapText = WrapNone } + docbookWithOpts :: ToPandoc a => WriterOptions -> a -> String docbookWithOpts opts = unpack . purely (writeDocbook4 opts) . toPandoc +docbook5WithOpts :: ToPandoc a => WriterOptions -> a -> String +docbook5WithOpts opts = unpack . purely (writeDocbook5 opts) . toPandoc {- "my test" =: X =?> Y @@ -366,4 +371,36 @@ tests = [ testGroup "line blocks" ] ] ] + , testGroup "section attributes" $ + let + headers = headerWith ("myid1",[],[("role","internal"),("xml:id","anotherid"),("dir","rtl")]) 1 "header1" + <> headerWith ("myid2",[],[("invalidname","value"),("arch","linux"),("dir","invaliddir")]) 1 "header2" + in + [ test docbook5 "sections with attributes (db5)" $ + headers =?> + unlines [ "
" + , " header1" + , " " + , " " + , "
" + , "
" + , " header2" + , " " + , " " + , "
" + ] + , test docbook "sections with attributes (db4)" $ + headers =?> + unlines [ "" + , " header1" + , " " + , " " + , "" + , "" + , " header2" + , " " + , " " + , "" + ] + ] ] -- cgit v1.2.3 From 038261ea529bc4516d7cee501db70020938dbf2b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 5 Apr 2021 21:45:52 +0200 Subject: JATS writer: escape disallows chars in identifiers XML identifiers must start with an underscore or letter, and can contain only a limited set of punctuation characters. Any IDs not adhering to these rules are rewritten by writing the offending characters as Uxxxx, where `xxxx` is the character's hex code. --- src/Text/Pandoc/Writers/JATS.hs | 27 ++-- src/Text/Pandoc/Writers/JATS/References.hs | 5 +- src/Text/Pandoc/Writers/JATS/Table.hs | 4 +- src/Text/Pandoc/XML.hs | 30 ++++- test/Tests/Writers/JATS.hs | 205 ++++++++++++++++------------- 5 files changed, 162 insertions(+), 109 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index a9369db7a..26f94cb03 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -239,7 +239,7 @@ languageFor classes = codeAttr :: Attr -> (Text, [(Text, Text)]) codeAttr (ident,classes,kvs) = (lang, attr) where - attr = [("id",ident) | not (T.null ident)] ++ + attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("language",lang) | not (T.null lang)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["code-type", "code-version", "executable", @@ -251,7 +251,8 @@ codeAttr (ident,classes,kvs) = (lang, attr) blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text) blockToJATS _ Null = return empty blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do - let idAttr = [("id", writerIdentifierPrefix opts <> id') | not (T.null id')] + let idAttr = [ ("id", writerIdentifierPrefix opts <> escapeNCName id') + | not (T.null id')] let otherAttrs = ["sec-type", "specific-use"] let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] title' <- inlinesToJATS opts ils @@ -260,7 +261,7 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do inTagsSimple "title" title' $$ contents -- Bibliography reference: blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident = - inTags True "ref" [("id", ident)] . + inTags True "ref" [("id", escapeNCName ident)] . inTagsSimple "mixed-citation" <$> inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do @@ -271,14 +272,14 @@ blockToJATS opts (Div ("refs",_,_) xs) = do return $ inTagsIndented "ref-list" contents blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] return $ inTags True cls attr contents blockToJATS opts (Div (ident,_,kvs) bs) = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] @@ -296,7 +297,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt let capt = if null txt then empty else inTagsSimple "caption" $ inTagsSimple "p" alt - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation", "position", "specific-use"]] let graphicattr = [("mimetype",maintype), @@ -307,7 +308,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt capt $$ selfClosingTag "graphic" graphicattr blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do let (maintype, subtype) = imageMimeType src kvs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ @@ -434,7 +435,7 @@ inlineToJATS opts (Note contents) = do let notenum = case notes of (n, _):_ -> n + 1 [] -> 1 - thenote <- inTags True "fn" [("id","fn" <> tshow notenum)] + thenote <- inTags True "fn" [("id", "fn" <> tshow notenum)] <$> wrappedBlocksToJATS (not . isPara) opts (walk demoteHeaderAndRefs contents) modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } @@ -447,7 +448,7 @@ inlineToJATS opts (Cite _ lst) = inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils inlineToJATS opts (Span (ident,_,kvs) ils) = do contents <- inlinesToJATS opts ils - let attr = [("id",ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs , k `elem` ["content-type", "rationale", @@ -488,9 +489,9 @@ inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _)) return $ inTagsSimple "email" $ literal (escapeStringForXML email) inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do let attr = mconcat - [ [("id", ident) | not (T.null ident)] + [ [("id", escapeNCName ident) | not (T.null ident)] , [("alt", stringify txt) | not (null txt)] - , [("rid", src)] + , [("rid", escapeNCName src)] , [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] , [("ref-type", "bibr") | "ref-" `T.isPrefixOf` src] ] @@ -500,7 +501,7 @@ inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do contents <- inlinesToJATS opts txt return $ inTags False "xref" attr contents inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("ext-link-type", "uri"), ("xlink:href", src)] ++ [("xlink:title", tit) | not (T.null tit)] ++ @@ -518,7 +519,7 @@ inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do let subtype = fromMaybe "" $ lookup "mime-subtype" kvs `mplus` (T.drop 1 . T.dropWhile (/='/') <$> mbMT) - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs index 903144128..5b19fd034 100644 --- a/src/Text/Pandoc/Writers/JATS/References.hs +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -29,7 +29,7 @@ import Text.Pandoc.Builder (Inlines) import Text.Pandoc.Options (WriterOptions) import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.JATS.Types -import Text.Pandoc.XML (escapeStringForXML, inTags) +import Text.Pandoc.XML (escapeNCName, escapeStringForXML, inTags) import qualified Data.Text as T referencesToJATS :: PandocMonad m @@ -46,7 +46,8 @@ referenceToJATS :: PandocMonad m referenceToJATS _opts ref = do let refType = referenceType ref let pubType = [("publication-type", refType) | not (T.null refType)] - let wrap = inTags True "ref" [("id", "ref-" <> unItemId (referenceId ref))] + let ident = escapeNCName $ "ref-" <> unItemId (referenceId ref) + let wrap = inTags True "ref" [("id", ident)] . inTags True "element-citation" pubType return . wrap . vcat $ [ authors diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs index 465480f59..2e34900d2 100644 --- a/src/Text/Pandoc/Writers/JATS/Table.hs +++ b/src/Text/Pandoc/Writers/JATS/Table.hs @@ -24,7 +24,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions) import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.JATS.Types -import Text.Pandoc.XML (inTags, inTagsIndented, selfClosingTag) +import Text.Pandoc.XML (escapeNCName, inTags, inTagsIndented, selfClosingTag) import qualified Data.Text as T import qualified Text.Pandoc.Writers.AnnotatedTable as Ann @@ -216,7 +216,7 @@ cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) = toAttribs :: Attr -> [Text] -> [(Text, Text)] toAttribs (ident, _classes, kvs) knownAttribs = - (if T.null ident then id else (("id", ident) :)) $ + (if T.null ident then id else (("id", escapeNCName ident) :)) $ filter ((`elem` knownAttribs) . fst) kvs tableCellToJats :: PandocMonad m diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 6dbbce1d2..79b4768ec 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -13,6 +13,7 @@ Functions for escaping and formatting XML. -} module Text.Pandoc.XML ( escapeCharForXML, escapeStringForXML, + escapeNCName, inTags, selfClosingTag, inTagsSimple, @@ -24,7 +25,7 @@ module Text.Pandoc.XML ( escapeCharForXML, html5Attributes, rdfaAttributes ) where -import Data.Char (isAscii, isSpace, ord) +import Data.Char (isAscii, isSpace, ord, isLetter, isDigit) import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities) @@ -119,8 +120,33 @@ html5EntityMap = foldr go mempty htmlEntities where ent' = T.takeWhile (/=';') (T.pack ent) _ -> entmap +-- | Converts a string into an NCName, i.e., an XML name without colons. +-- Disallowed characters are escaped using @ux%x@, where @%x@ is the +-- hexadecimal unicode identifier of the escaped character. +escapeNCName :: Text -> Text +escapeNCName t = case T.uncons t of + Nothing -> T.empty + Just (c, cs) -> escapeStartChar c <> T.concatMap escapeNCNameChar cs + where + escapeStartChar :: Char -> Text + escapeStartChar c = if isLetter c || c == '_' + then T.singleton c + else escapeChar c --- Unescapes XML entities + escapeNCNameChar :: Char -> Text + escapeNCNameChar c = if isNCNameChar c + then T.singleton c + else escapeChar c + + isNCNameChar :: Char -> Bool + isNCNameChar c = isLetter c || c `elem` ("_-.·" :: String) || isDigit c + || '\x0300' <= c && c <= '\x036f' + || '\x203f' <= c && c <= '\x2040' + + escapeChar :: Char -> Text + escapeChar = T.pack . printf "U%04X" . ord + +-- | Unescapes XML entities fromEntities :: Text -> Text fromEntities t = let (x, y) = T.break (== '&') t diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 2f501c890..23c1686dc 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -1,21 +1,21 @@ {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.JATS (tests) where -import Data.Text (unpack) +import Data.Text (Text) import Test.Tasty import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder +import qualified Data.Text as T -jats :: (ToPandoc a) => a -> String -jats = unpack - . purely (writeJATS def{ writerWrapText = WrapNone }) - . toPandoc +jats :: (ToPandoc a) => a -> Text +jats = purely (writeJATS def{ writerWrapText = WrapNone }) + . toPandoc -jatsArticleAuthoring :: (ToPandoc a) => a -> String -jatsArticleAuthoring = unpack - . purely (writeJatsArticleAuthoring def{ writerWrapText = WrapNone }) +jatsArticleAuthoring :: (ToPandoc a) => a -> Text +jatsArticleAuthoring = + purely (writeJatsArticleAuthoring def{ writerWrapText = WrapNone }) . toPandoc {- @@ -32,89 +32,114 @@ which is in turn shorthand for infix 4 =: (=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> TestTree + => String -> (a, Text) -> TestTree (=:) = test jats tests :: [TestTree] -tests = [ testGroup "inline code" - [ "basic" =: code "@&" =?> "

@&

" - , "lang" =: codeWith ("", ["c"], []) "@&" =?> "

@&

" - ] - , testGroup "block code" - [ "basic" =: codeBlock "@&" =?> "@&" - , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "@&" - ] - , testGroup "images" - [ "basic" =: - image "/url" "title" mempty - =?> "" - ] - , testGroup "inlines" - [ "Emphasis" =: emph "emphasized" - =?> "

emphasized

" +tests = + [ testGroup "inline code" + [ "basic" =: code "@&" =?> "

@&

" + , "lang" =: codeWith ("", ["c"], []) "@&" =?> "

@&

" + ] + , testGroup "block code" + [ "basic" =: codeBlock "@&" =?> "@&" + , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "@&" + ] + , testGroup "images" + [ "basic" =: + image "/url" "title" mempty + =?> "" + ] + , testGroup "inlines" + [ "Emphasis" =: emph "emphasized" + =?> "

emphasized

" + + , test jatsArticleAuthoring "footnote in articleauthoring tag set" + ("test" <> note (para "footnote") =?> + unlines [ "

test" + , "

footnote

" + , "

" + ]) + ] + , "bullet list" =: bulletList [ plain $ text "first" + , plain $ text "second" + , plain $ text "third" + ] + =?> "\n\ + \ \n\ + \

first

\n\ + \
\n\ + \ \n\ + \

second

\n\ + \
\n\ + \ \n\ + \

third

\n\ + \
\n\ + \
" + , testGroup "definition lists" + [ "with internal link" =: definitionList [(link "#go" "" (str "testing"), + [plain (text "hi there")])] =?> + "\n\ + \ \n\ + \ testing\n\ + \ \n\ + \

hi there

\n\ + \
\n\ + \
\n\ + \
" + ] + , testGroup "math" + [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> + "

\n\ + \\n\ + \σ|{x}

" + ] + , testGroup "headers" + [ "unnumbered header" =: + headerWith ("foo",["unnumbered"],[]) 1 + (text "Header 1" <> note (plain $ text "note")) =?> + "\n\ + \ Header 1<xref ref-type=\"fn\" rid=\"fn1\">1</xref>\n\ + \" + , "unnumbered sub header" =: + headerWith ("foo",["unnumbered"],[]) 1 + (text "Header") + <> headerWith ("foo",["unnumbered"],[]) 2 + (text "Sub-Header") =?> + "\n\ + \ Header\n\ + \ \n\ + \ Sub-Header\n\ + \ \n\ + \" + , "containing image" =: + header 1 (image "imgs/foo.jpg" "" (text "Alt text")) =?> + "\n\ + \ <inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" />\n\ + \" + ] + + , testGroup "ids" + [ "non-ASCII in header ID" =: + headerWith ("smørbrød",[],[]) 1 (text "smørbrød") =?> + T.unlines [ "" + , " smørbrød" + , "" + ] + + , "disallowed symbol in header id" =: + headerWith ("i/o",[],[]) 1 (text "I/O") =?> + T.unlines [ "" + , " I/O" + , "" + ] + + , "disallowed symbols in internal link target" =: + link "#foo:bar" "" "baz" =?> + "

baz

" - , test jatsArticleAuthoring "footnote in articleauthoring tag set" - ("test" <> note (para "footnote") =?> - unlines [ "

test" - , "

footnote

" - , "

" - ]) - ] - , "bullet list" =: bulletList [ plain $ text "first" - , plain $ text "second" - , plain $ text "third" - ] - =?> "\n\ - \ \n\ - \

first

\n\ - \
\n\ - \ \n\ - \

second

\n\ - \
\n\ - \ \n\ - \

third

\n\ - \
\n\ - \
" - , testGroup "definition lists" - [ "with internal link" =: definitionList [(link "#go" "" (str "testing"), - [plain (text "hi there")])] =?> - "\n\ - \ \n\ - \ testing\n\ - \ \n\ - \

hi there

\n\ - \
\n\ - \
\n\ - \
" - ] - , testGroup "math" - [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> - "

\n\ - \\n\ - \σ|{x}

" - ] - , testGroup "headers" - [ "unnumbered header" =: - headerWith ("foo",["unnumbered"],[]) 1 - (text "Header 1" <> note (plain $ text "note")) =?> - "\n\ - \ Header 1<xref ref-type=\"fn\" rid=\"fn1\">1</xref>\n\ - \" - , "unnumbered sub header" =: - headerWith ("foo",["unnumbered"],[]) 1 - (text "Header") - <> headerWith ("foo",["unnumbered"],[]) 2 - (text "Sub-Header") =?> - "\n\ - \ Header\n\ - \ \n\ - \ Sub-Header\n\ - \ \n\ - \" - , "containing image" =: - header 1 (image "imgs/foo.jpg" "" (text "Alt text")) =?> - "\n\ - \ <inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" />\n\ - \" - ] - ] + , "code id starting with a number" =: + codeWith ("7y",[],[]) "print 5" =?> + "

print 5

" + ] + ] -- cgit v1.2.3 From 2d60524de43d59ffb1763a33a15cc2ecce613ecf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 10 Apr 2021 11:38:02 +0200 Subject: JATS writer: convert spans to elements Spans with attributes are converted to `` elements instead of being wrapped with `` and `` elements. Milestone elements are not allowed in documents using the articleauthoring tag set, so this change ensures the creation of valid documents. Closes: #7211 --- src/Text/Pandoc/Writers/JATS.hs | 13 +++++++------ test/Tests/Writers/JATS.hs | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index e78c6dc8f..5b3e439d4 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -444,18 +444,19 @@ inlineToJATS opts (Note contents) = do ("rid", "fn" <> tshow notenum)] $ text (show notenum) inlineToJATS opts (Cite _ lst) = - -- TODO revisit this after examining the jats.csl pipeline inlinesToJATS opts lst -inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils inlineToJATS opts (Span (ident,_,kvs) ils) = do contents <- inlinesToJATS opts ils let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs - , k `elem` ["content-type", "rationale", - "rid", "specific-use"]] - return $ selfClosingTag "milestone-start" attr <> contents <> - selfClosingTag "milestone-end" [] + , k `elem` ["alt", "content-type", "rid", "specific-use", + "vocab", "vocab-identifier", "vocab-term", + "vocab-term-identifier"]] + return $ + if null attr + then contents -- unwrap if no relevant attributes are given + else inTags False "named-content" attr contents inlineToJATS _ (Math t str) = do let addPref (Xml.Attr q v) | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 23c1686dc..e90438176 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -142,4 +142,19 @@ tests = codeWith ("7y",[],[]) "print 5" =?> "

print 5

" ] + + , testGroup "spans" + [ "unwrapped if no attributes given" =: + spanWith nullAttr "text in span" =?> + "

text in span

" + + , "converted to named-content element" =: + spanWith ("a", ["ignored"], [("alt", "aa")]) "text" =?> + "

text

" + + , "unwrapped if named-content element would have no attributes" =: + spanWith ("", ["ignored"], [("hidden", "true")]) "text in span" =?> + "

text in span

" + + ] ] -- cgit v1.2.3 From 85f379e474be72ae0f7a53ebc5efe2ad4f8165b4 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 28 Apr 2021 12:46:52 +0200 Subject: JATS writer: use either styled-content or named-content for spans. If the element has a content-type attribute, or at least one class, then that value is used as `content-type` and the span is put inside a `` element. Otherwise a `` element is used instead. Closes: #7211 --- src/Text/Pandoc/Writers/JATS.hs | 36 ++++++++++++++++++++++++++---------- test/Tests/Writers/JATS.hs | 14 +++++++++----- 2 files changed, 35 insertions(+), 15 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 0bcfa0df4..9db8723d1 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -21,12 +21,13 @@ module Text.Pandoc.Writers.JATS , writeJatsPublishing , writeJatsArticleAuthoring ) where +import Control.Applicative ((<|>)) import Control.Monad.Reader import Control.Monad.State import Data.Generics (everywhere, mkT) import Data.List (partition) import qualified Data.Map as M -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, listToMaybe) import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime) import qualified Data.Text as T import Data.Text (Text) @@ -449,18 +450,33 @@ inlineToJATS opts (Note contents) = do $ text (show notenum) inlineToJATS opts (Cite _ lst) = inlinesToJATS opts lst -inlineToJATS opts (Span (ident,_,kvs) ils) = do +inlineToJATS opts (Span (ident,classes,kvs) ils) = do contents <- inlinesToJATS opts ils - let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ - [("xml:lang",l) | ("lang",l) <- kvs] ++ - [(k,v) | (k,v) <- kvs - , k `elem` ["alt", "content-type", "rid", "specific-use", - "vocab", "vocab-identifier", "vocab-term", - "vocab-term-identifier"]] + let commonAttr = [("id", escapeNCName ident) | not (T.null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["alt", "specific-use"]] + -- A named-content element is a good fit for spans, but requires a + -- content-type attribute to be present. We use either the explicit + -- attribute or the first class as content type. If neither is + -- available, then we fall back to using a @styled-content@ element. + let (tag, specificAttr) = + case lookup "content-type" kvs <|> listToMaybe classes of + Just ct -> ( "named-content" + , ("content-type", ct) : + [(k, v) | (k, v) <- kvs + , k `elem` ["rid", "vocab", "vocab-identifier", + "vocab-term", "vocab-term-identifier"]]) + -- Fall back to styled-content + Nothing -> ("styled-content" + , [(k, v) | (k,v) <- kvs + , k `elem` ["style", "style-type", "style-detail", + "toggle"]]) + let attr = commonAttr ++ specificAttr + -- unwrap if wrapping element would have no attributes return $ if null attr - then contents -- unwrap if no relevant attributes are given - else inTags False "named-content" attr contents + then contents + else inTags False tag attr contents inlineToJATS _ (Math t str) = do let addPref (Xml.Attr q v) | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index e90438176..5b96ed2ed 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -148,13 +148,17 @@ tests = spanWith nullAttr "text in span" =?> "

text in span

" - , "converted to named-content element" =: - spanWith ("a", ["ignored"], [("alt", "aa")]) "text" =?> - "

text

" + , "converted to named-content element if class given" =: + spanWith ("a", ["genus-species"], [("alt", "aa")]) "C. elegans" =?> + ("

" + <> "C. elegans

") - , "unwrapped if named-content element would have no attributes" =: - spanWith ("", ["ignored"], [("hidden", "true")]) "text in span" =?> + , "unwrapped if styled-content element would have no attributes" =: + spanWith ("", [], [("hidden", "true")]) "text in span" =?> "

text in span

" + , "use content-type attribute if present" =: + spanWith ("", [], [("content-type", "species")]) "E. coli" =?> + "

E. coli

" ] ] -- cgit v1.2.3 From d3ca48656f036888fcf60acb7a0364a20f3554bf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 17 May 2021 13:10:45 +0200 Subject: ConTeXt writer tests: keep code lines below 80 chars. --- test/Tests/Writers/ConTeXt.hs | 232 ++++++++++++++++++++++-------------------- 1 file changed, 119 insertions(+), 113 deletions(-) (limited to 'test/Tests/Writers') diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs index 5c1c98d4e..a4aa69dcd 100644 --- a/test/Tests/Writers/ConTeXt.hs +++ b/test/Tests/Writers/ConTeXt.hs @@ -39,116 +39,122 @@ infix 4 =: (=:) = test context tests :: [TestTree] -tests = [ testGroup "inline code" - [ "with '}'" =: code "}" =?> "\\mono{\\}}" - , "without '}'" =: code "]" =?> "\\type{]}" - , testProperty "code property" $ \s -> null s || '\n' `elem` s || - if '{' `elem` s || '}' `elem` s - then context' (code $ pack s) == "\\mono{" ++ - context' (str $ pack s) ++ "}" - else context' (code $ pack s) == "\\type{" ++ s ++ "}" - ] - , testGroup "headers" - [ "level 1" =: - headerWith ("my-header",[],[]) 1 "My header" =?> "\\section[title={My header},reference={my-header}]" - , test contextDiv "section-divs" $ - ( headerWith ("header1", [], []) 1 (text "Header1") - <> headerWith ("header2", [], []) 2 (text "Header2") - <> headerWith ("header3", [], []) 3 (text "Header3") - <> headerWith ("header4", [], []) 4 (text "Header4") - <> headerWith ("header5", [], []) 5 (text "Header5") - <> headerWith ("header6", [], []) 6 (text "Header6")) - =?> - unlines [ "\\startsection[title={Header1},reference={header1}]\n" - , "\\startsubsection[title={Header2},reference={header2}]\n" - , "\\startsubsubsection[title={Header3},reference={header3}]\n" - , "\\startsubsubsubsection[title={Header4},reference={header4}]\n" - , "\\startsubsubsubsubsection[title={Header5},reference={header5}]\n" - , "\\startsubsubsubsubsubsection[title={Header6},reference={header6}]\n" - , "\\stopsubsubsubsubsubsection\n" - , "\\stopsubsubsubsubsection\n" - , "\\stopsubsubsubsection\n" - , "\\stopsubsubsection\n" - , "\\stopsubsection\n" - , "\\stopsection" ] - ] - , testGroup "bullet lists" - [ "nested" =: - bulletList [ - plain (text "top") - <> bulletList [ - plain (text "next") - <> bulletList [plain (text "bot")] - ] - ] =?> unlines - [ "\\startitemize[packed]" - , "\\item" - , " top" - , " \\startitemize[packed]" - , " \\item" - , " next" - , " \\startitemize[packed]" - , " \\item" - , " bot" - , " \\stopitemize" - , " \\stopitemize" - , "\\stopitemize" ] - ] - , testGroup "natural tables" - [ test contextNtb "table with header and caption" $ - let capt = text "Table 1" - aligns = [(AlignRight, ColWidthDefault), (AlignLeft, ColWidthDefault), (AlignCenter, ColWidthDefault), (AlignDefault, ColWidthDefault)] - headers = [plain $ text "Right", - plain $ text "Left", - plain $ text "Center", - plain $ text "Default"] - rows = [[plain $ text "1.1", - plain $ text "1.2", - plain $ text "1.3", - plain $ text "1.4"] - ,[plain $ text "2.1", - plain $ text "2.2", - plain $ text "2.3", - plain $ text "2.4"] - ,[plain $ text "3.1", - plain $ text "3.2", - plain $ text "3.3", - plain $ text "3.4"]] - toRow = Row nullAttr . map simpleCell - in table (simpleCaption $ plain capt) - aligns - (TableHead nullAttr [toRow headers]) - [TableBody nullAttr 0 [] $ map toRow rows] - (TableFoot nullAttr []) - =?> unlines [ "\\startplacetable[title={Table 1}]" - , "\\startTABLE" - , "\\startTABLEhead" - , "\\NC[align=left] Right" - , "\\NC[align=right] Left" - , "\\NC[align=middle] Center" - , "\\NC Default" - , "\\NC\\NR" - , "\\stopTABLEhead" - , "\\startTABLEbody" - , "\\NC[align=left] 1.1" - , "\\NC[align=right] 1.2" - , "\\NC[align=middle] 1.3" - , "\\NC 1.4" - , "\\NC\\NR" - , "\\NC[align=left] 2.1" - , "\\NC[align=right] 2.2" - , "\\NC[align=middle] 2.3" - , "\\NC 2.4" - , "\\NC\\NR" - , "\\stopTABLEbody" - , "\\startTABLEfoot" - , "\\NC[align=left] 3.1" - , "\\NC[align=right] 3.2" - , "\\NC[align=middle] 3.3" - , "\\NC 3.4" - , "\\NC\\NR" - , "\\stopTABLEfoot" - , "\\stopTABLE" - , "\\stopplacetable" ] - ] - ] +tests = + [ testGroup "inline code" + [ "with '}'" =: code "}" =?> "\\mono{\\}}" + , "without '}'" =: code "]" =?> "\\type{]}" + , testProperty "code property" $ \s -> null s || '\n' `elem` s || + if '{' `elem` s || '}' `elem` s + then context' (code $ pack s) == "\\mono{" ++ + context' (str $ pack s) ++ "}" + else context' (code $ pack s) == "\\type{" ++ s ++ "}" + ] + , testGroup "headers" + [ "level 1" =: + headerWith ("my-header",[],[]) 1 "My header" =?> + "\\section[title={My header},reference={my-header}]" + , test contextDiv "section-divs" $ + ( headerWith ("header1", [], []) 1 (text "Header1") + <> headerWith ("header2", [], []) 2 (text "Header2") + <> headerWith ("header3", [], []) 3 (text "Header3") + <> headerWith ("header4", [], []) 4 (text "Header4") + <> headerWith ("header5", [], []) 5 (text "Header5") + <> headerWith ("header6", [], []) 6 (text "Header6")) + =?> + unlines + [ "\\startsection[title={Header1},reference={header1}]\n" + , "\\startsubsection[title={Header2},reference={header2}]\n" + , "\\startsubsubsection[title={Header3},reference={header3}]\n" + , "\\startsubsubsubsection[title={Header4},reference={header4}]\n" + , "\\startsubsubsubsubsection[title={Header5},reference={header5}]\n" + , "\\startsubsubsubsubsubsection[title={Header6},reference={header6}]\n" + , "\\stopsubsubsubsubsubsection\n" + , "\\stopsubsubsubsubsection\n" + , "\\stopsubsubsubsection\n" + , "\\stopsubsubsection\n" + , "\\stopsubsection\n" + , "\\stopsection" ] + ] + , testGroup "bullet lists" + [ "nested" =: + bulletList [ + plain (text "top") + <> bulletList [ + plain (text "next") + <> bulletList [plain (text "bot")] + ] + ] =?> unlines + [ "\\startitemize[packed]" + , "\\item" + , " top" + , " \\startitemize[packed]" + , " \\item" + , " next" + , " \\startitemize[packed]" + , " \\item" + , " bot" + , " \\stopitemize" + , " \\stopitemize" + , "\\stopitemize" ] + ] + , testGroup "natural tables" + [ test contextNtb "table with header and caption" $ + let capt = text "Table 1" + aligns = [ (AlignRight, ColWidthDefault) + , (AlignLeft, ColWidthDefault) + , (AlignCenter, ColWidthDefault) + , (AlignDefault, ColWidthDefault) ] + headers = [plain $ text "Right", + plain $ text "Left", + plain $ text "Center", + plain $ text "Default"] + rows = [[plain $ text "1.1", + plain $ text "1.2", + plain $ text "1.3", + plain $ text "1.4"] + ,[plain $ text "2.1", + plain $ text "2.2", + plain $ text "2.3", + plain $ text "2.4"] + ,[plain $ text "3.1", + plain $ text "3.2", + plain $ text "3.3", + plain $ text "3.4"]] + toRow = Row nullAttr . map simpleCell + in table (simpleCaption $ plain capt) + aligns + (TableHead nullAttr [toRow headers]) + [TableBody nullAttr 0 [] $ map toRow rows] + (TableFoot nullAttr []) + =?> unlines [ "\\startplacetable[title={Table 1}]" + , "\\startTABLE" + , "\\startTABLEhead" + , "\\NC[align=left] Right" + , "\\NC[align=right] Left" + , "\\NC[align=middle] Center" + , "\\NC Default" + , "\\NC\\NR" + , "\\stopTABLEhead" + , "\\startTABLEbody" + , "\\NC[align=left] 1.1" + , "\\NC[align=right] 1.2" + , "\\NC[align=middle] 1.3" + , "\\NC 1.4" + , "\\NC\\NR" + , "\\NC[align=left] 2.1" + , "\\NC[align=right] 2.2" + , "\\NC[align=middle] 2.3" + , "\\NC 2.4" + , "\\NC\\NR" + , "\\stopTABLEbody" + , "\\startTABLEfoot" + , "\\NC[align=left] 3.1" + , "\\NC[align=right] 3.2" + , "\\NC[align=middle] 3.3" + , "\\NC 3.4" + , "\\NC\\NR" + , "\\stopTABLEfoot" + , "\\stopTABLE" + , "\\stopplacetable" ] + ] + ] -- cgit v1.2.3 From 4417dacc440e269c5a4db1d67b52da9e0fe05561 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 17 May 2021 13:00:38 +0200 Subject: ConTeXt writer: use span identifiers as reference anchors. Closes: #7246 --- src/Text/Pandoc/Writers/ConTeXt.hs | 8 ++++++-- test/Tests/Writers/ConTeXt.hs | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index b694437d8..57d752a67 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -487,7 +487,7 @@ inlineToConTeXt (Note contents) = do then literal "\\footnote{" <> nest 2 (chomp contents') <> char '}' else literal "\\startbuffer " <> nest 2 (chomp contents') <> literal "\\stopbuffer\\footnote{\\getbuffer}" -inlineToConTeXt (Span (_,_,kvs) ils) = do +inlineToConTeXt (Span (ident,_,kvs) ils) = do mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of Just "rtl" -> braces $ "\\righttoleft " <> txt @@ -497,7 +497,11 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do Just lng -> braces ("\\language" <> brackets (literal lng) <> txt) Nothing -> txt - wrapLang . wrapDir <$> inlineListToConTeXt ils + addReference = + if T.null ident + then id + else (("\\reference" <> brackets (literal ident) <> "{}") <>) + addReference . wrapLang . wrapDir <$> inlineListToConTeXt ils -- | Craft the section header, inserting the section reference, if supplied. sectionHeader :: PandocMonad m diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs index a4aa69dcd..fbbf9b948 100644 --- a/test/Tests/Writers/ConTeXt.hs +++ b/test/Tests/Writers/ConTeXt.hs @@ -43,6 +43,9 @@ tests = [ testGroup "inline code" [ "with '}'" =: code "}" =?> "\\mono{\\}}" , "without '}'" =: code "]" =?> "\\type{]}" + , "span with ID" =: + spanWith ("city", [], []) "Berlin" =?> + "\\reference[city]{}Berlin" , testProperty "code property" $ \s -> null s || '\n' `elem` s || if '{' `elem` s || '}' `elem` s then context' (code $ pack s) == "\\mono{" ++ -- cgit v1.2.3 From 25f5b927773eb730c2d5ef834bd61e1d2d5f09df Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 17 May 2021 15:37:25 +0200 Subject: HTML writer: ensure headings only have valid attribs in HTML4 Fixes: #5944 --- src/Text/Pandoc/Writers/HTML.hs | 16 +++++- test/Tests/Writers/HTML.hs | 109 +++++++++++++++++++++------------------- 2 files changed, 71 insertions(+), 54 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 332de1545..f7a387927 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -882,7 +882,7 @@ blockToHtml opts (BlockQuote blocks) = do else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do +blockToHtml opts (Header level (ident,classes,kvs) lst) = do contents <- inlineListToHtml opts lst let secnum = fromMaybe mempty $ lookup "number" kvs let contents' = if writerNumberSections opts && not (T.null secnum) @@ -890,7 +890,13 @@ blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do then (H.span ! A.class_ "header-section-number" $ toHtml secnum) >> strToHtml " " >> contents else contents - addAttrs opts attr + html5 <- gets stHtml5 + let kvs' = if html5 + then kvs + else [ (k, v) | (k, v) <- kvs + , k `elem` (["lang", "dir", "title", "style" + , "align"] ++ intrinsicEventsHTML4)] + addAttrs opts (ident,classes,kvs') $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' @@ -1526,6 +1532,12 @@ allowsMathEnvironments MathML = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False +-- | List of intrinsic event attributes allowed on all elements in HTML4. +intrinsicEventsHTML4 :: [Text] +intrinsicEventsHTML4 = + [ "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover" + , "onmouseout", "onmouseout", "onkeypress", "onkeydown", "onkeyup"] + isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool isRawHtml f = do html5 <- gets stHtml5 diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs index 328801e31..404f6da98 100644 --- a/test/Tests/Writers/HTML.hs +++ b/test/Tests/Writers/HTML.hs @@ -34,55 +34,60 @@ infix 4 =: (=:) = test html tests :: [TestTree] -tests = [ testGroup "inline code" - [ "basic" =: code "@&" =?> "@&" - , "haskell" =: codeWith ("",["haskell"],[]) ">>=" - =?> ">>=" - , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>=" - =?> ">>=" - ] - , testGroup "images" - [ "alt with formatting" =: - image "/url" "title" ("my " <> emph "image") - =?> "\"my" - ] - , testGroup "blocks" - [ "definition list with empty
" =: - definitionList [(mempty, [para $ text "foo bar"])] - =?> "

foo bar

" - ] - , testGroup "quotes" - [ "quote with cite attribute (without q-tags)" =: - doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) - =?> "“examples”" - , tQ "quote with cite attribute (with q-tags)" $ - doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) - =?> "examples" - ] - , testGroup "sample" - [ "sample should be rendered correctly" =: - plain (codeWith ("",["sample"],[]) "Answer is 42") =?> - "Answer is 42" - ] - , testGroup "variable" - [ "variable should be rendered correctly" =: - plain (codeWith ("",["variable"],[]) "result") =?> - "result" - ] - , testGroup "sample with style" - [ "samp should wrap highlighted code" =: - codeWith ("",["sample","haskell"],[]) ">>=" - =?> ("" ++ - ">>=") - ] - , testGroup "variable with style" - [ "var should wrap highlighted code" =: - codeWith ("",["haskell","variable"],[]) ">>=" - =?> ("" ++ - ">>=") - ] - ] - where - tQ :: (ToString a, ToPandoc a) - => String -> (a, String) -> TestTree - tQ = test htmlQTags +tests = + [ testGroup "inline code" + [ "basic" =: code "@&" =?> "@&" + , "haskell" =: codeWith ("",["haskell"],[]) ">>=" + =?> ">>=" + , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>=" + =?> ">>=" + ] + , testGroup "images" + [ "alt with formatting" =: + image "/url" "title" ("my " <> emph "image") + =?> "\"my" + ] + , testGroup "blocks" + [ "definition list with empty
" =: + definitionList [(mempty, [para $ text "foo bar"])] + =?> "

foo bar

" + , "heading with disallowed attributes" =: + headerWith ("", [], [("invalid","1"), ("lang", "en")]) 1 "test" + =?> + "

test

" + ] + , testGroup "quotes" + [ "quote with cite attribute (without q-tags)" =: + doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) + =?> "“examples”" + , tQ "quote with cite attribute (with q-tags)" $ + doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) + =?> "examples" + ] + , testGroup "sample" + [ "sample should be rendered correctly" =: + plain (codeWith ("",["sample"],[]) "Answer is 42") =?> + "Answer is 42" + ] + , testGroup "variable" + [ "variable should be rendered correctly" =: + plain (codeWith ("",["variable"],[]) "result") =?> + "result" + ] + , testGroup "sample with style" + [ "samp should wrap highlighted code" =: + codeWith ("",["sample","haskell"],[]) ">>=" + =?> ("" ++ + ">>=") + ] + , testGroup "variable with style" + [ "var should wrap highlighted code" =: + codeWith ("",["haskell","variable"],[]) ">>=" + =?> ("" ++ + ">>=") + ] + ] + where + tQ :: (ToString a, ToPandoc a) + => String -> (a, String) -> TestTree + tQ = test htmlQTags -- cgit v1.2.3 From 58fbf56548bf985b40e4338befaf5b11a0665cbe Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 24 May 2021 09:56:02 +0200 Subject: Jira writer: use `{color}` when span has a color attribute Closes: tarleb/jira-wiki-markup#10 --- src/Text/Pandoc/Writers/Jira.hs | 10 +++++++--- test/Tests/Writers/Jira.hs | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index aa78d9419..cf4dadebc 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -309,9 +309,13 @@ quotedToJira qtype xs = do spanToJira :: PandocMonad m => Attr -> [Inline] -> JiraConverter m [Jira.Inline] -spanToJira (ident, _classes, _attribs) inls = case ident of - "" -> toJiraInlines inls - _ -> (Jira.Anchor ident :) <$> toJiraInlines inls +spanToJira (ident, _classes, attribs) inls = + let wrap = case lookup "color" attribs of + Nothing -> id + Just color -> singleton . Jira.ColorInline (Jira.ColorName color) + in wrap <$> case ident of + "" -> toJiraInlines inls + _ -> (Jira.Anchor ident :) <$> toJiraInlines inls registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline] registerNotes contents = do diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index 0c6f48853..d8e856e34 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -67,6 +67,10 @@ tests = [ "id is used as anchor" =: spanWith ("unicorn", [], []) (str "Unicorn") =?> "{anchor:unicorn}Unicorn" + + , "use `color` attribute" =: + spanWith ("",[],[("color","red")]) "ruby" =?> + "{color:red}ruby{color}" ] , testGroup "code" -- cgit v1.2.3 From d46ea7d7da3f842d265f09730c90cfc3691576ef Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 25 May 2021 16:54:42 +0200 Subject: Jira: add support for "smart" links Support has been added for the new `[alias|https://example.com|smart-card]` syntax. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/Jira.hs | 2 ++ src/Text/Pandoc/Writers/Jira.hs | 2 ++ stack.yaml | 2 +- test/Tests/Readers/Jira.hs | 8 ++++++++ test/Tests/Writers/Jira.hs | 8 ++++++++ 6 files changed, 22 insertions(+), 2 deletions(-) (limited to 'test/Tests/Writers') diff --git a/pandoc.cabal b/pandoc.cabal index 4e10e01f3..241196d7c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -470,7 +470,7 @@ library http-client-tls >= 0.2.4 && < 0.4, http-types >= 0.8 && < 0.13, ipynb >= 0.1 && < 0.2, - jira-wiki-markup >= 1.3.5 && < 1.4, + jira-wiki-markup >= 1.4 && < 1.5, mtl >= 2.2 && < 2.3, network >= 2.6, network-uri >= 2.6 && < 2.8, diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index a3b415f09..cf111f173 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -172,6 +172,8 @@ jiraLinkToPandoc linkType alias url = Jira.Email -> link ("mailto:" <> url') "" alias' Jira.Attachment -> linkWith ("", ["attachment"], []) url' "" alias' Jira.User -> linkWith ("", ["user-account"], []) url' "" alias' + Jira.SmartCard -> linkWith ("", ["smart-card"], []) url' "" alias' + Jira.SmartLink -> linkWith ("", ["smart-link"], []) url' "" alias' -- | Get unicode representation of a Jira icon. iconUnicode :: Jira.Icon -> Text diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index cf4dadebc..1351814e9 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -280,6 +280,8 @@ toJiraLink (_, classes, _) (url, _) alias = do | Just email <- T.stripPrefix "mailto:" url' = (Jira.Email, email) | "user-account" `elem` classes = (Jira.User, dropTilde url) | "attachment" `elem` classes = (Jira.Attachment, url) + | "smart-card" `elem` classes = (Jira.SmartCard, url) + | "smart-link" `elem` classes = (Jira.SmartLink, url) | otherwise = (Jira.External, url) dropTilde txt = case T.uncons txt of Just ('~', username) -> username diff --git a/stack.yaml b/stack.yaml index 809157425..16e5ad2cf 100644 --- a/stack.yaml +++ b/stack.yaml @@ -9,7 +9,7 @@ packages: extra-deps: - hslua-1.3.0 - hslua-module-path-0.1.0 -- jira-wiki-markup-1.3.5 +- jira-wiki-markup-1.4.0 - random-1.2.0 - unicode-collation-0.1.3 - citeproc-0.4 diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs index cb7dde4ea..b7194a3b9 100644 --- a/test/Tests/Readers/Jira.hs +++ b/test/Tests/Readers/Jira.hs @@ -167,6 +167,14 @@ tests = , "user with description" =: "[John Doe|~johndoe]" =?> para (linkWith ("", ["user-account"], []) "~johndoe" "" "John Doe") + + , "'smart' link" =: + "[x|http://example.com|smart-link]" =?> + para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x") + + , "'smart' card" =: + "[x|http://example.com|smart-card]" =?> + para (linkWith ("", ["smart-card"], []) "http://example.com" "" "x") ] , "image" =: diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index d8e856e34..00a7ae931 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -61,6 +61,14 @@ tests = , "user link with user as description" =: linkWith ("", ["user-account"], []) "~johndoe" "" "~johndoe" =?> "[~johndoe]" + + , "'smart' link" =: + para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x") =?> + "[x|http://example.com|smart-link]" + + , "'smart' card" =: + para (linkWith ("", ["smart-card"], []) "http://example.org" "" "x") =?> + "[x|http://example.org|smart-card]" ] , testGroup "spans" -- cgit v1.2.3 From 44484d0dee1bd095240b9faf26f8d1dad8e560ea Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Sun, 11 Apr 2021 21:42:53 +0100 Subject: Docx reader: Read table column widths. --- src/Text/Pandoc/Readers/Docx.hs | 5 +- src/Text/Pandoc/Readers/Docx/Parse.hs | 2 +- test/Tests/Writers/Docx.hs | 5 ++ test/docx/0_level_headers.native | 4 +- test/docx/golden/table_one_row.docx | Bin 9840 -> 9840 bytes test/docx/golden/table_with_list_cell.docx | Bin 10159 -> 10162 bytes test/docx/golden/tables-default-widths.docx | Bin 0 -> 10200 bytes test/docx/golden/tables.docx | Bin 10200 -> 10202 bytes test/docx/sdt_elements.native | 6 +- test/docx/table_one_row.native | 8 +-- test/docx/table_variable_width.native | 12 ++-- test/docx/table_with_list_cell.native | 6 +- test/docx/tables-default-widths.native | 92 ++++++++++++++++++++++++++++ test/docx/tables.native | 18 +++--- 14 files changed, 128 insertions(+), 30 deletions(-) create mode 100644 test/docx/golden/tables-default-widths.docx create mode 100644 test/docx/tables-default-widths.native (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 22dd54193..375bb7338 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -639,7 +639,7 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) = bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty -bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do +bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do let cap' = simpleCaption $ plain $ text cap (hdr, rows) = case firstRowFormatting look of True | null rs -> (Nothing, [r]) @@ -669,7 +669,8 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do -- so should be possible. Alignment might be more difficult, -- since there doesn't seem to be a column entity in docx. let alignments = replicate width AlignDefault - widths = replicate width ColWidthDefault + totalWidth = sum grid + widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid return $ table cap' (zip alignments widths) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 7325ff300..978d6ff3a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -563,7 +563,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildrenByName ns "w" "gridCol" element in - mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger)) + mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger)) cols elemToTblGrid _ _ = throwError WrongElem diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 2e0f1e3fb..da25b95e0 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -111,6 +111,11 @@ tests = [ testGroup "inlines" def "docx/tables.native" "docx/golden/tables.docx" + , docxTest + "tables without explicit column widths" + def + "docx/tables-default-widths.native" + "docx/golden/tables-default-widths.docx" , docxTest "tables with lists in cells" def diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index 7f875891e..ed589b029 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -1,6 +1,6 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 1.0)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -49,4 +49,4 @@ ,Para [Strong [Str "Table",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] ,Header 1 ("introduction",[],[]) [Str "Introduction"] -,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]] \ No newline at end of file +,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]] diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index f75e567ab..a7a8f2519 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index a49f70643..1362d4609 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables-default-widths.docx b/test/docx/golden/tables-default-widths.docx new file mode 100644 index 000000000..f24e27516 Binary files /dev/null and b/test/docx/golden/tables-default-widths.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index f24e27516..9dcbbc9d0 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native index dca82f0a0..a072c0d39 100644 --- a/test/docx/sdt_elements.native +++ b/test/docx/sdt_elements.native @@ -1,8 +1,8 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.22069570301081556) + ,(AlignDefault,ColWidth 0.22069570301081556) + ,(AlignDefault,ColWidth 0.5586085939783689)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) diff --git a/test/docx/table_one_row.native b/test/docx/table_one_row.native index e9188b145..88d5e3af5 100644 --- a/test/docx/table_one_row.native +++ b/test/docx/table_one_row.native @@ -1,8 +1,8 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.3333333333333333) + ,(AlignDefault,ColWidth 0.3333333333333333) + ,(AlignDefault,ColWidth 0.3333333333333333)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -15,4 +15,4 @@ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "Table"]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native index 229cb83b1..43ac40cca 100644 --- a/test/docx/table_variable_width.native +++ b/test/docx/table_variable_width.native @@ -1,10 +1,10 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 2.0096205237840725e-2) + ,(AlignDefault,ColWidth 1.9882415820416888e-2) + ,(AlignDefault,ColWidth 0.22202030999465527) + ,(AlignDefault,ColWidth 0.4761090326028862) + ,(AlignDefault,ColWidth 1.0689470871191876e-4)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -42,4 +42,4 @@ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) []]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/table_with_list_cell.native b/test/docx/table_with_list_cell.native index 06d8606da..51a35184b 100644 --- a/test/docx/table_with_list_cell.native +++ b/test/docx/table_with_list_cell.native @@ -1,7 +1,7 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -22,4 +22,4 @@ ,[Para [Str "A"]] ,[Para [Str "Numbered",Space,Str "list."]]]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/tables-default-widths.native b/test/docx/tables-default-widths.native new file mode 100644 index 000000000..e541e5a6e --- /dev/null +++ b/test/docx/tables-default-widths.native @@ -0,0 +1,92 @@ +[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Name"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Game"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Fame"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Blame"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Lebron",Space,Str "James"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Basketball"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Very",Space,Str "High"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Leaving",Space,Str "Cleveland"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Ryan",Space,Str "Braun"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Baseball"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Moderate"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Steroids"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Russell",Space,Str "Wilson"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Football"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "High"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Tacky",Space,Str "uniform"]]]])] + (TableFoot ("",[],[]) + []) +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Sinple"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Table"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Without"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Header"]]]])] + (TableFoot ("",[],[]) + []) +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Simple"] + ,Para [Str "Multiparagraph"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Table"] + ,Para [Str "Full"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Of"] + ,Para [Str "Paragraphs"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "In",Space,Str "each"] + ,Para [Str "Cell."]]]])] + (TableFoot ("",[],[]) + [])] \ No newline at end of file diff --git a/test/docx/tables.native b/test/docx/tables.native index e541e5a6e..5a89496be 100644 --- a/test/docx/tables.native +++ b/test/docx/tables.native @@ -1,10 +1,10 @@ [Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -48,8 +48,8 @@ []) ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -68,8 +68,8 @@ []) ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -89,4 +89,4 @@ [Para [Str "In",Space,Str "each"] ,Para [Str "Cell."]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] -- cgit v1.2.3 From af9de925de4a441f4c45a977363f9a56589f57bc Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sat, 5 Jun 2021 14:16:44 +0200 Subject: DocBook writer: Remove non-existent admonitions attention, error and hint are actually just reStructuredText specific. danger was too until introduced in DocBook 5.2: https://github.com/docbook/docbook/issues/55 --- src/Text/Pandoc/Writers/Docbook.hs | 3 +-- test/Tests/Writers/Docbook.hs | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 25bd308bf..33a6f5f0c 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -198,8 +198,7 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) blockToDocbook opts (Div (ident,classes,_) bs) = do version <- ask let identAttribs = [(idName version, ident) | not (T.null ident)] - admonitions = ["attention","caution","danger","error","hint", - "important","note","tip","warning"] + admonitions = ["caution","danger","important","note","tip","warning"] case classes of (l:_) | l `elem` admonitions -> do let (mTitleBs, bodyBs) = diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 46203eeae..f517f803a 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -83,32 +83,32 @@ tests = [ testGroup "line blocks" , "" ] , "admonition-with-title" =: - divWith ("foo", ["attention"], []) ( + divWith ("foo", ["note"], []) ( divWith ("foo", ["title"], []) (plain (text "This is title")) <> para "This is a test" ) =?> unlines - [ "" + [ "" , " This is title" , " " , " This is a test" , " " - , "" + , "" ] , "admonition-with-title-in-para" =: - divWith ("foo", ["attention"], []) ( + divWith ("foo", ["note"], []) ( divWith ("foo", ["title"], []) (para "This is title") <> para "This is a test" ) =?> unlines - [ "" + [ "" , " This is title" , " " , " This is a test" , " " - , "" + , "" ] , "single-child" =: divWith ("foo", [], []) (para "This is a test") -- cgit v1.2.3 From 0948af9cc549f0ea3b85fa760aa521b8deaad2c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Jun 2021 11:15:40 -0700 Subject: Docx writer: Add table numbering for captioned tables. The numbers are added using fields, so that Word can create a list of tables that will update automatically. --- src/Text/Pandoc/Writers/Docx/Table.hs | 31 ++++++++++++++++++++++++++++--- src/Text/Pandoc/Writers/Docx/Types.hs | 2 ++ test/Tests/Writers/OOXML.hs | 4 +++- test/docx/golden/image.docx | Bin 26774 -> 26776 bytes 4 files changed, 33 insertions(+), 4 deletions(-) (limited to 'test/Tests/Writers') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 49917e315..7a84c5278 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -17,7 +17,7 @@ import Control.Monad.State.Strict import Data.Array import Data.Text (Text) import Text.Pandoc.Definition -import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm) import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Printf (printf) @@ -25,6 +25,7 @@ import Text.Pandoc.Writers.GridTable hiding (Table) import Text.Pandoc.Writers.OOXML import Text.Pandoc.XML.Light as XML hiding (Attr) import qualified Data.Text as T +import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Writers.GridTable as Grid tableToOpenXML :: PandocMonad m @@ -33,15 +34,23 @@ tableToOpenXML :: PandocMonad m -> WS m [Content] tableToOpenXML blocksToOpenXML gridTable = do setFirstPara - let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) = + let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) = gridTable let (Caption _maybeShortCaption captionBlocks) = caption + tablenum <- gets stNextTableNum + unless (null captionBlocks) $ + modify $ \st -> st{ stNextTableNum = tablenum + 1 } + let tableid = if T.null ident + then "table" <> tshow tablenum + else ident + tablename <- translateTerm Term.Table let captionStr = stringify captionBlocks let aligns = map fst $ elems colspecs captionXml <- if null captionBlocks then return [] else withParaPropM (pStyleM "Table Caption") - $ blocksToOpenXML captionBlocks + $ blocksToOpenXML + $ addLabel tableid tablename tablenum captionBlocks -- We set "in table" after processing the caption, because we don't -- want the "Table Caption" style to be overwritten with "Compact". modify $ \s -> s { stInTable = True } @@ -81,6 +90,22 @@ tableToOpenXML blocksToOpenXML gridTable = do modify $ \s -> s { stInTable = False } return $ captionXml ++ [Elem tbl] +addLabel :: Text -> Text -> Int -> [Block] -> [Block] +addLabel tableid tablename tablenum bs = + case bs of + (Para ils : rest) -> Para (label : Space : ils) : rest + (Plain ils : rest) -> Plain (label : Space : ils) : rest + _ -> Para [label] : bs + where + label = Span (tableid,[],[]) + [Str (tablename <> "\160"), + RawInline (Format "openxml") + (" " \\* ARABIC \">" + <> tshow tablenum + <> ""), + Str ":"] + -- | Parts of a table data RowType = HeadRow | BodyRow | FootRow diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs index 36ac45ad2..74b8d2753 100644 --- a/src/Text/Pandoc/Writers/Docx/Types.hs +++ b/src/Text/Pandoc/Writers/Docx/Types.hs @@ -118,6 +118,7 @@ data WriterState = WriterState{ , stDynamicTextProps :: Set.Set CharStyleName , stCurId :: Int , stNextFigureNum :: Int + , stNextTableNum :: Int } defaultWriterState :: WriterState @@ -139,6 +140,7 @@ defaultWriterState = WriterState{ , stDynamicTextProps = Set.empty , stCurId = 20 , stNextFigureNum = 1 + , stNextTableNum = 1 } setFirstPara :: PandocMonad m => WS m () diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index c1e47622d..83f05cfec 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -55,7 +55,9 @@ testArchive :: (WriterOptions -> Pandoc -> PandocIO BL.ByteString) -> IO Archive testArchive writerFn opts fp = do txt <- T.readFile fp - bs <- runIOorExplode $ readNative def txt >>= writerFn opts + bs <- runIOorExplode $ do + setTranslations "en-US" + readNative def txt >>= writerFn opts return $ toArchive bs compareFileList :: FilePath -> Archive -> Archive -> Maybe String diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 9fe65326f..7c2d8a9ac 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ -- cgit v1.2.3