diff options
Diffstat (limited to 'src/Text/Pandoc/Writers')
57 files changed, 4471 insertions, 2789 deletions
diff --git a/src/Text/Pandoc/Writers/AnnotatedTable.hs b/src/Text/Pandoc/Writers/AnnotatedTable.hs index 48c9d61f2..3f69496a9 100644 --- a/src/Text/Pandoc/Writers/AnnotatedTable.hs +++ b/src/Text/Pandoc/Writers/AnnotatedTable.hs @@ -1,8 +1,12 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE UndecidableInstances #-} {- | Module : Text.Pandoc.Writers.AnnotatedTable @@ -45,6 +49,7 @@ import Data.Generics ( Data import Data.List.NonEmpty ( NonEmpty(..) ) import GHC.Generics ( Generic ) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Walk ( Walkable (..) ) -- | An annotated table type, corresponding to the Pandoc 'B.Table' -- constructor and the HTML @\<table\>@ element. It records the data @@ -298,3 +303,21 @@ fromBodyRow (BodyRow attr _ rh rb) = fromCell :: Cell -> B.Cell fromCell (Cell _ _ c) = c + +-- +-- Instances +-- +instance Walkable a B.Cell => Walkable a Cell where + walkM f (Cell colspecs colnum cell) = + Cell colspecs colnum <$> walkM f cell + query f (Cell _colspecs _colnum cell) = query f cell + +instance Walkable a B.Cell => Walkable a HeaderRow where + walkM f (HeaderRow attr rownum cells) = + HeaderRow attr rownum <$> walkM f cells + query f (HeaderRow _attr _rownum cells) = query f cells + +instance Walkable a B.Cell => Walkable a TableHead where + walkM f (TableHead attr rows) = + TableHead attr <$> walkM f rows + query f (TableHead _attr rows) = query f rows diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e742577b6..ab7e5f1a9 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -22,6 +22,7 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where import Control.Monad.State.Strict import Data.Char (isPunctuation, isSpace) import Data.List (intercalate, intersperse) +import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (fromMaybe, isJust) import qualified Data.Set as Set import qualified Data.Text as T @@ -37,6 +38,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared + data WriterState = WriterState { defListMarker :: Text , orderedListLevel :: Int , bulletListLevel :: Int @@ -45,6 +47,10 @@ data WriterState = WriterState { defListMarker :: Text , asciidoctorVariant :: Bool , inList :: Bool , hasMath :: Bool + -- |0 is no table + -- 1 is top level table + -- 2 is a table in a table + , tableNestingLevel :: Int } defaultWriterState :: WriterState @@ -56,6 +62,7 @@ defaultWriterState = WriterState { defListMarker = "::" , asciidoctorVariant = False , inList = False , hasMath = False + , tableNestingLevel = 0 } -- | Convert Pandoc to AsciiDoc. @@ -98,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do -- | Escape special characters for AsciiDoc. escapeString :: Text -> Text -escapeString = escapeStringUsing escs - where escs = backslashEscapes "{" +escapeString t + | T.any (== '{') t = T.concatMap escChar t + | otherwise = t + where escChar '{' = "\\{" + escChar c = T.singleton c -- | Ordered list start parser for use in Para below. olMarker :: Parser Text ParserState Char @@ -194,7 +204,7 @@ blockToAsciiDoc opts (BlockQuote blocks) = do else contents let bar = text "____" return $ bar $$ chomp contents' $$ bar <> blankline -blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do +blockToAsciiDoc opts block@(Table _ blkCapt specs thead tbody tfoot) = do let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot caption' <- inlineListToAsciiDoc opts caption @@ -236,23 +246,42 @@ blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do $ zipWith colspec aligns widths') <> text "," <> headerspec <> text "]" + + -- construct cells and recurse in case of nested tables + parentTableLevel <- gets tableNestingLevel + let currentNestingLevel = parentTableLevel + 1 + + modify $ \st -> st{ tableNestingLevel = currentNestingLevel } + + let separator = text (if parentTableLevel == 0 + then "|" -- top level separator + else "!") -- nested separator + let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x] - return $ text "|" <> chomp d + return $ separator <> chomp d makeCell [Para x] = makeCell [Plain x] - makeCell [] = return $ text "|" - makeCell bs = do d <- blockListToAsciiDoc opts bs - return $ text "a|" $$ d + makeCell [] = return separator + makeCell bs = if currentNestingLevel == 2 + then do + --asciidoc only supports nesting once + report $ BlockNotRendered block + return separator + else do + d <- blockListToAsciiDoc opts bs + return $ (text "a" <> separator) $$ d + let makeRow cells = hsep `fmap` mapM makeCell cells rows' <- mapM makeRow rows head' <- makeRow headers + modify $ \st -> st{ tableNestingLevel = parentTableLevel } let head'' = if all null headers then empty else head' let colwidth = if writerWrapText opts == WrapAuto then writerColumns opts else 100000 - let maxwidth = maximum $ map offset (head':rows') + let maxwidth = maximum $ fmap offset (head' :| rows') let body = if maxwidth > colwidth then vsep rows' else vcat rows' - let border = text "|===" - return $ + let border = separator <> text "===" + return $ caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline blockToAsciiDoc opts (BulletList items) = do inlist <- gets inList @@ -470,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do | otherwise -> [Str "``"] ++ lst ++ [Str "''"] inlineToAsciiDoc _ (Code _ str) = do isAsciidoctor <- gets asciidoctorVariant - let contents = literal (escapeStringUsing (backslashEscapes "`") str) + let escChar '`' = "\\'" + escChar c = T.singleton c + let contents = literal (T.concatMap escChar str) return $ if isAsciidoctor then text "`+" <> contents <> "+`" diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs new file mode 100644 index 000000000..95de6b71f --- /dev/null +++ b/src/Text/Pandoc/Writers/BibTeX.hs @@ -0,0 +1,61 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.BibTeX + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Writes a BibTeX or BibLaTeX bibliographies based on the +'references' metadata in a Pandoc document. +-} +module Text.Pandoc.Writers.BibTeX + ( writeBibTeX + , writeBibLaTeX + ) +where + +import Text.Pandoc.Options +import Text.Pandoc.Definition +import Data.Text (Text) +import Data.Maybe (mapMaybe) +import Citeproc (parseLang) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Citeproc.BibTeX as BibTeX +import Text.Pandoc.Citeproc.MetaValue (metaValueToReference) +import Text.Pandoc.Writers.Shared (lookupMetaString, defField, + addVariablesToContext) +import Text.DocLayout (render, vcat) +import Text.DocTemplates (Context(..)) +import Text.Pandoc.Templates (renderTemplate) + +-- | Write BibTeX based on the references metadata from a Pandoc document. +writeBibTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeBibTeX = writeBibTeX' BibTeX.Bibtex + +-- | Write BibLaTeX based on the references metadata from a Pandoc document. +writeBibLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeBibLaTeX = writeBibTeX' BibTeX.Biblatex + +writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text +writeBibTeX' variant opts (Pandoc meta _) = do + let mblang = case lookupMetaString "lang" meta of + "" -> Nothing + t -> either (const Nothing) Just $ parseLang t + let refs = case lookupMeta "references" meta of + Just (MetaList xs) -> mapMaybe metaValueToReference xs + _ -> [] + let main = vcat $ map (BibTeX.writeBibtexString opts variant mblang) refs + let context = defField "body" main + $ addVariablesToContext opts (mempty :: Context Text) + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + return $ render colwidth $ + case writerTemplate opts of + Nothing -> main + Just tpl -> renderTemplate tpl context + + diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 66ded218f..8733b7149 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Writers.CommonMark - Copyright : Copyright (C) 2015-2020 John MacFarlane + Copyright : Copyright (C) 2015-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 0a6313513..3cafcefba 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -20,7 +20,7 @@ import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) -import Text.Pandoc.BCP47 +import Text.Collate.Lang (Lang(..)) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -177,8 +177,12 @@ blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst return $ contents <> blankline blockToConTeXt (LineBlock lns) = do - doclines <- nowrap . vcat <$> mapM inlineListToConTeXt lns - return $ "\\startlines" $$ doclines $$ "\\stoplines" <> blankline + let emptyToBlankline doc = if isEmpty doc + then blankline + else doc + doclines <- mapM inlineListToConTeXt lns + let contextLines = vcat . map emptyToBlankline $ doclines + return $ "\\startlines" $$ contextLines $$ "\\stoplines" <> blankline blockToConTeXt (BlockQuote lst) = do contents <- blockListToConTeXt lst return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline @@ -228,13 +232,7 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do Period -> "stopper=." OneParen -> "stopper=)" TwoParens -> "left=(,stopper=)" - let width = maximum $ map T.length $ take (length contents) - (orderedListMarkers (start, style', delim)) - let width' = (toEnum width + 1) / 2 - let width'' = if width' > (1.5 :: Double) - then "width=" <> tshow width' <> "em" - else "" - let specs2Items = filter (not . T.null) [start', delim', width''] + let specs2Items = filter (not . T.null) [start', delim'] let specs2 = if null specs2Items then "" else "[" <> T.intercalate "," specs2Items <> "]" @@ -248,8 +246,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do UpperAlpha -> 'A') : if isTightList lst then ",packed]" else "]" let specs = T.pack style'' <> specs2 - return $ "\\startitemize" <> literal specs $$ vcat contents $$ - "\\stopitemize" <> blankline + return $ "\\startenumerate" <> literal specs $$ vcat contents $$ + "\\stopenumerate" <> blankline blockToConTeXt (DefinitionList lst) = liftM vcat $ mapM defListItemToConTeXt lst blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline @@ -432,9 +430,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do put $ st {stNextRef = next + 1} let ref = "url" <> tshow next contents <- inlineListToConTeXt txt + let escChar '#' = "\\#" + escChar '%' = "\\%" + escChar c = T.singleton c + let escContextURL = T.concatMap escChar return $ "\\useURL" <> brackets (literal ref) - <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) + <> brackets (literal $ escContextURL src) <> (if isAutolink then empty else brackets empty <> brackets contents) @@ -477,7 +479,7 @@ inlineToConTeXt (Note contents) = do then literal "\\footnote{" <> nest 2 (chomp contents') <> char '}' else literal "\\startbuffer " <> nest 2 (chomp contents') <> literal "\\stopbuffer\\footnote{\\getbuffer}" -inlineToConTeXt (Span (_,_,kvs) ils) = do +inlineToConTeXt (Span (ident,_,kvs) ils) = do mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of Just "rtl" -> braces $ "\\righttoleft " <> txt @@ -487,7 +489,11 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do Just lng -> braces ("\\language" <> brackets (literal lng) <> txt) Nothing -> txt - wrapLang . wrapDir <$> inlineListToConTeXt ils + addReference = + if T.null ident + then id + else (("\\reference" <> brackets (literal ident) <> "{}") <>) + addReference . wrapLang . wrapDir <$> inlineListToConTeXt ils -- | Craft the section header, inserting the section reference, if supplied. sectionHeader :: PandocMonad m @@ -549,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes fromBCP47' :: Maybe Lang -> Maybe Text -fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" -fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" -fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" -fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" -fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" -fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" -fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" -fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" -fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" -fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" -fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" -fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" -fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" -fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" -fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" -fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" -fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" -fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" -fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" -fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" -fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" -fromBCP47' (Just (Lang l _ _ _) ) = Just l -fromBCP47' Nothing = Nothing +fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _)) = Just "deo" +fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de" +fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at" +fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _)) = Just "agr" +fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us" +fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _ _ _)) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _ _ _)) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _ _ _)) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _ _ _)) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _ _ _)) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _ _ _)) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _ _ _)) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _ _ _)) = Just "cn" +fromBCP47' (Just (Lang l _ _ _ _ _)) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs index 08310de65..395335667 100644 --- a/src/Text/Pandoc/Writers/CslJson.hs +++ b/src/Text/Pandoc/Writers/CslJson.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.CslJson - Copyright : Copyright (C) 2020 John MacFarlane + Copyright : Copyright (C) 2020-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -34,23 +34,24 @@ import Control.Monad.Identity import Citeproc.Locale (getLocale) import Citeproc.CslJson import Text.Pandoc.Options (WriterOptions) -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, fromMaybe) import Data.Aeson.Encode.Pretty (Config (..), Indent (Spaces), NumberFormat (Generic), defConfig, encodePretty') writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCslJson _opts (Pandoc meta _) = do - let lang = maybe (Lang "en" (Just "US")) parseLang - (lookupMeta "lang" meta >>= metaValueToText) + let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] []) + (lookupMeta "lang" meta >>= metaValueToText >>= + either (const Nothing) Just . parseLang) locale <- case getLocale lang of Left e -> throwError $ PandocCiteprocError e Right l -> return l - case lookupMeta "references" meta of - Just (MetaList rs) -> return $ (UTF8.toText $ - toCslJson locale (mapMaybe metaValueToReference rs)) - <> "\n" - _ -> throwError $ PandocAppError "No references field" + let rs = case lookupMeta "references" meta of + Just (MetaList xs) -> xs + _ -> [] + return $ UTF8.toText + (toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n" fromInlines :: [Inline] -> CslJson Text fromInlines = foldMap fromInline . B.fromList diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 8da611b61..58c4bb5be 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 408d8cc0c..33a6f5f0c 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -15,6 +15,7 @@ Conversion of 'Pandoc' documents to Docbook XML. module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where import Control.Monad.Reader import Data.Generics (everywhere, mkT) +import Data.Maybe (isNothing) import Data.Monoid (Any (..)) import Data.Text (Text) import qualified Data.Text as T @@ -40,6 +41,25 @@ data DocBookVersion = DocBook4 | DocBook5 type DB = ReaderT DocBookVersion +-- | Get level of the top-level headers based on the configured top-level division. +-- The header level can then be used to determine appropriate DocBook element +-- for each subdivision associated with a header. +-- The numbering here follows LaTeX's internal numbering +getStartLvl :: WriterOptions -> Int +getStartLvl opts = + case writerTopLevelDivision opts of + TopLevelPart -> -1 + TopLevelChapter -> 0 + TopLevelSection -> 1 + TopLevelDefault -> 1 + +-- | Get correct name for the id attribute based on DocBook version. +-- DocBook 4 used custom id attribute but DocBook 5 adopted the xml:id specification. +-- https://www.w3.org/TR/xml-id/ +idName :: DocBookVersion -> Text +idName DocBook5 = "xml:id" +idName DocBook4 = "id" + -- | Convert list of authors to a docbook <author> section authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines authorToDocbook opts name' = do @@ -79,12 +99,7 @@ writeDocbook opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - -- The numbering here follows LaTeX's internal numbering - let startLvl = case writerTopLevelDivision opts of - TopLevelPart -> -1 - TopLevelChapter -> 0 - TopLevelSection -> 1 - TopLevelDefault -> 1 + let startLvl = getStartLvl opts let fromBlocks = blocksToDocbook opts . makeSections False (Just startLvl) auths' <- mapM (authorToDocbook opts) $ docAuthors meta @@ -153,7 +168,7 @@ blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m (Doc Text) blockToDocbook _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: -blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do +blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) = do version <- ask -- Docbook doesn't allow sections with no content, so insert some if needed let bs = if null xs @@ -166,28 +181,52 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do then "section" else "sect" <> tshow n _ -> "simplesect" - idName = if version == DocBook5 - then "xml:id" - else "id" - idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')] - nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] - else [] - attribs = nsAttr <> idAttr + idAttr = [(idName version, writerIdentifierPrefix opts <> id') | not (T.null id')] + -- We want to add namespaces to the root (top-level) element. + nsAttr = if version == DocBook5 && lvl == getStartLvl opts && isNothing (writerTemplate opts) + -- Though, DocBook 4 does not support namespaces and + -- standalone documents will include them in the template. + then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] + else [] + + -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id + miscAttr = filter (isSectionAttr version) attrs + attribs = nsAttr <> idAttr <> miscAttr title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents -blockToDocbook opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (T.null ident)] in - if hasLineBreaks lst - then flush . nowrap . inTags False "literallayout" attribs - <$> inlinesToDocbook opts lst - else inTags True "para" attribs <$> inlinesToDocbook opts lst -blockToDocbook opts (Div (ident,_,_) bs) = do - contents <- blocksToDocbook opts (map plainToPara bs) - return $ - (if T.null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) $$ contents +blockToDocbook opts (Div (ident,classes,_) bs) = do + version <- ask + let identAttribs = [(idName version, ident) | not (T.null ident)] + admonitions = ["caution","danger","important","note","tip","warning"] + case classes of + (l:_) | l `elem` admonitions -> do + let (mTitleBs, bodyBs) = + case bs of + -- Matches AST produced by the DocBook reader → Markdown writer → Markdown reader chain. + (Div (_,["title"],_) [Para ts] : rest) -> (Just (inlinesToDocbook opts ts), rest) + -- Matches AST produced by the Docbook reader. + (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest) + _ -> (Nothing, bs) + admonitionTitle <- case mTitleBs of + Nothing -> return mempty + -- id will be attached to the admonition so let’s pass empty identAttrs. + Just titleBs -> inTags False "title" [] <$> titleBs + admonitionBody <- handleDivBody [] bodyBs + return (inTags True l identAttribs (admonitionTitle $$ admonitionBody)) + _ -> handleDivBody identAttribs bs + where + handleDivBody identAttribs [Para lst] = + if hasLineBreaks lst + then flush . nowrap . inTags False "literallayout" identAttribs + <$> inlinesToDocbook opts lst + else inTags True "para" identAttribs <$> inlinesToDocbook opts lst + handleDivBody identAttribs bodyBs = do + contents <- blocksToDocbook opts (map plainToPara bodyBs) + return $ + (if null identAttribs + then mempty + else selfClosingTag "anchor" identAttribs) $$ contents blockToDocbook _ h@Header{} = do -- should be handled by Div section above, except inside lists/blockquotes report $ BlockNotRendered h @@ -213,17 +252,18 @@ blockToDocbook opts (LineBlock lns) = blockToDocbook opts $ linesToPara lns blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" <$> blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ +blockToDocbook opts (CodeBlock (_,classes,_) str) = return $ literal ("<programlisting" <> lang <> ">") <> cr <> flush (literal (escapeStringForXML str) <> cr <> literal "</programlisting>") where lang = if null langs then "" else " language=\"" <> escapeStringForXML (head langs) <> "\"" - isLang l = T.toLower l `elem` map T.toLower languages + syntaxMap = writerSyntaxMap opts + isLang l = T.toLower l `elem` map T.toLower (languages syntaxMap) langsFrom s = if isLang s then [s] - else languagesByExtension . T.toLower $ s + else (languagesByExtension syntaxMap) . T.toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] @@ -341,11 +381,12 @@ inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" <$> inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst -inlineToDocbook opts (Span (ident,_,_) ils) = +inlineToDocbook opts (Span (ident,_,_) ils) = do + version <- ask ((if T.null ident then mempty - else selfClosingTag "anchor" [("id", ident)]) <>) <$> - inlinesToDocbook opts ils + else selfClosingTag "anchor" [(idName version, ident)]) <>) <$> + inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = return $ inTagsSimple "literal" $ literal (escapeStringForXML str) inlineToDocbook opts (Math t str) @@ -413,3 +454,43 @@ idAndRole (id',cls,_) = ident <> role where ident = [("id", id') | not (T.null id')] role = [("role", T.unwords cls) | not (null cls)] + +isSectionAttr :: DocBookVersion -> (Text, Text) -> Bool +isSectionAttr _ ("label",_) = True +isSectionAttr _ ("status",_) = True +isSectionAttr DocBook5 ("annotations",_) = True +isSectionAttr DocBook5 ("dir","ltr") = True +isSectionAttr DocBook5 ("dir","rtl") = True +isSectionAttr DocBook5 ("dir","lro") = True +isSectionAttr DocBook5 ("dir","rlo") = True +isSectionAttr _ ("remap",_) = True +isSectionAttr _ ("revisionflag","changed") = True +isSectionAttr _ ("revisionflag","added") = True +isSectionAttr _ ("revisionflag","deleted") = True +isSectionAttr _ ("revisionflag","off") = True +isSectionAttr _ ("role",_) = True +isSectionAttr DocBook5 ("version",_) = True +isSectionAttr DocBook5 ("xml:base",_) = True +isSectionAttr DocBook5 ("xml:lang",_) = True +isSectionAttr _ ("xreflabel",_) = True +isSectionAttr DocBook5 ("linkend",_) = True +isSectionAttr DocBook5 ("linkends",_) = True +isSectionAttr DocBook5 ("xlink:actuate",_) = True +isSectionAttr DocBook5 ("xlink:arcrole",_) = True +isSectionAttr DocBook5 ("xlink:from",_) = True +isSectionAttr DocBook5 ("xlink:href",_) = True +isSectionAttr DocBook5 ("xlink:label",_) = True +isSectionAttr DocBook5 ("xlink:role",_) = True +isSectionAttr DocBook5 ("xlink:show",_) = True +isSectionAttr DocBook5 ("xlink:title",_) = True +isSectionAttr DocBook5 ("xlink:to",_) = True +isSectionAttr DocBook5 ("xlink:type",_) = True +isSectionAttr DocBook4 ("arch",_) = True +isSectionAttr DocBook4 ("condition",_) = True +isSectionAttr DocBook4 ("conformance",_) = True +isSectionAttr DocBook4 ("lang",_) = True +isSectionAttr DocBook4 ("os",_) = True +isSectionAttr DocBook4 ("revision",_) = True +isSectionAttr DocBook4 ("security",_) = True +isSectionAttr DocBook4 ("vendor",_) = True +isSectionAttr _ (_,_) = False diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a380fd4fa..a3c4b6be1 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -7,7 +7,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -22,7 +22,6 @@ import Control.Applicative ((<|>)) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader import Control.Monad.State.Strict -import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import Data.Char (isSpace, isLetter) import Data.List (intercalate, isPrefixOf, isSuffixOf) @@ -31,13 +30,14 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import System.Random (randomRs, mkStdGen) -import Text.Pandoc.BCP47 (getLang, renderLang) -import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) +import Text.Collate.Lang (renderLang) +import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang, translateTerm) +import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time import Text.Pandoc.UTF8 (fromTextLazy) @@ -47,135 +47,37 @@ import Text.Pandoc.Highlighting (highlight) import Text.Pandoc.Error import Text.Pandoc.ImageSize import Text.Pandoc.Logging -import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType, - getMimeTypeDef) +import Text.Pandoc.MIME (extensionFromMimeType, getMimeType, getMimeTypeDef) import Text.Pandoc.Options import Text.Pandoc.Writers.Docx.StyleMap +import Text.Pandoc.Writers.Docx.Table +import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Pandoc.Walk +import qualified Text.Pandoc.Writers.GridTable as Grid import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Printf (printf) import Text.TeXMath -import Text.XML.Light as XML -import Text.XML.Light.Cursor as XMLC import Text.Pandoc.Writers.OOXML - -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - -listMarkerToId :: ListMarker -> String -listMarkerToId NoMarker = "990" -listMarkerToId BulletMarker = "991" -listMarkerToId (NumberMarker sty delim n) = - '9' : '9' : styNum : delimNum : show n - where styNum = case sty of - DefaultStyle -> '2' - Example -> '3' - Decimal -> '4' - LowerRoman -> '5' - UpperRoman -> '6' - LowerAlpha -> '7' - UpperAlpha -> '8' - delimNum = case delim of - DefaultDelim -> '0' - Period -> '1' - OneParen -> '2' - TwoParens -> '3' - -data EnvProps = EnvProps{ styleElement :: Maybe Element - , otherElements :: [Element] - } - -instance Semigroup EnvProps where - EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es') - -instance Monoid EnvProps where - mempty = EnvProps Nothing [] - mappend = (<>) +import Text.Pandoc.XML.Light as XML +import Data.Generics (mkT, everywhere) squashProps :: EnvProps -> [Element] squashProps (EnvProps Nothing es) = es squashProps (EnvProps (Just e) es) = e : es -data WriterEnv = WriterEnv{ envTextProperties :: EnvProps - , envParaProperties :: EnvProps - , envRTL :: Bool - , envListLevel :: Int - , envListNumId :: Int - , envInDel :: Bool - , envChangesAuthor :: T.Text - , envChangesDate :: T.Text - , envPrintWidth :: Integer - } - -defaultWriterEnv :: WriterEnv -defaultWriterEnv = WriterEnv{ envTextProperties = mempty - , envParaProperties = mempty - , envRTL = False - , envListLevel = -1 - , envListNumId = 1 - , envInDel = False - , envChangesAuthor = "unknown" - , envChangesDate = "1969-12-31T19:00:00Z" - , envPrintWidth = 1 - } - -data WriterState = WriterState{ - stFootnotes :: [Element] - , stComments :: [([(T.Text, T.Text)], [Inline])] - , stSectionIds :: Set.Set T.Text - , stExternalLinks :: M.Map String String - , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) - , stLists :: [ListMarker] - , stInsId :: Int - , stDelId :: Int - , stStyleMaps :: StyleMaps - , stFirstPara :: Bool - , stInTable :: Bool - , stInList :: Bool - , stTocTitle :: [Inline] - , stDynamicParaProps :: Set.Set ParaStyleName - , stDynamicTextProps :: Set.Set CharStyleName - , stCurId :: Int - } - -defaultWriterState :: WriterState -defaultWriterState = WriterState{ - stFootnotes = defaultFootnotes - , stComments = [] - , stSectionIds = Set.empty - , stExternalLinks = M.empty - , stImages = M.empty - , stLists = [NoMarker] - , stInsId = 1 - , stDelId = 1 - , stStyleMaps = StyleMaps M.empty M.empty - , stFirstPara = False - , stInTable = False - , stInList = False - , stTocTitle = [Str "Table of Contents"] - , stDynamicParaProps = Set.empty - , stDynamicTextProps = Set.empty - , stCurId = 20 - } - -type WS m = ReaderT WriterEnv (StateT WriterState m) - -renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap :: Int -> [Element] -> M.Map Text Text renumIdMap _ [] = M.empty renumIdMap n (e:es) | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = - M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es) + M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es) | otherwise = renumIdMap n es -replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr] replaceAttr f val = map $ \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a -renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element +renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element renumId f renumMap e | Just oldId <- findAttrBy f e , Just newId <- M.lookup oldId renumMap = @@ -184,18 +86,12 @@ renumId f renumMap e e { elAttribs = attrs' } | otherwise = e -renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] +renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) -findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text -findAttrTextBy x = fmap T.pack . findAttrBy x - -lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text -lookupAttrTextBy x = fmap T.pack . lookupAttrBy x - -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: T.Text -> T.Text +stripInvalidChars :: Text -> Text stripInvalidChars = T.filter isValidChar -- | See XML reference @@ -217,7 +113,7 @@ writeDocx opts doc = do let doc' = Pandoc meta blocks' username <- P.lookupEnv "USERNAME" - utctime <- P.getCurrentTime + utctime <- P.getTimestamp oldUserDataDir <- P.getUserDataDir P.setUserDataDir Nothing res <- P.readDefaultDataFile "reference.docx" @@ -234,11 +130,11 @@ writeDocx opts doc = do -- Gets the template size let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) - let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs + let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) - let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs - let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs + let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs + let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs -- Get the available area (converting the size and the margins to int and -- doing the difference @@ -250,24 +146,21 @@ writeDocx opts doc = do -- styles mblang <- toLang $ getLang opts meta + -- TODO FIXME avoid this generic traversal! + -- lang is in w:docDefaults / w:rPr / w:lang let addLang :: Element -> Element - addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $ - XMLC.fromElement e) <$> mblang of - Just (Elem e') -> e' - _ -> e -- return original - where go :: String -> Cursor -> Cursor - go l cursor = case XMLC.findRec (isLangElt . current) cursor of - Nothing -> cursor - Just t -> XMLC.modifyContent (setval l) t - setval :: String -> Content -> Content - setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $ - elAttribs e' } - setval _ x = x - setvalattr :: String -> XML.Attr -> XML.Attr - setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l - setvalattr _ x = x - isLangElt (Elem e') = qName (elName e') == "lang" - isLangElt _ = False + addLang = case mblang of + Nothing -> id + Just l -> everywhere (mkT (go (renderLang l))) + where + go :: Text -> Element -> Element + go l e' + | qName (elName e') == "lang" + = e'{ elAttribs = map (setvalattr l) $ elAttribs e' } + | otherwise = e' + + setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l + setvalattr _ x = x let stylepath = "word/styles.xml" styledoc <- addLang <$> parseXml refArchive distArchive stylepath @@ -337,12 +230,13 @@ writeDocx opts doc = do -- [Content_Types].xml let mkOverrideNode (part', contentType') = mknode "Override" - [("PartName",part'),("ContentType",contentType')] () + [("PartName", T.pack part') + ,("ContentType", contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _) = - mkOverrideNode ("/word/" ++ imgpath, - maybe "application/octet-stream" T.unpack mbMimeType) + mkOverrideNode ("/word/" <> imgpath, + fromMaybe "application/octet-stream" mbMimeType) let mkMediaOverride imgpath = - mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath) + mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -369,13 +263,14 @@ writeDocx opts doc = do ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") ] ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ map mkImageOverride imgs ++ - [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive - , "word/media/" `isPrefixOf` eRelativePath e ] + [ mkMediaOverride (eRelativePath e) + | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), @@ -421,7 +316,7 @@ writeDocx opts doc = do let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers let renumFooters = renumIds (\q -> qName q == "Id") idMap footers let baserels = baserels' ++ renumHeaders ++ renumFooters - let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () + let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () let linkrels = map toLinkRel $ M.toList $ stExternalLinks st @@ -441,7 +336,7 @@ writeDocx opts doc = do Nothing -> mknode "w:sectPr" [] () -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' - let contents' = contents ++ [sectpr] + let contents' = contents ++ [Elem sectpr] let docContents = mknode "w:document" stdAttributes $ mknode "w:body" [] contents' @@ -489,10 +384,10 @@ writeDocx opts doc = do numbering <- parseXml refArchive distArchive numpath let newNumElts = mkNumbering (stLists st) let pandocAdded e = - case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of + case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of Just numid -> numid >= (990 :: Int) Nothing -> - case findAttrTextBy ((== "numId") . qName) e >>= safeRead of + case findAttrBy ((== "numId") . qName) e >>= safeRead of Just numid -> numid >= (1000 :: Int) Nothing -> False let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering) @@ -514,7 +409,7 @@ writeDocx opts doc = do let extraCoreProps = ["subject","lang","category","description"] let extraCorePropsMap = M.fromList $ zip extraCoreProps ["dc:subject","dc:language","cp:category","dc:description"] - let lookupMetaString' :: T.Text -> Meta -> T.Text + let lookupMetaString' :: Text -> Meta -> Text lookupMetaString' key' meta' = case key' of "description" -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') @@ -530,20 +425,21 @@ writeDocx opts doc = do : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta)) : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) | k <- M.keys (unMeta meta), k `elem` extraCoreProps] - ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords) + ++ mknode "cp:keywords" [] (T.intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps -- docProps/custom.xml - let customProperties :: [(String, String)] - customProperties = [(T.unpack k, T.unpack $ lookupMetaString k meta) | k <- M.keys (unMeta meta) + let customProperties :: [(Text, Text)] + customProperties = [ (k, lookupMetaString k meta) + | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords"] ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) + ,("pid", tshow pid) ,("name", k)] $ mknode "vt:lpwstr" [] v let customPropsPath = "docProps/custom.xml" let customProps = mknode "Properties" @@ -574,12 +470,27 @@ writeDocx opts doc = do -- adds references to footnotes or endnotes we don't have... -- we do, however, copy some settings over from reference let settingsPath = "word/settings.xml" - settingsList = [ "w:autoHyphenation" - , "w:consecutiveHyphenLimit" - , "w:hyphenationZone" - , "w:doNotHyphenateCap" - , "w:evenAndOddHeaders" - , "w:proofState" + settingsList = [ "zoom" + , "embedSystemFonts" + , "doNotTrackMoves" + , "defaultTabStop" + , "drawingGridHorizontalSpacing" + , "drawingGridVerticalSpacing" + , "displayHorizontalDrawingGridEvery" + , "displayVerticalDrawingGridEvery" + , "characterSpacingControl" + , "savePreviewPicture" + , "mathPr" + , "themeFontLang" + , "decimalSymbol" + , "listSeparator" + , "autoHyphenation" + , "consecutiveHyphenLimit" + , "hyphenationZone" + , "doNotHyphenateCap" + , "evenAndOddHeaders" + , "proofState" + , "compat" ] settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList @@ -593,7 +504,8 @@ writeDocx opts doc = do fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $ - mapMaybe extractTarget (headers ++ footers) + mapMaybe (fmap T.unpack . extractTarget) + (headers ++ footers) let miscRelEntries = [ e | e <- zEntries refArchive , "word/_rels/" `isPrefixOf` eRelativePath e , ".xml.rels" `isSuffixOf` eRelativePath e @@ -619,8 +531,8 @@ newParaPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "paragraph") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyText")] () , mknode "w:qFormat" [] () ] @@ -630,8 +542,8 @@ newTextPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "character") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyTextChar")] () ] @@ -642,13 +554,14 @@ styleToOpenXml sm style = toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), - ("w:customStyle","1"),("w:styleId",show toktype)] - [ mknode "w:name" [("w:val",show toktype)] () + ("w:customStyle","1"),("w:styleId", tshow toktype)] + [ mknode "w:name" [("w:val", tshow toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () , mknode "w:rPr" [] $ - [ mknode "w:color" [("w:val",tokCol toktype)] () + [ mknode "w:color" [("w:val", tokCol toktype)] () | tokCol toktype /= "auto" ] ++ - [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] () + [ mknode "w:shd" [("w:val","clear") + ,("w:fill",tokBg toktype)] () | tokBg toktype /= "auto" ] ++ [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++ [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++ @@ -656,10 +569,10 @@ styleToOpenXml sm style = ] tokStyles = tokenStyles style tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles - tokCol toktype = maybe "auto" (drop 1 . fromColor) + tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenColor =<< M.lookup toktype tokStyles) `mplus` defaultColor style - tokBg toktype = maybe "auto" (drop 1 . fromColor) + tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenBackground =<< M.lookup toktype tokStyles) `mplus` backgroundColor style parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing @@ -672,23 +585,25 @@ styleToOpenXml sm style = , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () : - maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style) + maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style) ] -copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry +copyChildren :: (PandocMonad m) + => Archive -> Archive -> String -> Integer -> [Text] -> m Entry copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path + let elsToCopy = + map cleanElem $ filterChildrenName (\e -> qName e `elem` elNames) ref + let elsToKeep = + [e | Elem e <- elContent dist, not (any (hasSameNameAs e) elsToCopy)] return $ toEntry path timestamp $ renderXml dist{ - elContent = elContent dist ++ copyContent ref + elContent = map Elem elsToKeep ++ map Elem elsToCopy } where - strName QName{qName=name, qPrefix=prefix} - | Just p <- prefix = p++":"++name - | otherwise = name - shouldCopy = (`elem` elNames) . strName - cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} - copyContent = map cleanElem . filterChildrenName shouldCopy + hasSameNameAs (Element {elName = n1}) (Element {elName = n2}) = + qName n1 == qName n2 + cleanElem el@Element{elName=name} = el{elName=name{qURI=Nothing}} -- this is the lowest number used for a list numId baseListId :: Int @@ -697,43 +612,42 @@ baseListId = 1000 mkNumbering :: [ListMarker] -> [Element] mkNumbering lists = elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] - where elts = zipWith mkAbstractNum (ordNub lists) $ - randomRs (0x10000000, 0xFFFFFFFF) $ mkStdGen 1848 + where elts = map mkAbstractNum (ordNub lists) maxListLevel :: Int maxListLevel = 8 mkNum :: ListMarker -> Int -> Element mkNum marker numid = - mknode "w:num" [("w:numId",show numid)] + mknode "w:num" [("w:numId",tshow numid)] $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> - map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) + map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))] + $ mknode "w:startOverride" [("w:val",tshow start)] ()) [0..maxListLevel] -mkAbstractNum :: ListMarker -> Integer -> Element -mkAbstractNum marker nsid = +mkAbstractNum :: ListMarker -> Element +mkAbstractNum marker = mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] - $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () + $ mknode "w:nsid" [("w:val", "A" <> listMarkerToId marker)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = - mknode "w:lvl" [("w:ilvl",show lvl)] $ + mknode "w:lvl" [("w:ilvl",tshow lvl)] $ [ mknode "w:start" [("w:val",start)] () | marker /= NoMarker && marker /= BulletMarker ] ++ [ mknode "w:numFmt" [("w:val",fmt)] () - , mknode "w:lvlText" [("w:val",lvltxt)] () + , mknode "w:lvlText" [("w:val", lvltxt)] () , mknode "w:lvlJc" [("w:val","left")] () , mknode "w:pPr" [] - [ mknode "w:ind" [ ("w:left",show $ lvl * step + step) - , ("w:hanging",show (hang :: Int)) + [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step) + , ("w:hanging",tshow (hang :: Int)) ] () ] ] @@ -742,8 +656,8 @@ mkLvl marker lvl = NoMarker -> ("bullet"," ","1") BulletMarker -> ("bullet",bulletFor lvl,"1") NumberMarker st de n -> (styleFor st lvl - ,patternFor de ("%" ++ show (lvl + 1)) - ,show n) + ,patternFor de ("%" <> tshow (lvl + 1)) + ,tshow n) step = 720 hang = 480 bulletFor 0 = "\x2022" -- filled circle @@ -766,9 +680,9 @@ mkLvl marker lvl = styleFor DefaultStyle 5 = "lowerRoman" styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6) styleFor _ _ = "decimal" - patternFor OneParen s = s ++ ")" - patternFor TwoParens s = "(" ++ s ++ ")" - patternFor _ s = s ++ "." + patternFor OneParen s = s <> ")" + patternFor TwoParens s = "(" <> s <> ")" + patternFor _ s = s <> "." getNumId :: (PandocMonad m) => WS m Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists @@ -776,8 +690,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] makeTOC opts = do - let depth = "1-"++show (writerTOCDepth opts) - let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + let depth = "1-" <> tshow (writerTOCDepth opts) + let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u" tocTitle <- gets stTocTitle title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) return @@ -788,7 +702,7 @@ makeTOC opts = do mknode "w:docPartUnique" [] ()] -- w:docPartObj ), -- w:sdtPr - mknode "w:sdtContent" [] (title++[ + mknode "w:sdtContent" [] (title ++ [ Elem $ mknode "w:p" [] ( mknode "w:r" [] [ mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), @@ -802,7 +716,9 @@ makeTOC opts = do -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). -writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element]) +writeOpenXML :: (PandocMonad m) + => WriterOptions -> Pandoc + -> WS m ([Content], [Element], [Element]) writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta let auths = docAuthors meta @@ -828,8 +744,9 @@ writeOpenXML opts (Pandoc meta blocks) = do let toComment (kvs, ils) = do annotation <- inlinesToOpenXML opts ils return $ - mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs] + mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs] [ mknode "w:p" [] $ + map Elem [ mknode "w:pPr" [] [ mknode "w:pStyle" [("w:val", "CommentText")] () ] , mknode "w:r" [] @@ -844,11 +761,11 @@ writeOpenXML opts (Pandoc meta blocks) = do toc <- if includeTOC then makeTOC opts else return [] - let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc + let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ map Elem toc return (meta' ++ doc', notes', comments') -- | Convert a list of Pandoc blocks to OpenXML. -blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] +blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Content] blocksToOpenXML opts = fmap concat . mapM (blockToOpenXML opts) . separateTables -- Word combines adjacent tables unless you put an empty paragraph between @@ -859,35 +776,29 @@ separateTables (x@Table{}:xs@(Table{}:_)) = x : RawBlock (Format "openxml") "<w:p />" : separateTables xs separateTables (x:xs) = x : separateTables xs -pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element -pStyleM styleName = do - pStyleMap <- gets (smParaStyle . stStyleMaps) - let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] () - rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) let sty' = getStyleIdFromName styleName cStyleMap - return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] () + return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () -getUniqueId :: (PandocMonad m) => WS m String +getUniqueId :: (PandocMonad m) => WS m Text -- the + 20 is to ensure that there are no clashes with the rIds -- already in word/document.xml.rel getUniqueId = do n <- gets stCurId modify $ \st -> st{stCurId = n + 1} - return $ show n + return $ tshow n -- | Key for specifying user-defined docx styles. -dynamicStyleKey :: T.Text +dynamicStyleKey :: Text dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. -blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] +blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content] blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk -blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] +blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content] blockToOpenXML' _ Null = return [] blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do stylemod <- case lookup dynamicStyleKey kvs of @@ -921,18 +832,18 @@ blockToOpenXML' opts (Header lev (ident,_,kvs) lst) = do Just n -> do num <- withTextPropM (rStyleM "SectionNumber") (inlineToOpenXML opts (Str n)) - return $ num ++ [mknode "w:r" [] [mknode "w:tab" [] ()]] + return $ num ++ [Elem $ mknode "w:r" [] [mknode "w:tab" [] ()]] Nothing -> return [] else return [] contents <- (number ++) <$> inlinesToOpenXML opts lst if T.null ident - then return [mknode "w:p" [] (paraProps ++ contents)] + then return [Elem $ mknode "w:p" [] (map Elem paraProps ++ contents)] else do let bookmarkName = ident modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s } bookmarkedContents <- wrapBookmark bookmarkName contents - return [mknode "w:p" [] (paraProps ++ bookmarkedContents)] + return [Elem $ mknode "w:p" [] (map Elem paraProps ++ bookmarkedContents)] blockToOpenXML' opts (Plain lst) = do isInTable <- gets stInTable isInList <- gets stInList @@ -944,15 +855,31 @@ blockToOpenXML' opts (Plain lst) = do -- title beginning with fig: indicates that the image is a figure blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do setFirstPara + fignum <- gets stNextFigureNum + unless (null alt) $ modify $ \st -> st{ stNextFigureNum = fignum + 1 } + let figid = "fig" <> tshow fignum + figname <- translateTerm Term.Figure prop <- pStyleM $ if null alt then "Figure" else "Captioned Figure" paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False) contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] - captionNode <- withParaPropM (pStyleM "Image Caption") - $ blockToOpenXML opts (Para alt) - return $ mknode "w:p" [] (paraProps ++ contents) : captionNode + captionNode <- if null alt + then return [] + else withParaPropM (pStyleM "Image Caption") + $ blockToOpenXML opts + (Para $ Span (figid,[],[]) + [Str (figname <> "\160"), + RawInline (Format "openxml") + ("<w:fldSimple w:instr=\"SEQ Figure" + <> " \\* ARABIC \"><w:r><w:t>" + <> tshow fignum + <> "</w:t></w:r></w:fldSimple>"), + Str ":", Space] : alt) + return $ + Elem (mknode "w:p" [] (map Elem paraProps ++ contents)) + : captionNode blockToOpenXML' opts (Para lst) | null lst && not (isEnabled Ext_empty_paragraphs opts) = return [] | otherwise = do @@ -969,10 +896,12 @@ blockToOpenXML' opts (Para lst) ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps' ++ contents)] + return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) - | format == Format "openxml" = return [ x | Elem x <- parseXML str ] + | format == Format "openxml" = return [ + Text (CData CDataRaw str Nothing) + ] | otherwise = do report $ BlockNotRendered b return [] @@ -987,73 +916,14 @@ blockToOpenXML' opts (CodeBlock attrs@(ident, _, _) str) = do wrapBookmark ident p blockToOpenXML' _ HorizontalRule = do setFirstPara - return [ + return [ Elem $ mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] -blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do - let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot - setFirstPara - modify $ \s -> s { stInTable = True } - let captionStr = stringify caption - caption' <- if null caption - then return [] - else withParaPropM (pStyleM "Table Caption") - $ blockToOpenXML opts (Para caption) - let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () - -- Table cells require a <w:p> element, even an empty one! - -- Not in the spec but in Word 2007, 2010. See #4953. - let cellToOpenXML (al, cell) = do - es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell - return $ if any (\e -> qName (elName e) == "p") es - then es - else es ++ [mknode "w:p" [] ()] - headers' <- mapM cellToOpenXML $ zip aligns headers - rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - let borderProps = mknode "w:tcPr" [] - [ mknode "w:tcBorders" [] - $ mknode "w:bottom" [("w:val","single")] () - , mknode "w:vAlign" [("w:val","bottom")] () ] - compactStyle <- pStyleM "Compact" - let emptyCell' = [mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] - let mkcell border contents = mknode "w:tc" [] - $ [ borderProps | border ] ++ - if null contents - then emptyCell' - else contents - let mkrow border cells = mknode "w:tr" [] $ - [mknode "w:trPr" [] [ - mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] - ++ map (mkcell border) cells - let textwidth = 7920 -- 5.5 in in twips, 1/20 pt - let fullrow = 5000 -- 100% specified in pct - let rowwidth = fullrow * sum widths - let mkgridcol w = mknode "w:gridCol" - [("w:w", show (floor (textwidth * w) :: Integer))] () - let hasHeader = not $ all null headers - modify $ \s -> s { stInTable = False } - return $ - caption' ++ - [mknode "w:tbl" [] - ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : - mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") - ,("w:lastRow","0") - ,("w:firstColumn","0") - ,("w:lastColumn","0") - ,("w:noHBand","0") - ,("w:noVBand","0")] () : - [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () - | not (null caption) ] ) - : mknode "w:tblGrid" [] - (if all (==0) widths - then [] - else map mkgridcol widths) - : [ mkrow True headers' | hasHeader ] ++ - map (mkrow False) rows' - )] +blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) = + tableToOpenXML (blocksToOpenXML opts) + (Grid.toTable attr caption colspecs thead tbodies tfoot) blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst | OrderedList (start, numstyle, numdelim) lst <- el @@ -1070,7 +940,9 @@ blockToOpenXML' opts (DefinitionList items) = do setFirstPara return l -definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element] +definitionListItemToOpenXML :: (PandocMonad m) + => WriterOptions -> ([Inline],[[Block]]) + -> WS m [Content] definitionListItemToOpenXML opts (term,defs) = do term' <- withParaPropM (pStyleM "Definition Term") $ blockToOpenXML opts (Para term) @@ -1083,8 +955,11 @@ addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } -listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element] -listItemToOpenXML _ _ [] = return [] +listItemToOpenXML :: (PandocMonad m) + => WriterOptions + -> Int -> [Block] + -> WS m [Content] +listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do oldInList <- gets stInList modify $ \st -> st{ stInList = True } @@ -1103,15 +978,8 @@ listItemToOpenXML opts numid (first:rest) = do modify $ \st -> st{ stInList = oldInList } return $ first'' ++ rest'' -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -- | Convert a list of inline elements to OpenXML. -inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element] +inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a @@ -1120,10 +988,6 @@ withNumId numid = local $ \env -> env{ envListNumId = numid } asList :: (PandocMonad m) => WS m a -> WS m a asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } -isStyle :: Element -> Bool -isStyle e = isElem [] "w" "rStyle" e || - isElem [] "w" "pStyle" e - getTextProps :: (PandocMonad m) => WS m [Element] getTextProps = do props <- asks envTextProperties @@ -1146,23 +1010,13 @@ getParaProps displayMathPara = do listLevel <- asks envListLevel numid <- asks envListNumId let listPr = [mknode "w:numPr" [] - [ mknode "w:ilvl" [("w:val",show listLevel)] () - , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara] + [ mknode "w:ilvl" [("w:val",tshow listLevel)] () + , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara] return $ case listPr ++ squashProps props of [] -> [] ps -> [mknode "w:pPr" [] ps] -withParaProp :: PandocMonad m => Element -> WS m a -> WS m a -withParaProp d p = - local (\env -> env {envParaProperties = ep <> envParaProperties env}) p - where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d] - -withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a -withParaPropM md p = do - d <- md - withParaProp d p - -formattedString :: PandocMonad m => T.Text -> WS m [Element] +formattedString :: PandocMonad m => Text -> WS m [Element] formattedString str = -- properly handle soft hyphens case splitTextBy (=='\173') str of @@ -1171,7 +1025,7 @@ formattedString str = sh <- formattedRun [mknode "w:softHyphen" [] ()] intercalate sh <$> mapM formattedString' ws -formattedString' :: PandocMonad m => T.Text -> WS m [Element] +formattedString' :: PandocMonad m => Text -> WS m [Element] formattedString' str = do inDel <- asks envInDel formattedRun [ mktnode (if inDel then "w:delText" else "w:t") @@ -1182,16 +1036,13 @@ formattedRun els = do props <- getTextProps return [ mknode "w:r" [] $ props ++ els ] -setFirstPara :: PandocMonad m => WS m () -setFirstPara = modify $ \s -> s { stFirstPara = True } - -- | Convert an inline element to OpenXML. -inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] +inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il -inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] +inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] inlineToOpenXML' _ (Str str) = - formattedString str + map Elem <$> formattedString str inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) = @@ -1199,10 +1050,11 @@ inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) = inlineToOpenXML' opts (Span ("",["csl-left-margin"],[]) ils) = inlinesToOpenXML opts ils inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) = - ([mknode "w:r" [] - (mknode "w:t" - [("xml:space","preserve")] - ("\t" :: String))] ++) + ([Elem $ + mknode "w:r" [] + (mknode "w:t" + [("xml:space","preserve")] + ("\t" :: Text))] ++) <$> inlinesToOpenXML opts ils inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) = inlinesToOpenXML opts ils @@ -1212,18 +1064,18 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do let ident' = fromMaybe ident (lookup "id" kvs) kvs' = filter (("id" /=) . fst) kvs modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } - return [ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] + return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ] inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = -- prefer the "id" in kvs, since that is the one produced by the docx -- reader. let ident' = fromMaybe ident (lookup "id" kvs) - in - return [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () - , mknode "w:r" [] - [ mknode "w:rPr" [] - [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] - , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] - ] + in return . map Elem $ + [ mknode "w:commentRangeEnd" [("w:id", ident')] () + , mknode "w:r" [] + [ mknode "w:rPr" [] + [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] + , mknode "w:commentReference" [("w:id", ident')] () ] + ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of Just (fromString . T.unpack -> sty) -> do @@ -1246,8 +1098,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do defaultAuthor <- asks envChangesAuthor let author = fromMaybe defaultAuthor (lookup "author" kvs) let mdate = lookup "date" kvs - return $ ("w:author", T.unpack author) : - maybe [] (\date -> [("w:date", T.unpack date)]) mdate + return $ ("w:author", author) : + maybe [] (\date -> [("w:date", date)]) mdate insmod <- if "insertion" `elem` classes then do changeAuthorDate <- getChangeAuthorDate @@ -1255,8 +1107,9 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do modify $ \s -> s{stInsId = insId + 1} return $ \f -> do x <- f - return [ mknode "w:ins" - (("w:id", show insId) : changeAuthorDate) x] + return [Elem $ + mknode "w:ins" + (("w:id", tshow insId) : changeAuthorDate) x] else return id delmod <- if "deletion" `elem` classes then do @@ -1265,16 +1118,20 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do modify $ \s -> s{stDelId = delId + 1} return $ \f -> local (\env->env{envInDel=True}) $ do x <- f - return [mknode "w:del" - (("w:id", show delId) : changeAuthorDate) x] + return [Elem $ mknode "w:del" + (("w:id", tshow delId) : changeAuthorDate) x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils wrapBookmark ident contents inlineToOpenXML' opts (Strong lst) = - withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst + withTextProp (mknode "w:b" [] ()) $ + withTextProp (mknode "w:bCs" [] ()) $ -- needed for LTR, #6911 + inlinesToOpenXML opts lst inlineToOpenXML' opts (Emph lst) = - withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst + withTextProp (mknode "w:i" [] ()) $ + withTextProp (mknode "w:iCs" [] ()) $ -- needed for LTR, #6911 + inlinesToOpenXML opts lst inlineToOpenXML' opts (Underline lst) = withTextProp (mknode "w:u" [("w:val","single")] ()) $ inlinesToOpenXML opts lst @@ -1290,9 +1147,10 @@ inlineToOpenXML' opts (SmallCaps lst) = inlineToOpenXML' opts (Strikeout lst) = withTextProp (mknode "w:strike" [] ()) $ inlinesToOpenXML opts lst -inlineToOpenXML' _ LineBreak = return [br] +inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) - | f == Format "openxml" = return [ x | Elem x <- parseXML str ] + | f == Format "openxml" = return + [Text (CData CDataRaw str Nothing)] | otherwise = do report $ InlineNotRendered il return [] @@ -1305,26 +1163,26 @@ inlineToOpenXML' opts (Math mathType str) = do when (mathType == DisplayMath) setFirstPara res <- (lift . lift) (convertMath writeOMML mathType str) case res of - Right r -> return [r] + Right r -> return [Elem $ fromXLElement r] Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do let alltoktypes = [KeywordTok ..] tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes - let unhighlighted = intercalate [br] `fmap` + let unhighlighted = (map Elem . intercalate [br]) `fmap` mapM formattedString (T.lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] $ maybeToList (lookup toktype tokTypesMap) - , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] + , mknode "w:t" [("xml:space","preserve")] tok ] withTextPropM (rStyleM "Verbatim Char") $ if isNothing (writerHighlightStyle opts) then unhighlighted else case highlight (writerSyntaxMap opts) formatOpenXML attrs str of - Right h -> return h + Right h -> return (map Elem h) Left msg -> do unless (T.null msg) $ report $ CouldNotHighlight msg unhighlighted @@ -1335,7 +1193,7 @@ inlineToOpenXML' opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs @@ -1347,26 +1205,26 @@ inlineToOpenXML' opts (Note bs) = do $ insertNoteRef bs) let newnote = mknode "w:footnote" [("w:id", notenum)] contents modify $ \s -> s{ stFootnotes = newnote : notes } - return [ mknode "w:r" [] + return [ Elem $ mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return - [ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] + [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ] -- external link: inlineToOpenXML' opts (Link _ txt (src,_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks - id' <- case M.lookup (T.unpack src) extlinks of + id' <- case M.lookup src extlinks of Just i -> return i Nothing -> do - i <- ("rId"++) `fmap` getUniqueId + i <- ("rId" <>) <$> getUniqueId modify $ \st -> st{ stExternalLinks = - M.insert (T.unpack src) i extlinks } + M.insert src i extlinks } return i - return [ mknode "w:hyperlink" [("r:id",id')] contents ] + return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do pageWidth <- asks envPrintWidth imgs <- gets stImages @@ -1384,17 +1242,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do ,("noChangeAspect","1")] () nvPicPr = mknode "pic:nvPicPr" [] [ mknode "pic:cNvPr" - [("descr",T.unpack src),("id","0"),("name","Picture")] () + [("descr",src),("id","0"),("name","Picture")] () , cNvPicPr ] blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",ident)] () + [ mknode "a:blip" [("r:embed",T.pack ident)] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] xfrm = mknode "a:xfrm" [] [ mknode "a:off" [("x","0"),("y","0")] () - , mknode "a:ext" [("cx",show xemu) - ,("cy",show yemu)] () ] + , mknode "a:ext" [("cx",tshow xemu) + ,("cy",tshow yemu)] () ] prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () ln = mknode "a:ln" [("w","9525")] @@ -1415,12 +1273,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do imgElt = mknode "w:r" [] $ mknode "w:drawing" [] $ mknode "wp:inline" [] - [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] () , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" - [ ("descr", T.unpack $ stringify alt) - , ("title", T.unpack title) + [ ("descr", stringify alt) + , ("title", title) , ("id","1") , ("name","Picture") ] () @@ -1430,10 +1288,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do imgElt wrapBookmark imgident =<< case stImage of - Just imgData -> return [generateImgElt imgData] + Just imgData -> return [Elem $ generateImgElt imgData] Nothing -> ( do --try (img, mt) <- P.fetchItem src - ident <- ("rId"++) `fmap` getUniqueId + ident <- ("rId" <>) <$> getUniqueId let imgext = case mt >>= extensionFromMimeType of @@ -1446,11 +1304,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just Eps -> ".eps" Just Svg -> ".svg" Just Emf -> ".emf" + Just Tiff -> ".tiff" Nothing -> "" - imgpath = "media/" <> ident <> T.unpack imgext - mbMimeType = mt <|> getMimeType imgpath + imgpath = "media/" <> ident <> imgext + mbMimeType = mt <|> getMimeType (T.unpack imgpath) - imgData = (ident, imgpath, mbMimeType, img) + imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img) if T.null imgext then -- without an extension there is no rule for content type @@ -1458,7 +1317,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do else do -- insert mime type to use in constructing [Content_Types].xml modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st } - return [generateImgElt imgData] + return [Elem $ generateImgElt imgData] ) `catchError` ( \e -> do report $ CouldNotFetchResource src $ T.pack (show e) @@ -1469,22 +1328,6 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [] ()] --- Word will insert these footnotes into the settings.xml file --- (whether or not they're visible in the document). If they're in the --- file, but not in the footnotes.xml file, it will produce --- problems. So we want to make sure we insert them into our document. -defaultFootnotes :: [Element] -defaultFootnotes = [ mknode "w:footnote" - [("w:type", "separator"), ("w:id", "-1")] - [ mknode "w:p" [] - [mknode "w:r" [] - [ mknode "w:separator" [] ()]]] - , mknode "w:footnote" - [("w:type", "continuationSeparator"), ("w:id", "0")] - [ mknode "w:p" [] - [ mknode "w:r" [] - [ mknode "w:continuationSeparator" [] ()]]]] - withDirection :: PandocMonad m => WS m a -> WS m a withDirection x = do @@ -1508,20 +1351,20 @@ withDirection x = do , envTextProperties = EnvProps textStyle textProps' } -wrapBookmark :: (PandocMonad m) => T.Text -> [Element] -> WS m [Element] +wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content] wrapBookmark "" contents = return contents wrapBookmark ident contents = do id' <- getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') - ,("w:name", T.unpack $ toBookmarkName ident)] () + ,("w:name", toBookmarkName ident)] () bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return $ bookmarkStart : contents ++ [bookmarkEnd] + return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd] -- Word imposes a 40 character limit on bookmark names and requires -- that they begin with a letter. So we just use a hash of the -- identifier when otherwise we'd have an illegal bookmark name. -toBookmarkName :: T.Text -> T.Text +toBookmarkName :: Text -> Text toBookmarkName s | Just (c, _) <- T.uncons s , isLetter c diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs index c3c54c7e5..04868eaad 100644 --- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs @@ -2,7 +2,7 @@ {- | Module : Text.Pandoc.Writers.Docx.StyleMap Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, - 2014-2020 John MacFarlane <jgm@berkeley.edu>, + 2014-2021 John MacFarlane <jgm@berkeley.edu>, 2015-2019 Nikolay Yakimov <root@livid.pp.ru> License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs new file mode 100644 index 000000000..7a84c5278 --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -0,0 +1,227 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | +Module : Text.Pandoc.Writers.Docx.Table +Copyright : Copyright (C) 2012-2021 John MacFarlane +License : GNU GPL, version 2 or above +Maintainer : John MacFarlane <jgm@berkeley.edu> + +Conversion of table blocks to docx. +-} +module Text.Pandoc.Writers.Docx.Table + ( tableToOpenXML + ) where + +import Control.Monad.State.Strict +import Data.Array +import Data.Text (Text) +import Text.Pandoc.Definition +import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm) +import Text.Pandoc.Writers.Docx.Types +import Text.Pandoc.Shared +import Text.Printf (printf) +import Text.Pandoc.Writers.GridTable hiding (Table) +import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML hiding (Attr) +import qualified Data.Text as T +import qualified Text.Pandoc.Translations as Term +import qualified Text.Pandoc.Writers.GridTable as Grid + +tableToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> Grid.Table + -> WS m [Content] +tableToOpenXML blocksToOpenXML gridTable = do + setFirstPara + let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) = + gridTable + let (Caption _maybeShortCaption captionBlocks) = caption + tablenum <- gets stNextTableNum + unless (null captionBlocks) $ + modify $ \st -> st{ stNextTableNum = tablenum + 1 } + let tableid = if T.null ident + then "table" <> tshow tablenum + else ident + tablename <- translateTerm Term.Table + let captionStr = stringify captionBlocks + let aligns = map fst $ elems colspecs + captionXml <- if null captionBlocks + then return [] + else withParaPropM (pStyleM "Table Caption") + $ blocksToOpenXML + $ addLabel tableid tablename tablenum captionBlocks + -- We set "in table" after processing the caption, because we don't + -- want the "Table Caption" style to be overwritten with "Compact". + modify $ \s -> s { stInTable = True } + head' <- cellGridToOpenXML blocksToOpenXML HeadRow aligns thead + bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow aligns) tbodies + foot' <- cellGridToOpenXML blocksToOpenXML FootRow aligns tfoot + + let hasHeader = not . null . indices . partRowAttrs $ thead + let hasFooter = not . null . indices . partRowAttrs $ tfoot + -- for compatibility with Word <= 2007, we include a val with a bitmask + -- 0×0020 Apply first row conditional formatting + -- 0×0040 Apply last row conditional formatting + -- 0×0080 Apply first column conditional formatting + -- 0×0100 Apply last column conditional formatting + -- 0×0200 Do not apply row banding conditional formatting + -- 0×0400 Do not apply column banding conditional formattin + let tblLookVal = if hasHeader then (0x20 :: Int) else 0 + let (gridCols, tblWattr) = tableLayout (elems colspecs) + let tbl = mknode "w:tbl" [] + ( mknode "w:tblPr" [] + ( mknode "w:tblStyle" [("w:val","Table")] () : + mknode "w:tblW" tblWattr () : + mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") + ,("w:lastRow",if hasFooter then "1" else "0") + ,("w:firstColumn","0") + ,("w:lastColumn","0") + ,("w:noHBand","0") + ,("w:noVBand","0") + ,("w:val", T.pack $ printf "%04x" tblLookVal) + ] () : + [ mknode "w:tblCaption" [("w:val", captionStr)] () + | not (T.null captionStr) ] + ) + : mknode "w:tblGrid" [] gridCols + : head' ++ mconcat bodies ++ foot' + ) + modify $ \s -> s { stInTable = False } + return $ captionXml ++ [Elem tbl] + +addLabel :: Text -> Text -> Int -> [Block] -> [Block] +addLabel tableid tablename tablenum bs = + case bs of + (Para ils : rest) -> Para (label : Space : ils) : rest + (Plain ils : rest) -> Plain (label : Space : ils) : rest + _ -> Para [label] : bs + where + label = Span (tableid,[],[]) + [Str (tablename <> "\160"), + RawInline (Format "openxml") + ("<w:fldSimple w:instr=\"SEQ Table" + <> " \\* ARABIC \"><w:r><w:t>" + <> tshow tablenum + <> "</w:t></w:r></w:fldSimple>"), + Str ":"] + +-- | Parts of a table +data RowType = HeadRow | BodyRow | FootRow + +alignmentToString :: Alignment -> Text +alignmentToString = \case + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableLayout :: [ColSpec] -> ([Element], [(Text, Text)]) +tableLayout specs = + let + textwidth = 7920 -- 5.5 in in twips (1 twip == 1/20 pt) + fullrow = 5000 -- 100% specified in pct (1 pct == 1/50th of a percent) + ncols = length specs + getWidth = \case + ColWidth n -> n + _ -> 0 + widths = map (getWidth . snd) specs + rowwidth = round (fullrow * sum widths) :: Int + widthToTwips w = floor (textwidth * w) :: Int + mkGridCol w = mknode "w:gridCol" [("w:w", tshow (widthToTwips w))] () + in if all (== 0) widths + then ( replicate ncols $ mkGridCol (1.0 / fromIntegral ncols) + , [ ("w:type", "auto"), ("w:w", "0")]) + else ( map mkGridCol widths + , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ]) + +cellGridToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> RowType + -> [Alignment] + -> Part + -> WS m [Element] +cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ cellArray _) = + if null (elems cellArray) + then return mempty + else mapM (rowToOpenXML blocksToOpenXML) $ + partToRows rowType aligns part + +data OOXMLCell + = OOXMLCell Attr Alignment RowSpan ColSpan [Block] + | OOXMLCellMerge ColSpan + +data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell] + +partToRows :: RowType -> [Alignment] -> Part -> [OOXMLRow] +partToRows rowType aligns part = + let + toOOXMLCell :: Alignment -> RowIndex -> ColIndex -> GridCell -> [OOXMLCell] + toOOXMLCell columnAlign ridx cidx = \case + ContentCell attr align rowspan colspan blocks -> + -- Respect non-default, cell specific alignment. + let align' = case align of + AlignDefault -> columnAlign + _ -> align + in [OOXMLCell attr align' rowspan colspan blocks] + ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' -> + case (partCellArray part)!idx' of + (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan] + x -> error $ "Content cell expected, got, " ++ show x ++ + " at index " ++ show idx' + _ -> mempty + mkRow :: (RowIndex, Attr) -> OOXMLRow + mkRow (ridx, attr) = OOXMLRow rowType attr + . mconcat + . zipWith (\align -> uncurry $ toOOXMLCell align ridx) + aligns + . assocs + . rowArray ridx + $ partCellArray part + in map mkRow $ assocs (partRowAttrs part) + +rowToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> OOXMLRow + -> WS m Element +rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do + xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells + let addTrPr = case rowType of + HeadRow -> (mknode "w:trPr" [] + [mknode "w:tblHeader" [("w:val", "true")] ()] :) + BodyRow -> id + FootRow -> id + return $ mknode "w:tr" [] (addTrPr xmlcells) + +ooxmlCellToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> OOXMLCell + -> WS m Element +ooxmlCellToOpenXML blocksToOpenXML = \case + OOXMLCellMerge (ColSpan colspan) -> do + return $ mknode "w:tc" [] + [ mknode "w:tcPr" [] [ mknode "w:gridSpan" [("w:val", tshow colspan)] () + , mknode "w:vMerge" [("w:val", "continue")] () ] + , mknode "w:p" [] [mknode "w:pPr" [] ()]] + OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do + compactStyle <- pStyleM "Compact" + es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents + -- Table cells require a <w:p> element, even an empty one! + -- Not in the spec but in Word 2007, 2010. See #4953. And + -- apparently the last element must be a <w:p>, see #6983. + return . mknode "w:tc" [] $ + Elem + (mknode "w:tcPr" [] ([ mknode "w:gridSpan" [("w:val", tshow colspan)] () + | colspan > 1] ++ + [ mknode "w:vMerge" [("w:val", "restart")] () + | rowspan > RowSpan 1 ])) : + if null contents + then [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] + else case reverse (onlyElems es) of + b:e:_ | qName (elName b) == "bookmarkEnd" -- y tho? + , qName (elName e) == "p" -> es + e:_ | qName (elName e) == "p" -> es + _ -> es ++ [Elem $ mknode "w:p" [] ()] + +alignmentFor :: Alignment -> Element +alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs new file mode 100644 index 000000000..74b8d2753 --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx/Types.hs @@ -0,0 +1,185 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | +Module : Text.Pandoc.Writers.Docx +Copyright : Copyright (C) 2012-2021 John MacFarlane +License : GNU GPL, version 2 or above +Maintainer : John MacFarlane <jgm@berkeley.edu> + +Conversion of table blocks to docx. +-} +module Text.Pandoc.Writers.Docx.Types + ( EnvProps (..) + , WriterEnv (..) + , defaultWriterEnv + , WriterState (..) + , defaultWriterState + , WS + , ListMarker (..) + , listMarkerToId + , pStyleM + , isStyle + , setFirstPara + , withParaProp + , withParaPropM + ) where + +import Control.Applicative ((<|>)) +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.Text (Text) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.MIME (MimeType) +import Text.Pandoc.Writers.Docx.StyleMap +import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML +import qualified Data.ByteString as B +import qualified Data.Map as M +import qualified Data.Set as Set +import qualified Data.Text as T + +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + +listMarkerToId :: ListMarker -> Text +listMarkerToId NoMarker = "990" +listMarkerToId BulletMarker = "991" +listMarkerToId (NumberMarker sty delim n) = T.pack $ + '9' : '9' : styNum : delimNum : show n + where styNum = case sty of + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' + delimNum = case delim of + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' + + +data EnvProps = EnvProps{ styleElement :: Maybe Element + , otherElements :: [Element] + } + +instance Semigroup EnvProps where + EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es') + +instance Monoid EnvProps where + mempty = EnvProps Nothing [] + mappend = (<>) + +data WriterEnv = WriterEnv + { envTextProperties :: EnvProps + , envParaProperties :: EnvProps + , envRTL :: Bool + , envListLevel :: Int + , envListNumId :: Int + , envInDel :: Bool + , envChangesAuthor :: Text + , envChangesDate :: Text + , envPrintWidth :: Integer + } + +defaultWriterEnv :: WriterEnv +defaultWriterEnv = WriterEnv + { envTextProperties = mempty + , envParaProperties = mempty + , envRTL = False + , envListLevel = -1 + , envListNumId = 1 + , envInDel = False + , envChangesAuthor = "unknown" + , envChangesDate = "1969-12-31T19:00:00Z" + , envPrintWidth = 1 + } + + +data WriterState = WriterState{ + stFootnotes :: [Element] + , stComments :: [([(Text, Text)], [Inline])] + , stSectionIds :: Set.Set Text + , stExternalLinks :: M.Map Text Text + , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) + , stLists :: [ListMarker] + , stInsId :: Int + , stDelId :: Int + , stStyleMaps :: StyleMaps + , stFirstPara :: Bool + , stInTable :: Bool + , stInList :: Bool + , stTocTitle :: [Inline] + , stDynamicParaProps :: Set.Set ParaStyleName + , stDynamicTextProps :: Set.Set CharStyleName + , stCurId :: Int + , stNextFigureNum :: Int + , stNextTableNum :: Int + } + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + stFootnotes = defaultFootnotes + , stComments = [] + , stSectionIds = Set.empty + , stExternalLinks = M.empty + , stImages = M.empty + , stLists = [NoMarker] + , stInsId = 1 + , stDelId = 1 + , stStyleMaps = StyleMaps M.empty M.empty + , stFirstPara = False + , stInTable = False + , stInList = False + , stTocTitle = [Str "Table of Contents"] + , stDynamicParaProps = Set.empty + , stDynamicTextProps = Set.empty + , stCurId = 20 + , stNextFigureNum = 1 + , stNextTableNum = 1 + } + +setFirstPara :: PandocMonad m => WS m () +setFirstPara = modify $ \s -> s { stFirstPara = True } + +type WS m = ReaderT WriterEnv (StateT WriterState m) + +-- Word will insert these footnotes into the settings.xml file +-- (whether or not they're visible in the document). If they're in the +-- file, but not in the footnotes.xml file, it will produce +-- problems. So we want to make sure we insert them into our document. +defaultFootnotes :: [Element] +defaultFootnotes = [ mknode "w:footnote" + [("w:type", "separator"), ("w:id", "-1")] + [ mknode "w:p" [] + [mknode "w:r" [] + [ mknode "w:separator" [] ()]]] + , mknode "w:footnote" + [("w:type", "continuationSeparator"), ("w:id", "0")] + [ mknode "w:p" [] + [ mknode "w:r" [] + [ mknode "w:continuationSeparator" [] ()]]]] + +pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element +pStyleM styleName = do + pStyleMap <- gets (smParaStyle . stStyleMaps) + let sty' = getStyleIdFromName styleName pStyleMap + return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () + +withParaProp :: PandocMonad m => Element -> WS m a -> WS m a +withParaProp d p = + local (\env -> env {envParaProperties = ep <> envParaProperties env}) p + where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d] + +withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a +withParaPropM md p = do + d <- md + withParaProp d p + +isStyle :: Element -> Bool +isStyle e = isElem [] "w" "rStyle" e || + isElem [] "w" "pStyle" e diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 90ec6824f..602c70ebe 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.DokuWiki - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Clare Macrae <clare.macrae@googlemail.com> @@ -27,6 +27,7 @@ import Control.Monad.Reader (ReaderT, asks, local, runReaderT) import Control.Monad.State.Strict (StateT, evalStateT) import Data.Default (Default (..)) import Data.List (transpose) +import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -172,7 +173,8 @@ blockToDokuWiki opts (Table _ blkCapt specs thead tbody tfoot) = do then return [] else zipWithM (tableItemToDokuWiki opts) aligns headers rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows - let widths = map (maximum . map T.length) $ transpose (headers':rows') + let widths = map (maybe 0 maximum . nonEmpty . map T.length) + $ transpose (headers':rows') let padTo (width, al) s = case width - T.length s of x | x > 0 -> diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 12004889f..508fb6a98 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -5,7 +5,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2020 John MacFarlane + Copyright : Copyright (C) 2010-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get, gets, lift, modify) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Data.Char (isAlphaNum, isAscii, isDigit, toLower) +import Data.Char (isAlphaNum, isAscii, isDigit) import Data.List (isInfixOf, isPrefixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) import qualified Data.Set as Set -import qualified Data.Text as TS +import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) import System.FilePath (takeExtension, takeFileName, makeRelative) @@ -48,15 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags', - safeRead, stringify, trim, uniqueIdent, tshow) + stringify, uniqueIdent, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) import Text.Printf (printf) -import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), - add_attrs, lookupAttr, node, onlyElems, parseXML, - ppElement, showElement, strContent, unode, unqual) +import Text.Pandoc.XML.Light import Text.Pandoc.XML (escapeStringForXML) import Text.DocTemplates (FromContext(lookupContext), Context(..), ToContext(toVal), Val(..)) @@ -68,69 +67,72 @@ newtype Chapter = Chapter [Block] data EPUBState = EPUBState { stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] , stMediaNextId :: Int - , stEpubSubdir :: String + , stEpubSubdir :: FilePath } type E m = StateT EPUBState m data EPUBMetadata = EPUBMetadata{ - epubIdentifier :: [Identifier] - , epubTitle :: [Title] - , epubDate :: [Date] - , epubLanguage :: String - , epubCreator :: [Creator] - , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubCoverImage :: Maybe String - , epubStylesheets :: [FilePath] - , epubPageDirection :: Maybe ProgressionDirection - , epubIbooksFields :: [(String, String)] - , epubCalibreFields :: [(String, String)] + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: [Date] + , epubLanguage :: Text + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [Text] + , epubDescription :: Maybe Text + , epubType :: Maybe Text + , epubFormat :: Maybe Text + , epubPublisher :: Maybe Text + , epubSource :: Maybe Text + , epubRelation :: Maybe Text + , epubCoverage :: Maybe Text + , epubRights :: Maybe Text + , epubBelongsToCollection :: Maybe Text + , epubGroupPosition :: Maybe Text + , epubCoverImage :: Maybe FilePath + , epubStylesheets :: [FilePath] + , epubPageDirection :: Maybe ProgressionDirection + , epubIbooksFields :: [(Text, Text)] + , epubCalibreFields :: [(Text, Text)] } deriving Show data Date = Date{ - dateText :: String - , dateEvent :: Maybe String + dateText :: Text + , dateEvent :: Maybe Text } deriving Show data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String + creatorText :: Text + , creatorRole :: Maybe Text + , creatorFileAs :: Maybe Text } deriving Show data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String + identifierText :: Text + , identifierScheme :: Maybe Text } deriving Show data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String + titleText :: Text + , titleFileAs :: Maybe Text + , titleType :: Maybe Text } deriving Show data ProgressionDirection = LTR | RTL deriving Show -dcName :: String -> QName +dcName :: Text -> QName dcName n = QName n Nothing (Just "dc") -dcNode :: Node t => String -> t -> Element +dcNode :: Node t => Text -> t -> Element dcNode = node . dcName -opfName :: String -> QName +opfName :: Text -> QName opfName n = QName n Nothing (Just "opf") -toId :: FilePath -> String -toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' +toId :: FilePath -> Text +toId = T.pack . + map (\x -> if isAlphaNum x || x == '-' || x == '_' then x else '_') . takeFileName @@ -138,8 +140,8 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -toVal' :: String -> Val TS.Text -toVal' = toVal . TS.pack +toVal' :: Text -> Val T.Text +toVal' = toVal mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry mkEntry path content = do @@ -158,32 +160,37 @@ mkEntry path content = do getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta - let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts + elts <- case writerEpubMetadata opts of + Nothing -> return [] + Just t -> case parseXMLContents (TL.fromStrict t) of + Left msg -> throwError $ + PandocXMLError "epub metadata" msg + Right ns -> return (onlyElems ns) let md' = foldr addMetadataFromXML md elts let addIdentifier m = if null (epubIdentifier m) then do randomId <- getRandomUUID - return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] } + return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] } else return m let addLanguage m = - if null (epubLanguage m) + if T.null (epubLanguage m) then case lookupContext "lang" (writerVariables opts) of - Just x -> return m{ epubLanguage = TS.unpack x } + Just x -> return m{ epubLanguage = x } Nothing -> do mLang <- lift $ P.lookupEnv "LANG" let localeLang = case mLang of Just lang -> - TS.map (\c -> if c == '_' then '-' else c) $ - TS.takeWhile (/='.') lang + T.map (\c -> if c == '_' then '-' else c) $ + T.takeWhile (/='.') lang Nothing -> "en-US" - return m{ epubLanguage = TS.unpack localeLang } + return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) then do - currentTime <- lift P.getCurrentTime + currentTime <- lift P.getTimestamp return $ m{ epubDate = [ Date{ dateText = showDateTimeISO8601 currentTime , dateEvent = Nothing } ] } @@ -193,7 +200,7 @@ getEPUBMetadata opts meta = do then return m else do let authors' = map stringify $ docAuthors meta - let toAuthor name = Creator{ creatorText = TS.unpack name + let toAuthor name = Creator{ creatorText = name , creatorRole = Just "aut" , creatorFileAs = Nothing } return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } @@ -235,35 +242,37 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md | name == "relation" = md { epubRelation = Just $ strContent e } | name == "coverage" = md { epubCoverage = Just $ strContent e } | name == "rights" = md { epubRights = Just $ strContent e } + | name == "belongs-to-collection" = md { epubBelongsToCollection = Just $ strContent e } + | name == "group-position" = md { epubGroupPosition = Just $ strContent e } | otherwise = md where getAttr n = lookupAttr (opfName n) attrs addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = case getAttr "property" of - Just s | "ibooks:" `isPrefixOf` s -> - md{ epubIbooksFields = (drop 7 s, strContent e) : + Just s | "ibooks:" `T.isPrefixOf` s -> + md{ epubIbooksFields = (T.drop 7 s, strContent e) : epubIbooksFields md } _ -> case getAttr "name" of - Just s | "calibre:" `isPrefixOf` s -> + Just s | "calibre:" `T.isPrefixOf` s -> md{ epubCalibreFields = - (drop 8 s, fromMaybe "" $ getAttr "content") : + (T.drop 8 s, fromMaybe "" $ getAttr "content") : epubCalibreFields md } _ -> md where getAttr n = lookupAttr (unqual n) attrs addMetadataFromXML _ md = md -metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = TS.unpack s -metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils -metaValueToString (MetaBlocks bs) = TS.unpack $ stringify bs +metaValueToString :: MetaValue -> Text +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = stringify ils +metaValueToString (MetaBlocks bs) = stringify bs metaValueToString (MetaBool True) = "true" metaValueToString (MetaBool False) = "false" metaValueToString _ = "" metaValueToPaths :: MetaValue -> [FilePath] -metaValueToPaths (MetaList xs) = map metaValueToString xs -metaValueToPaths x = [metaValueToString x] +metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs +metaValueToPaths x = [T.unpack $ metaValueToString x] -getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a] +getList :: T.Text -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = case lookupMeta s meta of Just (MetaList xs) -> map handleMetaValue xs @@ -287,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue , titleType = metaValueToString <$> M.lookup "type" m } handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing -getCreator :: TS.Text -> Meta -> [Creator] +getCreator :: T.Text -> Meta -> [Creator] getCreator s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m @@ -295,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing -getDate :: TS.Text -> Meta -> [Date] +getDate :: T.Text -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Date{ dateText = fromMaybe "" $ @@ -304,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: TS.Text -> Meta -> [String] +simpleList :: T.Text -> Meta -> [Text] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs @@ -313,26 +322,28 @@ simpleList s meta = metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta opts meta = EPUBMetadata{ - epubIdentifier = identifiers - , epubTitle = titles - , epubDate = date - , epubLanguage = language - , epubCreator = creators - , epubContributor = contributors - , epubSubject = subjects - , epubDescription = description - , epubType = epubtype - , epubFormat = format - , epubPublisher = publisher - , epubSource = source - , epubRelation = relation - , epubCoverage = coverage - , epubRights = rights - , epubCoverImage = coverImage - , epubStylesheets = stylesheets - , epubPageDirection = pageDirection - , epubIbooksFields = ibooksFields - , epubCalibreFields = calibreFields + epubIdentifier = identifiers + , epubTitle = titles + , epubDate = date + , epubLanguage = language + , epubCreator = creators + , epubContributor = contributors + , epubSubject = subjects + , epubDescription = description + , epubType = epubtype + , epubFormat = format + , epubPublisher = publisher + , epubSource = source + , epubRelation = relation + , epubCoverage = coverage + , epubRights = rights + , epubBelongsToCollection = belongsToCollection + , epubGroupPosition = groupPosition + , epubCoverImage = coverImage + , epubStylesheets = stylesheets + , epubPageDirection = pageDirection + , epubIbooksFields = ibooksFields + , epubCalibreFields = calibreFields } where identifiers = getIdentifier meta titles = getTitle meta @@ -350,30 +361,31 @@ metadataFromMeta opts meta = EPUBMetadata{ relation = metaValueToString <$> lookupMeta "relation" meta coverage = metaValueToString <$> lookupMeta "coverage" meta rights = metaValueToString <$> lookupMeta "rights" meta - coverImage = - (TS.unpack <$> lookupContext "epub-cover-image" - (writerVariables opts)) + belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta + groupPosition = metaValueToString <$> lookupMeta "group-position" meta + coverImage = T.unpack <$> + lookupContext "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta stylesheets = maybe [] metaValueToPaths mCss ++ case lookupContext "css" (writerVariables opts) of - Just xs -> map TS.unpack xs + Just xs -> map T.unpack xs Nothing -> case lookupContext "css" (writerVariables opts) of - Just x -> [TS.unpack x] + Just x -> [T.unpack x] Nothing -> [] - pageDirection = case map toLower . metaValueToString <$> + pageDirection = case T.toLower . metaValueToString <$> lookupMeta "page-progression-direction" meta of Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing ibooksFields = case lookupMeta "ibooks" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] calibreFields = case lookupMeta "calibre" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] -- | Produce an EPUB2 file from a Pandoc document. @@ -399,9 +411,11 @@ writeEPUB :: PandocMonad m writeEPUB epubVersion opts doc = do let epubSubdir = writerEpubSubdirectory opts -- sanity check on epubSubdir - unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ throwError $ PandocEpubSubdirectoryError epubSubdir - let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir } + let initState = EPUBState { stMediaPaths = [] + , stMediaNextId = 0 + , stEpubSubdir = T.unpack epubSubdir } evalStateT (pandocToEPUB epubVersion opts doc) initState pandocToEPUB :: PandocMonad m @@ -425,7 +439,7 @@ pandocToEPUB version opts doc = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> TS.unpack $ stringify x + x -> stringify x -- stylesheet stylesheets <- case epubStylesheets metadata of @@ -447,7 +461,8 @@ pandocToEPUB version opts doc = do (ListVal $ map (\e -> toVal' $ (if useprefix then "../" else "") <> - makeRelative epubSubdir (eRelativePath e)) + T.pack + (makeRelative epubSubdir (eRelativePath e))) stylesheetEntries) mempty @@ -465,28 +480,34 @@ pandocToEPUB version opts doc = do case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = takeFileName img + let fp = takeFileName img + mediaPaths <- gets (map (fst . snd) . stMediaPaths) + coverImageName <- -- see #4206 + if ("media/" <> fp) `elem` mediaPaths + then getMediaNextNewName (takeExtension fp) + else return fp imgContent <- lift $ P.readFileLazy img (coverImageWidth, coverImageHeight) <- case imageSize opts' (B.toStrict imgContent) of Right sz -> return $ sizeInPixels sz Left err' -> (0, 0) <$ report - (CouldNotDetermineImageSize (TS.pack img) err') + (CouldNotDetermineImageSize (T.pack img) err') cpContent <- lift $ writeHtml opts'{ writerVariables = Context (M.fromList [ ("coverpage", toVal' "true"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle), - ("cover-image", toVal' coverImage), + escapeStringForXML plainTitle), + ("cover-image", + toVal' $ T.pack coverImageName), ("cover-image-width", toVal' $ - show coverImageWidth), + tshow coverImageWidth), ("cover-image-height", toVal' $ - show coverImageHeight)]) <> + tshow coverImageHeight)]) <> cssvars True <> vars } (Pandoc meta []) coverEntry <- mkEntry "text/cover.xhtml" cpContent - coverImageEntry <- mkEntry ("media/" ++ coverImage) + coverImageEntry <- mkEntry ("media/" ++ coverImageName) imgContent return ( [ coverEntry ] , [ coverImageEntry ] ) @@ -498,7 +519,7 @@ pandocToEPUB version opts doc = do ("titlepage", toVal' "true"), ("body-type", toVal' "frontmatter"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> cssvars True <> vars } (Pandoc meta []) tpEntry <- mkEntry "text/title_page.xhtml" tpContent @@ -507,7 +528,7 @@ pandocToEPUB version opts doc = do let matchingGlob f = do xs <- lift $ P.glob f when (null xs) $ - report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files" + report $ CouldNotFetchResource (T.pack f) "glob did not match any font files" return xs let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< lift (P.readFileLazy f) @@ -554,16 +575,42 @@ pandocToEPUB version opts doc = do let chapters' = secsToChapters secs - let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] + let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)] extractLinkURL' num (Span (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] + extractLinkURL' num (Link (ident, _, _) _ _) + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] + extractLinkURL' num (Image (ident, _, _) _ _) + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] + extractLinkURL' num (RawInline fmt raw) + | isHtmlFormat fmt + = foldr (\tag -> + case tag of + TagOpen{} -> + case fromAttrib "id" tag of + "" -> id + x -> ((x, showChapter num <> "#" <> x):) + _ -> id) + [] (parseTags raw) extractLinkURL' _ _ = [] - let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] + let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)] extractLinkURL num (Div (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (Header _ (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] + extractLinkURL num (Table (ident,_,_) _ _ _ _ _) + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] + extractLinkURL num (RawBlock fmt raw) + | isHtmlFormat fmt + = foldr (\tag -> + case tag of + TagOpen{} -> + case fromAttrib "id" tag of + "" -> id + x -> ((x, showChapter num <> "#" <> x):) + _ -> id) + [] (parseTags raw) extractLinkURL num b = query (extractLinkURL' num) b let reftable = concat $ zipWith (\(Chapter bs) num -> @@ -572,7 +619,7 @@ pandocToEPUB version opts doc = do let fixInternalReferences :: Inline -> Inline fixInternalReferences (Link attr lab (src, tit)) - | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of + | Just ('#', xs) <- T.uncons src = case lookup xs reftable of Just ys -> Link attr lab (ys, tit) Nothing -> Link attr lab (src, tit) fixInternalReferences x = x @@ -585,7 +632,7 @@ pandocToEPUB version opts doc = do chapters' let chapToEntry num (Chapter bs) = - mkEntry ("text/" ++ showChapter num) =<< + mkEntry ("text/" ++ T.unpack (showChapter num)) =<< writeHtml opts'{ writerVariables = Context (M.fromList [("body-type", toVal' bodyType), @@ -632,12 +679,12 @@ pandocToEPUB version opts doc = do let chapterNode ent = unode "item" ! ([("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of [] -> [] - xs -> [("properties", unwords xs)]) + xs -> [("properties", T.unwords xs)]) $ () let chapterRefNode ent = unode "itemref" ! @@ -646,17 +693,17 @@ pandocToEPUB version opts doc = do let pictureNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", - maybe "application/octet-stream" TS.unpack + fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), - ("media-type", maybe "" TS.unpack $ + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () let tocTitle = maybe plainTitle @@ -664,8 +711,8 @@ pandocToEPUB version opts doc = do uuid <- case epubIdentifier metadata of (x:_) -> return $ identifierText x -- use first identifier as UUID [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen - currentTime <- lift P.getCurrentTime - let contentsData = UTF8.fromStringLazy $ ppTopElement $ + currentTime <- lift P.getTimestamp + let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $ unode "package" ! ([("version", case version of EPUB2 -> "2.0" @@ -683,7 +730,8 @@ pandocToEPUB version opts doc = do ,("media-type","application/xhtml+xml")] ++ [("properties","nav") | epub3 ]) $ () ] ++ - [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp) + [ unode "item" ! [("id","stylesheet" <> tshow n) + , ("href", T.pack fp) ,("media-type","text/css")] $ () | (n :: Int, fp) <- zip [1..] (map (makeRelative epubSubdir . eRelativePath) @@ -728,7 +776,7 @@ pandocToEPUB version opts doc = do let tocLevel = writerTOCDepth opts let navPointNode :: PandocMonad m - => (Int -> [Inline] -> TS.Text -> [Element] -> Element) + => (Int -> [Inline] -> T.Text -> [Element] -> Element) -> Block -> StateT Int m [Element] navPointNode formatter (Div (ident,_,_) (Header lvl (_,_,kvs) ils : children)) = @@ -738,7 +786,7 @@ pandocToEPUB version opts doc = do n <- get modify (+1) let num = fromMaybe "" $ lookup "number" kvs - let tit = if writerNumberSections opts && not (TS.null num) + let tit = if writerNumberSections opts && not (T.null num) then Span ("", ["section-header-number"], []) [Str num] : Space : ils else ils @@ -752,21 +800,21 @@ pandocToEPUB version opts doc = do concat <$> mapM (navPointNode formatter) bs navPointNode _ _ = return [] - let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit - , unode "content" ! [("src", "text/" <> TS.unpack src)] $ () + [("id", "navPoint-" <> tshow n)] $ + [ unode "navLabel" $ unode "text" $ stringify tit + , unode "content" ! [("src", "text/" <> src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta) + [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) , unode "content" ! [("src", "text/title_page.xhtml")] $ () ] navMap <- lift $ evalStateT (concat <$> mapM (navPointNode navMapFormatter) secs) 1 - let tocData = UTF8.fromStringLazy $ ppTopElement $ + let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ @@ -788,23 +836,24 @@ pandocToEPUB version opts doc = do ] tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! - [("id", "toc-li-" ++ show n)] $ + [("id", "toc-li-" <> tshow n)] $ (unode "a" ! - [("href", "text/" <> TS.unpack src)] + [("href", "text/" <> src)] $ titElements) : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] - where titElements = parseXML titRendered + where titElements = either (const []) id $ + parseXMLContents (TL.fromStrict titRendered) titRendered = case P.runPure (writeHtmlStringForEPUB version opts{ writerTemplate = Nothing , writerVariables = Context (M.fromList [("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> writerVariables opts} (Pandoc nullMeta [Plain $ walk clean tit])) of @@ -819,33 +868,40 @@ pandocToEPUB version opts doc = do tocBlocks <- lift $ evalStateT (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 let navBlocks = [RawBlock (Format "html") - $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces + $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle , unode "ol" ! [("class","toc")] $ tocBlocks ]] let landmarkItems = if epub3 - then [ unode "li" + then unode "li" + [ unode "a" ! [("href", + "text/title_page.xhtml") + ,("epub:type", "titlepage")] $ + ("Title Page" :: Text) ] : + [ unode "li" [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ - ("Cover" :: String)] | + ("Cover" :: Text)] | isJust (epubCoverImage metadata) ] ++ [ unode "li" [ unode "a" ! [("href", "#toc") ,("epub:type", "toc")] $ - ("Table of contents" :: String) + ("Table of Contents" :: Text) ] | writerTableOfContents opts ] else [] - let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $ + let landmarks = [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("id","landmarks") ,("hidden","hidden")] $ [ unode "ol" landmarkItems ] | not (null landmarkItems)] navData <- lift $ writeHtml opts'{ writerVariables = - Context (M.fromList [("navpage", toVal' "true")]) + Context (M.fromList [("navpage", toVal' "true") + ,("body-type", toVal' "frontmatter") + ]) <> cssvars False <> vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) @@ -857,22 +913,22 @@ pandocToEPUB version opts doc = do UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = UTF8.fromStringLazy $ ppTopElement $ + let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ unode "rootfile" ! [("full-path", (if null epubSubdir then "" - else epubSubdir ++ "/") ++ "content.opf") + else T.pack epubSubdir <> "/") <> "content.opf") ,("media-type","application/oebps-package+xml")] $ () containerEntry <- mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = UTF8.fromStringLazy $ ppTopElement $ + let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ - unode "option" ! [("name","specified-fonts")] $ ("true" :: String) + unode "option" ! [("name","specified-fonts")] $ ("true" :: Text) appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- construct archive @@ -893,8 +949,9 @@ metadataElement version md currentTime = ++ descriptionNodes ++ typeNodes ++ formatNodes ++ publisherNodes ++ sourceNodes ++ relationNodes ++ coverageNodes ++ rightsNodes ++ coverImageNodes - ++ modifiedNodes - withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + ++ modifiedNodes ++ belongsToCollectionNodes + withIds base f = concat . zipWith f (map (\x -> base <> + T.cons '-' (tshow x)) ([1..] :: [Int])) identifierNodes = withIds "epub-id" toIdentifierNode $ epubIdentifier md @@ -908,9 +965,9 @@ metadataElement version md currentTime = (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] ibooksNodes = map ibooksNode (epubIbooksFields md) - ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v calibreNodes = map calibreNode (epubCalibreFields md) - calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k), + calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k), ("content", v)] $ () languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ @@ -932,7 +989,16 @@ metadataElement version md currentTime = $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ showDateTimeISO8601 currentTime | version == EPUB3 ] - dcTag n s = unode ("dc:" ++ n) s + belongsToCollectionNodes = + maybe [] + (\belongsToCollection -> (unode "meta" ! [("property", "belongs-to-collection"), ("id", "epub-collection-1")] $ belongsToCollection ) + : + [unode "meta" ! [("refines", "#epub-collection-1"), ("property", "collection-type")] $ ("series" :: Text) ]) + (epubBelongsToCollection md)++ + maybe [] + (\groupPosition -> [unode "meta" ! [("refines", "#epub-collection-1"), ("property", "group-position")] $ groupPosition ]) + (epubGroupPosition md) + dcTag n s = unode ("dc:" <> n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) | version == EPUB2 = [dcNode "identifier" ! @@ -940,7 +1006,7 @@ metadataElement version md currentTime = txt] | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) : maybe [] ((\x -> [unode "meta" ! - [ ("refines",'#':id') + [ ("refines","#" <> id') , ("property","identifier-type") , ("scheme","onix:codelist5") ] @@ -956,10 +1022,10 @@ metadataElement version md currentTime = (creatorRole creator >>= toRelator)) $ creatorText creator] | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (creatorFileAs creator) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","role"), + [("refines","#" <> id'),("property","role"), ("scheme","marc:relators")] $ x]) (creatorRole creator >>= toRelator) toTitleNode id' title @@ -971,16 +1037,16 @@ metadataElement version md currentTime = | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (titleFileAs title) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","title-type")] $ x]) + [("refines","#" <> id'),("property","title-type")] $ x]) (titleType title) toDateNode id' date = [dcNode "date" ! (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] - schemeToOnix :: String -> String + schemeToOnix :: Text -> Text schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03" schemeToOnix "UPC" = "04" @@ -998,59 +1064,64 @@ metadataElement version md currentTime = schemeToOnix "OLCC" = "28" schemeToOnix _ = "01" -showDateTimeISO8601 :: UTCTime -> String -showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" +showDateTimeISO8601 :: UTCTime -> Text +showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ" transformTag :: PandocMonad m - => Tag TS.Text - -> E m (Tag TS.Text) + => Tag T.Text + -> E m (Tag T.Text) transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef $ TS.unpack src - newposter <- modifyMediaRef $ TS.unpack poster + newsrc <- modifyMediaRef $ T.unpack src + newposter <- modifyMediaRef $ T.unpack poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", "../" <> newsrc) | not (TS.null newsrc)] ++ - [("poster", "../" <> newposter) | not (TS.null newposter)] + [("src", "../" <> newsrc) | not (T.null newsrc)] ++ + [("poster", "../" <> newposter) | not (T.null newposter)] return $ TagOpen name attr' transformTag tag = return tag modifyMediaRef :: PandocMonad m => FilePath - -> E m TS.Text + -> E m T.Text modifyMediaRef "" = return "" modifyMediaRef oldsrc = do media <- gets stMediaPaths case lookup oldsrc media of - Just (n,_) -> return $ TS.pack n + Just (n,_) -> return $ T.pack n Nothing -> catchError - (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc - let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack + (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc + let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack (("." <>) <$> (mbMime >>= extensionFromMimeType)) newName <- getMediaNextNewName ext let newPath = "media/" ++ newName entry <- mkEntry newPath (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (newPath, Just entry)):media} - return $ TS.pack newPath) + return $ T.pack newPath) (\e -> do - report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e) - return $ TS.pack oldsrc) + report $ CouldNotFetchResource (T.pack oldsrc) (tshow e) + return $ T.pack oldsrc) -getMediaNextNewName :: PandocMonad m => String -> E m String +getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath getMediaNextNewName ext = do nextId <- gets stMediaNextId modify $ \st -> st { stMediaNextId = nextId + 1 } - let nextName = "file" ++ show nextId ++ ext - (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName) + return $ "file" ++ show nextId ++ ext + +isHtmlFormat :: Format -> Bool +isHtmlFormat (Format "html") = True +isHtmlFormat (Format "html4") = True +isHtmlFormat (Format "html5") = True +isHtmlFormat _ = False transformBlock :: PandocMonad m => Block -> E m Block transformBlock (RawBlock fmt raw) - | fmt == Format "html" = do + | isHtmlFormat fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawBlock fmt (renderTags' tags') @@ -1060,56 +1131,43 @@ transformInline :: PandocMonad m => WriterOptions -> Inline -> E m Inline -transformInline _opts (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef $ TS.unpack src +transformInline _opts (Image attr@(_,_,kvs) lab (src,tit)) + | isNothing (lookup "external" kvs) = do + newsrc <- modifyMediaRef $ T.unpack src return $ Image attr lab ("../" <> newsrc, tit) transformInline opts x@(Math t m) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m)) + newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] transformInline _opts (RawInline fmt raw) - | fmt == Format "html" = do + | isHtmlFormat fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawInline fmt (renderTags' tags') transformInline _ x = return x -(!) :: (t -> Element) -> [(String, String)] -> t -> Element +(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) --- | Version of 'ppTopElement' that specifies UTF-8 encoding. -ppTopElement :: Element -> String -ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement - -- unEntity removes numeric entities introduced by ppElement - -- (kindlegen seems to choke on these). - where unEntity [] = "" - unEntity ('&':'#':xs) = - let (ds,ys) = break (==';') xs - rest = drop 1 ys - in case safeRead (TS.pack $ "'\\" <> ds <> "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs - unEntity (x:xs) = x : unEntity xs - mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of - Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y + Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y _ -> Nothing -- Returns filename for chapter number. -showChapter :: Int -> String -showChapter = printf "ch%03d.xhtml" +showChapter :: Int -> Text +showChapter = T.pack . printf "ch%03d.xhtml" -- Add identifiers to any headers without them. addIdentifiers :: WriterOptions -> [Block] -> [Block] addIdentifiers opts bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get - let ident' = if TS.null ident + let ident' = if T.null ident then uniqueIdent (writerExtensions opts) ils ids else ident modify $ Set.insert ident' @@ -1117,27 +1175,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty go x = return x -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM -normalizeDate' :: String -> Maybe String -normalizeDate' = fmap TS.unpack . go . trim . TS.pack +normalizeDate' :: Text -> Maybe Text +normalizeDate' = go . T.strip where go xs - | TS.length xs == 4 -- YYY - , TS.all isDigit xs = Just xs - | (y, s) <- TS.splitAt 4 xs -- YYY-MM - , Just ('-', m) <- TS.uncons s - , TS.length m == 2 - , TS.all isDigit y && TS.all isDigit m = Just xs + | T.length xs == 4 -- YYY + , T.all isDigit xs = Just xs + | (y, s) <- T.splitAt 4 xs -- YYY-MM + , Just ('-', m) <- T.uncons s + , T.length m == 2 + , T.all isDigit y && T.all isDigit m = Just xs | otherwise = normalizeDate xs -toRelator :: String -> Maybe String +toRelator :: Text -> Maybe Text toRelator x | x `elem` relators = Just x - | otherwise = lookup (map toLower x) relatorMap + | otherwise = lookup (T.toLower x) relatorMap -relators :: [String] +relators :: [Text] relators = map snd relatorMap -relatorMap :: [(String, String)] +relatorMap :: [(Text, Text)] relatorMap = [("abridger", "abr") ,("actor", "act") diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 701ff3d9b..3b5d04427 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -3,7 +3,7 @@ {- | Module : Text.Pandoc.Writers.FB2 Copyright : Copyright (C) 2011-2012 Sergey Astanin - 2012-2020 John MacFarlane + 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -19,29 +19,29 @@ FictionBook is an XML-based e-book format. For more information see: module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad (zipWithM) -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify) import Data.ByteString.Base64 (encode) import Data.Char (isAscii, isControl, isSpace) import Data.Either (lefts, rights) import Data.List (intercalate) -import Data.Text (Text, pack) +import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) -import Text.XML.Light -import qualified Text.XML.Light as X -import qualified Text.XML.Light.Cursor as XC -import qualified Text.XML.Light.Input as XI +import Text.Pandoc.XML.Light as X import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, makeSections, tshow, stringify) import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable) +import Data.Generics (everywhere, mkT) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -86,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ pack $ xml_head ++ showContent fb2_xml ++ "\n" + return $ xml_head <> showContent fb2_xml <> "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = @@ -98,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do description :: PandocMonad m => Meta -> FBM m Content description meta' = do let genre = case lookupMetaString "genre" meta' of - "" -> el "genre" ("unrecognised" :: String) - s -> el "genre" (T.unpack s) + "" -> el "genre" ("unrecognised" :: Text) + s -> el "genre" s bt <- booktitle meta' let as = authors meta' dd <- docdate meta' @@ -110,7 +110,7 @@ description meta' = do Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] Just (MetaString s) -> [el "lang" $ iso639 s] _ -> [] - where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 let coverimage url = do let img = Image nullAttr mempty (url, "") im <- insertImage InlineImage img @@ -122,7 +122,7 @@ description meta' = do return $ el "description" [ el "title-info" (genre : (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang)) - , el "document-info" [el "program-used" ("pandoc" :: String)] + , el "document-info" [el "program-used" ("pandoc" :: Text)] ] booktitle :: PandocMonad m => Meta -> FBM m [Content] @@ -135,15 +135,15 @@ authors meta' = cMap author (docAuthors meta') author :: [Inline] -> [Content] author ss = - let ws = words . cMap plain $ ss - email = el "email" <$> take 1 (filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws + let ws = T.words $ mconcat $ map plain ss + email = el "email" <$> take 1 (filter (T.any (=='@')) ws) + ws' = filter (not . T.any (== '@')) ws names = case ws' of [nickname] -> [ el "nickname" nickname ] [fname, lname] -> [ el "first-name" fname , el "last-name" lname ] (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) + , el "middle-name" (T.concat . init $ rest) , el "last-name" (last rest) ] [] -> [] in list $ el "author" (names ++ email) @@ -204,7 +204,7 @@ renderFootnotes = do el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) where renderFN (n, idstr, cs) = - let fn_texts = el "title" (el "p" (show n)) : cs + let fn_texts = el "title" (el "p" (tshow n)) : cs in el "section" ([uattr "id" idstr], fn_texts) -- | Fetch images and encode them for the FictionBook XML. @@ -280,7 +280,7 @@ isMimeType s = where types = ["text","image","audio","video","application","message","multipart"] valid c = isAscii c && not (isControl c) && not (isSpace c) && - c `notElem` ("()<>@,;:\\\"/[]?=" :: String) + c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char]) footnoteID :: Int -> Text footnoteID i = "n" <> tshow i @@ -304,10 +304,13 @@ blockToXml (Para [Image atr alt (src,tgt)]) = insertImage NormalImage (Image atr alt (src,tit)) blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code" . T.unpack) . T.lines $ s + map (el "p" . el "code") . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" - then return $ XI.parseXML str + then + case parseXMLContents (TL.fromStrict str) of + Left msg -> throwError $ PandocXMLError "" msg + Right nds -> return nds else return [] blockToXml (Div _ bs) = cMapM blockToXml bs blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs @@ -341,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do c <- el "emphasis" <$> cMapM toXml caption return [el "table" (hd <> bd), el "p" c] where - mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content + mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content mkrow tag cells aligns' = el "tr" <$> mapM (mkcell tag) (zip cells aligns') -- - mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content + mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content mkcell tag (cell, align) = do cblocks <- cMapM blockToXml cell return $ el tag ([align_attr align], cblocks) @@ -419,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do inner <- cMapM toXml ss return $ [txt "“"] ++ inner ++ [txt "”"] toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles -toXml (Code _ s) = return [el "code" $ T.unpack s] +toXml (Code _ s) = return [el "code" s] toXml Space = return [txt " "] toXml SoftBreak = return [txt "\n"] toXml LineBreak = return [txt "\n"] @@ -451,7 +454,7 @@ insertMath immode formula = do let imgurl = url <> T.pack (urlEncode $ T.unpack formula) let img = Image nullAttr alt (imgurl, "") insertImage immode img - _ -> return [el "code" $ T.unpack formula] + _ -> return [el "code" formula] insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do @@ -466,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do el "image" $ [ attr ("l","href") ("#" <> fname) , attr ("l","type") (tshow immode) - , uattr "alt" (T.pack $ cMap plain alt) ] + , uattr "alt" (mconcat $ map plain alt) ] ++ ttlattr insertImage _ _ = error "unexpected inline instead of image" replaceImagesWithAlt :: [Text] -> Content -> Content -replaceImagesWithAlt missingHrefs body = - let cur = XC.fromContent body - cur' = replaceAll cur - in XC.toTree . XC.root $ cur' +replaceImagesWithAlt missingHrefs = everywhere (mkT go) where - -- - replaceAll :: XC.Cursor -> XC.Cursor - replaceAll c = - let n = XC.current c - c' = if isImage n && isMissing n - then XC.modifyContent replaceNode c - else c - in case XC.nextDF c' of - (Just cnext) -> replaceAll cnext - Nothing -> c' -- end of document - -- - isImage :: Content -> Bool - isImage (Elem e) = elName e == uname "image" - isImage _ = False - -- + go c = if isMissing c + then replaceNode c + else c isMissing (Elem img@Element{}) = let imgAttrs = elAttribs img badAttrs = map (attr ("l","href")) missingHrefs @@ -500,18 +488,18 @@ replaceImagesWithAlt missingHrefs body = replaceNode :: Content -> Content replaceNode n@(Elem img@Element{}) = let attrs = elAttribs img - alt = getAttrVal attrs (uname "alt") + alt = getAttrVal attrs (unqual "alt") imtype = getAttrVal attrs (qname "l" "type") in case (alt, imtype) of (Just alt', Just imtype') -> - if imtype' == show NormalImage + if imtype' == tshow NormalImage then el "p" alt' - else txt $ T.pack alt' - (Just alt', Nothing) -> txt $ T.pack alt' -- no type attribute + else txt alt' + (Just alt', Nothing) -> txt alt' -- no type attribute _ -> n -- don't replace if alt text is not found replaceNode n = n -- - getAttrVal :: [X.Attr] -> QName -> Maybe String + getAttrVal :: [X.Attr] -> QName -> Maybe Text getAttrVal attrs name = case filter ((name ==) . attrKey) attrs of (a:_) -> Just (attrVal a) @@ -519,7 +507,7 @@ replaceImagesWithAlt missingHrefs body = -- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: PandocMonad m => String -> [Inline] -> FBM m Content +wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content wrap tagname inlines = el tagname `liftM` cMapM toXml inlines -- " Create a singleton list. @@ -527,31 +515,31 @@ list :: a -> [a] list = (:[]) -- | Convert an 'Inline' to plaintext. -plain :: Inline -> String -plain (Str s) = T.unpack s -plain (Emph ss) = cMap plain ss -plain (Underline ss) = cMap plain ss -plain (Span _ ss) = cMap plain ss -plain (Strong ss) = cMap plain ss -plain (Strikeout ss) = cMap plain ss -plain (Superscript ss) = cMap plain ss -plain (Subscript ss) = cMap plain ss -plain (SmallCaps ss) = cMap plain ss -plain (Quoted _ ss) = cMap plain ss -plain (Cite _ ss) = cMap plain ss -- FIXME -plain (Code _ s) = T.unpack s +plain :: Inline -> Text +plain (Str s) = s +plain (Emph ss) = mconcat $ map plain ss +plain (Underline ss) = mconcat $ map plain ss +plain (Span _ ss) = mconcat $ map plain ss +plain (Strong ss) = mconcat $ map plain ss +plain (Strikeout ss) = mconcat $ map plain ss +plain (Superscript ss) = mconcat $ map plain ss +plain (Subscript ss) = mconcat $ map plain ss +plain (SmallCaps ss) = mconcat $ map plain ss +plain (Quoted _ ss) = mconcat $ map plain ss +plain (Cite _ ss) = mconcat $ map plain ss -- FIXME +plain (Code _ s) = s plain Space = " " plain SoftBreak = " " plain LineBreak = "\n" -plain (Math _ s) = T.unpack s +plain (Math _ s) = s plain (RawInline _ _) = "" -plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"]) -plain (Image _ alt _) = cMap plain alt +plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"]) +plain (Image _ alt _) = mconcat $ map plain alt plain (Note _) = "" -- FIXME -- | Create an XML element. el :: (Node t) - => String -- ^ unqualified element name + => Text -- ^ unqualified element name -> t -- ^ node contents -> Content -- ^ XML content el name cs = Elem $ unode name cs @@ -564,22 +552,18 @@ spaceBeforeAfter cs = -- | Create a plain-text XML content. txt :: Text -> Content -txt s = Text $ CData CDataText (T.unpack s) Nothing +txt s = Text $ CData CDataText s Nothing -- | Create an XML attribute with an unqualified name. -uattr :: String -> Text -> Text.XML.Light.Attr -uattr name = Attr (uname name) . T.unpack +uattr :: Text -> Text -> X.Attr +uattr name = Attr (unqual name) -- | Create an XML attribute with a qualified name from given namespace. -attr :: (String, String) -> Text -> Text.XML.Light.Attr -attr (ns, name) = Attr (qname ns name) . T.unpack - --- | Unqualified name -uname :: String -> QName -uname name = QName name Nothing Nothing +attr :: (Text, Text) -> Text -> X.Attr +attr (ns, name) = Attr (qname ns name) -- | Qualified name -qname :: String -> String -> QName +qname :: Text -> Text -> QName qname ns name = QName name Nothing (Just ns) -- | Abbreviation for 'concatMap'. diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs new file mode 100644 index 000000000..bc468febc --- /dev/null +++ b/src/Text/Pandoc/Writers/GridTable.hs @@ -0,0 +1,157 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TupleSections #-} + +{- | +Module : Text.Pandoc.Writers.GridTable +Copyright : © 2020-2021 Albert Krewinkel +License : GNU GPL, version 2 or above + +Maintainer : Albert Krewinkel <albert@zeitkraut.de> + +Grid representation of pandoc tables. +-} +module Text.Pandoc.Writers.GridTable + ( Table (..) + , GridCell (..) + , RowIndex (..) + , ColIndex (..) + , CellIndex + , Part (..) + , toTable + , rowArray + ) where + +import Control.Monad (forM_) +import Control.Monad.ST +import Data.Array +import Data.Array.MArray +import Data.Array.ST +import Data.Maybe (listToMaybe) +import Data.STRef +import Text.Pandoc.Definition hiding (Table) +import qualified Text.Pandoc.Builder as B + +-- | A grid cell contains either a real table cell, or is the +-- continuation of a column or row-spanning cell. In the latter case, +-- the index of the continued cell is provided. +data GridCell + = ContentCell Attr Alignment RowSpan ColSpan [Block] + | ContinuationCell CellIndex + deriving (Show) + +-- | Row index in a table part. +newtype RowIndex = RowIndex Int deriving (Enum, Eq, Ix, Ord, Show) +-- | Column index in a table part. +newtype ColIndex = ColIndex Int deriving (Enum, Eq, Ix, Ord, Show) + +-- | Index to a cell in a table part. +type CellIndex = (RowIndex, ColIndex) + +-- | Cells are placed on a grid. Row attributes are stored in a separate +-- array. +data Part = Part + { partAttr :: Attr + , partCellArray :: Array (RowIndex,ColIndex) GridCell + , partRowAttrs :: Array RowIndex Attr + } + +data Table = Table + { tableAttr :: Attr + , tableCaption :: Caption + , tableColSpecs :: Array ColIndex ColSpec + , tableRowHeads :: RowHeadColumns + , tableHead :: Part + , tableBodies :: [Part] + , tableFoot :: Part + } + +toTable + :: B.Attr + -> B.Caption + -> [B.ColSpec] + -> B.TableHead + -> [B.TableBody] + -> B.TableFoot + -> Table +toTable attr caption colSpecs thead tbodies tfoot = + Table attr caption colSpecs' rowHeads thGrid tbGrids tfGrid + where + colSpecs' = listArray (ColIndex 1, ColIndex $ length colSpecs) colSpecs + rowHeads = case listToMaybe tbodies of + Nothing -> RowHeadColumns 0 + Just (TableBody _attr rowHeadCols _headerRows _rows) -> rowHeadCols + thGrid = let (TableHead headAttr rows) = thead + in rowsToPart headAttr rows + tbGrids = map bodyToGrid tbodies + tfGrid = let (TableFoot footAttr rows) = tfoot + in rowsToPart footAttr rows + bodyToGrid (TableBody bodyAttr _rowHeadCols headRows rows) = + rowsToPart bodyAttr (headRows ++ rows) + +data BuilderCell + = FilledCell GridCell + | FreeCell + +fromBuilderCell :: BuilderCell -> GridCell +fromBuilderCell = \case + FilledCell c -> c + FreeCell -> error "Found an unassigned cell." + +rowsToPart :: Attr -> [B.Row] -> Part +rowsToPart attr = \case + [] -> Part + attr + (listArray ((RowIndex 1, ColIndex 1), (RowIndex 0, ColIndex 0)) []) + (listArray (RowIndex 1, RowIndex 0) []) + rows@(Row _attr firstRow:_) -> + let nrows = length rows + ncols = sum $ map (\(Cell _ _ _ (ColSpan cs) _) -> cs) firstRow + gbounds = ((RowIndex 1, ColIndex 1), (RowIndex nrows, ColIndex ncols)) + mutableGrid :: ST s (STArray s CellIndex GridCell) + mutableGrid = do + grid <- newArray gbounds FreeCell + ridx <- newSTRef (RowIndex 1) + forM_ rows $ \(Row _attr cells) -> do + cidx <- newSTRef (ColIndex 1) + forM_ cells $ \(Cell cellAttr align rs cs blks) -> do + ridx' <- readSTRef ridx + let nextFreeInRow colindex@(ColIndex c) = do + readArray grid (ridx', colindex) >>= \case + FreeCell -> pure colindex + _ -> nextFreeInRow $ ColIndex (c + 1) + cidx' <- readSTRef cidx >>= nextFreeInRow + writeArray grid (ridx', cidx') . FilledCell $ + ContentCell cellAttr align rs cs blks + forM_ (continuationIndices ridx' cidx' rs cs) $ \idx -> do + writeArray grid idx . FilledCell $ + ContinuationCell (ridx', cidx') + -- go to new column + writeSTRef cidx cidx' + -- go to next row + modifySTRef ridx (incrRowIndex 1) + -- Swap BuilderCells with normal GridCells. + mapArray fromBuilderCell grid + in Part + { partCellArray = runSTArray mutableGrid + , partRowAttrs = listArray (RowIndex 1, RowIndex nrows) $ + map (\(Row rowAttr _) -> rowAttr) rows + , partAttr = attr + } + +continuationIndices :: RowIndex -> ColIndex -> RowSpan -> ColSpan -> [CellIndex] +continuationIndices (RowIndex ridx) (ColIndex cidx) rowspan colspan = + let (RowSpan rs) = rowspan + (ColSpan cs) = colspan + in [ (RowIndex r, ColIndex c) | r <- [ridx..(ridx + rs - 1)] + , c <- [cidx..(cidx + cs - 1)] + , (r, c) /= (ridx, cidx)] + +rowArray :: RowIndex -> Array CellIndex GridCell -> Array ColIndex GridCell +rowArray ridx grid = + let ((_minRidx, minCidx), (_maxRidx, maxCidx)) = bounds grid + in ixmap (minCidx, maxCidx) (ridx,) grid + +incrRowIndex :: RowSpan -> RowIndex -> RowIndex +incrRowIndex (RowSpan n) (RowIndex r) = RowIndex $ r + n diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index cba6b7d1c..6f91d1965 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -6,7 +6,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,9 +31,9 @@ module Text.Pandoc.Writers.HTML ( import Control.Monad.Identity (runIdentity) import Control.Monad.State.Strict import Data.Char (ord) -import Data.List (intercalate, intersperse, partition, delete, (\\)) +import Data.List (intercalate, intersperse, partition, delete, (\\), foldl') import Data.List.NonEmpty (NonEmpty((:|))) -import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe) +import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T @@ -265,6 +265,8 @@ pandocToHtml opts (Pandoc meta blocks) = do let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta + let descriptionMeta = escapeStringForXML $ + lookupMetaString "description" meta slideVariant <- gets stSlideVariant let sects = adjustNumbers opts $ makeSections (writerNumberSections opts) Nothing $ @@ -354,6 +356,52 @@ pandocToHtml opts (Pandoc meta blocks) = do PlainMath -> defField "displaymath-css" True WebTeX _ -> defField "displaymath-css" True _ -> id) . + (if slideVariant == RevealJsSlides + then -- set boolean options explicitly, since + -- template can't distinguish False/undefined + defField "controls" True . + defField "controlsTutorial" True . + defField "controlsLayout" ("bottom-right" :: Text) . + defField "controlsBackArrows" ("faded" :: Text) . + defField "progress" True . + defField "slideNumber" False . + defField "showSlideNumber" ("all" :: Text) . + defField "hashOneBasedIndex" False . + defField "hash" False . + defField "respondToHashChanges" True . + defField "history" False . + defField "keyboard" True . + defField "overview" True . + defField "disableLayout" False . + defField "center" True . + defField "touch" True . + defField "loop" False . + defField "rtl" False . + defField "navigationMode" ("default" :: Text) . + defField "shuffle" False . + defField "fragments" True . + defField "fragmentInURL" True . + defField "embedded" False . + defField "help" True . + defField "pause" True . + defField "showNotes" False . + defField "autoPlayMedia" ("null" :: Text) . + defField "preloadIframes" ("null" :: Text) . + defField "autoSlide" ("0" :: Text) . + defField "autoSlideStoppable" True . + defField "autoSlideMethod" ("null" :: Text) . + defField "defaultTiming" ("null" :: Text) . + defField "mouseWheel" False . + defField "display" ("block" :: Text) . + defField "hideInactiveCursor" True . + defField "hideCursorTime" ("5000" :: Text) . + defField "previewLinks" False . + defField "transition" ("slide" :: Text) . + defField "transitionSpeed" ("default" :: Text) . + defField "backgroundTransition" ("fade" :: Text) . + defField "viewDistance" ("3" :: Text) . + defField "mobileViewDistance" ("2" :: Text) + else id) . defField "document-css" (isNothing mCss && slideVariant == NoSlides) . defField "quotes" (stQuotes st) . -- for backwards compatibility we populate toc @@ -364,6 +412,7 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "author-meta" authsMeta . maybe id (defField "date-meta") (normalizeDate dateMeta) . + defField "description-meta" descriptionMeta . defField "pagetitle" (stringifyHTML . docTitle $ meta) . defField "idprefix" (writerIdentifierPrefix opts) . @@ -553,30 +602,35 @@ tagWithAttributes opts html5 selfClosing tagname attr = addAttrs :: PandocMonad m => WriterOptions -> Attr -> Html -> StateT WriterState m Html -addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr +addAttrs opts attr h = foldl' (!) h <$> attrsToHtml opts attr toAttrs :: PandocMonad m => [(Text, Text)] -> StateT WriterState m [Attribute] toAttrs kvs = do html5 <- gets stHtml5 mbEpubVersion <- gets stEPUBVersion - return $ mapMaybe (\(x,y) -> - if html5 - then - if x `Set.member` (html5Attributes <> rdfaAttributes) - || T.any (== ':') x -- e.g. epub: namespace - || "data-" `T.isPrefixOf` x - || "aria-" `T.isPrefixOf` x - then Just $ customAttribute (textTag x) (toValue y) - else Just $ customAttribute (textTag ("data-" <> x)) - (toValue y) - else - if mbEpubVersion == Just EPUB2 && - not (x `Set.member` (html4Attributes <> rdfaAttributes) || - "xml:" `T.isPrefixOf` x) - then Nothing - else Just $ customAttribute (textTag x) (toValue y)) - kvs + reverse . snd <$> foldM (go html5 mbEpubVersion) (Set.empty, []) kvs + where + go html5 mbEpubVersion (keys, attrs) (k,v) = do + if k `Set.member` keys + then do + report $ DuplicateAttribute k v + return (keys, attrs) + else return (Set.insert k keys, addAttr html5 mbEpubVersion k v attrs) + addAttr html5 mbEpubVersion x y + | html5 + = if x `Set.member` (html5Attributes <> rdfaAttributes) + || T.any (== ':') x -- e.g. epub: namespace + || "data-" `T.isPrefixOf` x + || "aria-" `T.isPrefixOf` x + then (customAttribute (textTag x) (toValue y) :) + else (customAttribute (textTag ("data-" <> x)) (toValue y) :) + | mbEpubVersion == Just EPUB2 + , not (x `Set.member` (html4Attributes <> rdfaAttributes) || + "xml:" `T.isPrefixOf` x) + = id + | otherwise + = (customAttribute (textTag x) (toValue y) :) attrsToHtml :: PandocMonad m => WriterOptions -> Attr -> StateT WriterState m [Attribute] @@ -617,17 +671,20 @@ dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height figure :: PandocMonad m => WriterOptions -> Attr -> [Inline] -> (Text, Text) -> StateT WriterState m Html -figure opts attr txt (s,tit) = do +figure opts attr@(_, _, attrList) txt (s,tit) = do html5 <- gets stHtml5 -- Screen-readers will normally read the @alt@ text and the figure; we -- want to avoid them reading the same text twice. With HTML5 we can -- use aria-hidden for the caption; with HTML4, we use an empty -- alt-text instead. + -- When the alt text differs from the caption both should be read. let alt = if html5 then txt else [Str ""] let tocapt = if html5 - then H5.figcaption ! - H5.customAttribute (textTag "aria-hidden") - (toValue @Text "true") + then (H5.figcaption !) $ + if isJust (lookup "alt" attrList) + then mempty + else H5.customAttribute (textTag "aria-hidden") + (toValue @Text "true") else H.p ! A.class_ "caption" img <- inlineToHtml opts (Image attr alt (s,tit)) capt <- if null txt @@ -707,12 +764,12 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) let fragmentClass = case slideVariant of RevealJsSlides -> "fragment" _ -> "incremental" - let inDiv zs = RawBlock (Format "html") ("<div class=\"" + let inDiv' zs = RawBlock (Format "html") ("<div class=\"" <> fragmentClass <> "\">") : (zs ++ [RawBlock (Format "html") "</div>"]) let breakOnPauses zs = case splitBy isPause zs of [] -> [] - y:ys -> y ++ concatMap inDiv ys + y:ys -> y ++ concatMap inDiv' ys let (titleBlocks, innerSecs) = if titleSlide -- title slides have no content of their own @@ -774,9 +831,10 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do lookup "entry-spacing" kvs' >>= safeRead } let isCslBibEntry = "csl-entry" `elem` classes - let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++ - [("style", "width:" <> w <> ";") | "column" `elem` classes, - ("width", w) <- kvs'] ++ + let kvs = [(k,v) | (k,v) <- kvs' + , k /= "width" || "column" `notElem` classes] ++ + [("style", "width:" <> w <> ";") | "column" `elem` classes + , ("width", w) <- kvs'] ++ [("role", "doc-bibliography") | isCslBibBody && html5] ++ [("role", "doc-biblioentry") | isCslBibEntry && html5] let speakerNotes = "notes" `elem` classes @@ -790,14 +848,17 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do classes' = case slideVariant of NoSlides -> classes _ -> filter (\k -> k /= "incremental" && k /= "nonincremental") classes + let paraToPlain (Para ils) = Plain ils + paraToPlain x = x + let bs' = if "csl-entry" `elem` classes' + then walk paraToPlain bs + else bs contents <- if "columns" `elem` classes' then -- we don't use blockListToHtml because it inserts -- a newline between the column divs, which throws -- off widths! see #4028 - mconcat <$> mapM (blockToHtml opts) bs - else if isCslBibEntry - then mconcat <$> mapM (cslEntryToHtml opts') bs - else blockListToHtml opts' bs + mconcat <$> mapM (blockToHtml opts) bs' + else blockListToHtml opts' bs' let contents' = nl opts >> contents >> nl opts let (divtag, classes'') = if html5 && "section" `elem` classes' then (H5.section, filter (/= "section") classes') @@ -883,7 +944,7 @@ blockToHtml opts (BlockQuote blocks) = do else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do +blockToHtml opts (Header level (ident,classes,kvs) lst) = do contents <- inlineListToHtml opts lst let secnum = fromMaybe mempty $ lookup "number" kvs let contents' = if writerNumberSections opts && not (T.null secnum) @@ -891,7 +952,13 @@ blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do then (H.span ! A.class_ "header-section-number" $ toHtml secnum) >> strToHtml " " >> contents else contents - addAttrs opts attr + html5 <- gets stHtml5 + let kvs' = if html5 + then kvs + else [ (k, v) | (k, v) <- kvs + , k `elem` (["lang", "dir", "title", "style" + , "align"] ++ intrinsicEventsHTML4)] + addAttrs opts (ident,classes,kvs') $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' @@ -927,7 +994,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do numstyle'] else []) l <- ordList opts contents - return $ foldl (!) l attribs + return $ foldl' (!) l attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- liftM H.dt $ inlineListToHtml opts term @@ -1225,6 +1292,10 @@ inlineToHtml opts inline = do LineBreak -> return $ do if html5 then H5.br else H.br strToHtml "\n" + (Span ("",[cls],[]) ils) + | cls == "csl-block" || cls == "csl-left-margin" || + cls == "csl-right-inline" || cls == "csl-indent" + -> inlineListToHtml opts ils >>= inDiv cls (Span (id',classes,kvs) ils) -> let spanLikeTag = case classes of @@ -1377,7 +1448,7 @@ inlineToHtml opts inline = do return $ if T.null tit then link' else link' ! A.title (toValue tit) - (Image attr txt (s,tit)) -> do + (Image attr@(_, _, attrList) txt (s, tit)) -> do let alternate = stringify txt slideVariant <- gets stSlideVariant let isReveal = slideVariant == RevealJsSlides @@ -1390,7 +1461,8 @@ inlineToHtml opts inline = do [A.title $ toValue tit | not (T.null tit)] ++ attrs imageTag = (if html5 then H5.img else H.img - , [A.alt $ toValue alternate | not (null txt)] ) + , [A.alt $ toValue alternate | not (null txt) && + isNothing (lookup "alt" attrList)] ) mediaTag tg fallbackTxt = let linkTxt = if null txt then fallbackTxt @@ -1404,7 +1476,7 @@ inlineToHtml opts inline = do Just "audio" -> mediaTag H5.audio "Audio" Just _ -> (H5.embed, []) _ -> imageTag - return $ foldl (!) tag $ attributes ++ specAttrs + return $ foldl' (!) tag $ attributes ++ specAttrs -- note: null title included, as in Markdown.pl (Note contents) -> do notes <- gets stNotes @@ -1457,11 +1529,15 @@ blockListToNote opts ref blocks = do else let lastBlock = last blocks otherBlocks = init blocks in case lastBlock of - (Para lst) -> otherBlocks ++ + Para [Image _ _ (_,tit)] + | "fig:" `T.isPrefixOf` tit + -> otherBlocks ++ [lastBlock, + Plain backlink] + Para lst -> otherBlocks ++ [Para (lst ++ backlink)] - (Plain lst) -> otherBlocks ++ + Plain lst -> otherBlocks ++ [Plain (lst ++ backlink)] - _ -> otherBlocks ++ [lastBlock, + _ -> otherBlocks ++ [lastBlock, Plain backlink] contents <- blockListToHtml opts blocks' let noteItem = H.li ! prefixedId opts ("fn" <> ref) $ contents @@ -1474,22 +1550,12 @@ blockListToNote opts ref blocks = do _ -> noteItem return $ nl opts >> noteItem' -cslEntryToHtml :: PandocMonad m - => WriterOptions - -> Block - -> StateT WriterState m Html -cslEntryToHtml opts (Para xs) = do +inDiv :: PandocMonad m=> Text -> Html -> StateT WriterState m Html +inDiv cls x = do html5 <- gets stHtml5 - let inDiv :: Text -> Html -> Html - inDiv cls x = (if html5 then H5.div else H.div) - x ! A.class_ (toValue cls) - let go (Span ("",[cls],[]) ils) - | cls == "csl-block" || cls == "csl-left-margin" || - cls == "csl-right-inline" || cls == "csl-indent" - = inDiv cls <$> inlineListToHtml opts ils - go il = inlineToHtml opts il - mconcat <$> mapM go xs -cslEntryToHtml opts x = blockToHtml opts x + return $ + (if html5 then H5.div else H.div) + x ! A.class_ (toValue cls) isMathEnvironment :: Text -> Bool isMathEnvironment s = "\\begin{" `T.isPrefixOf` s && @@ -1529,6 +1595,12 @@ allowsMathEnvironments MathML = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False +-- | List of intrinsic event attributes allowed on all elements in HTML4. +intrinsicEventsHTML4 :: [Text] +intrinsicEventsHTML4 = + [ "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover" + , "onmouseout", "onmouseout", "onkeypress", "onkeydown", "onkeyup"] + isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool isRawHtml f = do html5 <- gets stHtml5 diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index aaa19ed07..75e14714b 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -15,6 +15,7 @@ Haddock: <http://www.haskell.org/haddock/doc/html/> -} module Text.Pandoc.Writers.Haddock (writeHaddock) where import Control.Monad.State.Strict +import Data.Char (isAlphaNum) import Data.Default import Data.Text (Text) import qualified Data.Text as T @@ -71,8 +72,18 @@ notesToHaddock opts notes = -- | Escape special characters for Haddock. escapeString :: Text -> Text -escapeString = escapeStringUsing haddockEscapes - where haddockEscapes = backslashEscapes "\\/'`\"@<" +escapeString t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where + escChar '\\' = "\\\\" + escChar '/' = "\\/" + escChar '\'' = "\\'" + escChar '`' = "\\`" + escChar '"' = "\\\"" + escChar '@' = "\\@" + escChar '<' = "\\<" + escChar c = T.singleton c -- | Convert Pandoc block element to haddock. blockToHaddock :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index dcf5acfef..c254fbc58 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -621,7 +621,12 @@ imageICML opts style attr (src, _) = do image = inTags True "Image" [("Self","ue6"), ("ItemTransform", scale<>" -"<>hw<>" -"<>hh)] $ vcat [ - inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + inTags True "Properties" [] $ vcat [ + inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + , selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0") + , ("Right", showFl $ ow*ow / imgWidth) + , ("Bottom", showFl $ oh*oh / imgHeight)] + ] , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')] ] doc = inTags True "CharacterStyleRange" attrs diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs index d01d5a7e5..2613851c5 100644 --- a/src/Text/Pandoc/Writers/Ipynb.hs +++ b/src/Text/Pandoc/Writers/Ipynb.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Writers.Ipynb - Copyright : Copyright (C) 2019-2020 John MacFarlane + Copyright : Copyright (C) 2019-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 7058a4557..9db8723d1 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -1,9 +1,10 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.JATS - Copyright : Copyright (C) 2017-2020 John MacFarlane + Copyright : 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -20,15 +21,17 @@ module Text.Pandoc.Writers.JATS , writeJatsPublishing , writeJatsArticleAuthoring ) where +import Control.Applicative ((<|>)) import Control.Monad.Reader import Control.Monad.State import Data.Generics (everywhere, mkT) import Data.List (partition) import qualified Data.Map as M -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, listToMaybe) import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime) import qualified Data.Text as T import Data.Text (Text) +import Text.Pandoc.Citeproc (getReferences) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (languages, languagesByExtension) @@ -40,6 +43,7 @@ import Text.DocLayout import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Context(..), Val(..)) +import Text.Pandoc.Writers.JATS.References (referencesToJATS) import Text.Pandoc.Writers.JATS.Table (tableToJATS) import Text.Pandoc.Writers.JATS.Types import Text.Pandoc.Writers.Math @@ -71,15 +75,19 @@ writeJATS = writeJatsArchiving -- | Convert a @'Pandoc'@ document to JATS. writeJats :: PandocMonad m => JATSTagSet -> WriterOptions -> Pandoc -> m Text -writeJats tagSet opts d = - runReaderT (evalStateT (docToJATS opts d) initialState) - environment - where initialState = JATSState { jatsNotes = [] } - environment = JATSEnv +writeJats tagSet opts d = do + refs <- if extensionEnabled Ext_element_citations $ writerExtensions opts + then getReferences Nothing d + else pure [] + let environment = JATSEnv { jatsTagSet = tagSet , jatsInlinesWriter = inlinesToJATS - , jatsBlockWriter = blockToJATS + , jatsBlockWriter = wrappedBlocksToJATS + , jatsReferences = refs } + let initialState = JATSState { jatsNotes = [] } + runReaderT (evalStateT (docToJATS opts d) initialState) + environment -- | Convert Pandoc document to string in JATS format. docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text @@ -168,13 +176,15 @@ plainToPara x = x -- | Convert a list of pairs of terms and definitions into a list of -- JATS varlistentrys. deflistItemsToJATS :: PandocMonad m - => WriterOptions -> [([Inline],[[Block]])] -> JATS m (Doc Text) + => WriterOptions + -> [([Inline],[[Block]])] -> JATS m (Doc Text) deflistItemsToJATS opts items = vcat <$> mapM (uncurry (deflistItemToJATS opts)) items -- | Convert a term and a list of blocks into a JATS varlistentry. deflistItemToJATS :: PandocMonad m - => WriterOptions -> [Inline] -> [[Block]] -> JATS m (Doc Text) + => WriterOptions + -> [Inline] -> [[Block]] -> JATS m (Doc Text) deflistItemToJATS opts term defs = do term' <- inlinesToJATS opts term def' <- wrappedBlocksToJATS (not . isPara) @@ -186,7 +196,8 @@ deflistItemToJATS opts term defs = do -- | Convert a list of lists of blocks to a list of JATS list items. listItemsToJATS :: PandocMonad m - => WriterOptions -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text) + => WriterOptions + -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text) listItemsToJATS opts markers items = case markers of Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items @@ -194,12 +205,13 @@ listItemsToJATS opts markers items = -- | Convert a list of blocks into a JATS list item. listItemToJATS :: PandocMonad m - => WriterOptions -> Maybe Text -> [Block] -> JATS m (Doc Text) + => WriterOptions + -> Maybe Text -> [Block] -> JATS m (Doc Text) listItemToJATS opts mbmarker item = do contents <- wrappedBlocksToJATS (not . isParaOrList) opts (walk demoteHeaderAndRefs item) return $ inTagsIndented "list-item" $ - maybe empty (\lbl -> inTagsSimple "label" (text $ T.unpack lbl)) mbmarker + maybe empty (inTagsSimple "label" . text . T.unpack) mbmarker $$ contents imageMimeType :: Text -> [(Text, Text)] -> (Text, Text) @@ -213,33 +225,36 @@ imageMimeType src kvs = (T.drop 1 . T.dropWhile (/='/') <$> mbMT) in (maintype, subtype) -languageFor :: [Text] -> Text -languageFor classes = +languageFor :: WriterOptions -> [Text] -> Text +languageFor opts classes = case langs of (l:_) -> escapeStringForXML l [] -> "" - where isLang l = T.toLower l `elem` map T.toLower languages + where + syntaxMap = writerSyntaxMap opts + isLang l = T.toLower l `elem` map T.toLower (languages syntaxMap) langsFrom s = if isLang s then [s] - else languagesByExtension . T.toLower $ s + else (languagesByExtension syntaxMap) . T.toLower $ s langs = concatMap langsFrom classes -codeAttr :: Attr -> (Text, [(Text, Text)]) -codeAttr (ident,classes,kvs) = (lang, attr) +codeAttr :: WriterOptions -> Attr -> (Text, [(Text, Text)]) +codeAttr opts (ident,classes,kvs) = (lang, attr) where - attr = [("id",ident) | not (T.null ident)] ++ + attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("language",lang) | not (T.null lang)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["code-type", "code-version", "executable", "language-version", "orientation", "platforms", "position", "specific-use"]] - lang = languageFor classes + lang = languageFor opts classes -- | Convert a Pandoc block element to JATS. blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text) blockToJATS _ Null = return empty blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do - let idAttr = [("id", writerIdentifierPrefix opts <> id') | not (T.null id')] + let idAttr = [ ("id", writerIdentifierPrefix opts <> escapeNCName id') + | not (T.null id')] let otherAttrs = ["sec-type", "specific-use"] let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] title' <- inlinesToJATS opts ils @@ -247,21 +262,26 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do return $ inTags True "sec" attribs $ inTagsSimple "title" title' $$ contents -- Bibliography reference: -blockToJATS opts (Div (T.stripPrefix "ref-" -> Just _,_,_) [Para lst]) = +blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident = + inTags True "ref" [("id", escapeNCName ident)] . + inTagsSimple "mixed-citation" <$> inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do - contents <- blocksToJATS opts xs + refs <- asks jatsReferences + contents <- if null refs + then blocksToJATS opts xs + else referencesToJATS opts refs return $ inTagsIndented "ref-list" contents blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] return $ inTags True cls attr contents blockToJATS opts (Div (ident,_,kvs) bs) = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] @@ -279,7 +299,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt let capt = if null txt then empty else inTagsSimple "caption" $ inTagsSimple "p" alt - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation", "position", "specific-use"]] let graphicattr = [("mimetype",maintype), @@ -290,7 +310,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt capt $$ selfClosingTag "graphic" graphicattr blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do let (maintype, subtype) = imageMimeType src kvs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ @@ -306,13 +326,16 @@ blockToJATS opts (LineBlock lns) = blockToJATS opts $ linesToPara lns blockToJATS opts (BlockQuote blocks) = do tagSet <- asks jatsTagSet - let blocksToJats' = if tagSet == TagSetArticleAuthoring - then wrappedBlocksToJATS (not . isPara) - else blocksToJATS - inTagsIndented "disp-quote" <$> blocksToJats' opts blocks -blockToJATS _ (CodeBlock a str) = return $ + let needsWrap = if tagSet == TagSetArticleAuthoring + then not . isPara + else \case + Header{} -> True + HorizontalRule -> True + _ -> False + inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks +blockToJATS opts (CodeBlock a str) = return $ inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str))) - where (lang, attr) = codeAttr a + where (lang, attr) = codeAttr opts a tag = if T.null lang then "preformat" else "code" blockToJATS _ (BulletList []) = return empty blockToJATS opts (BulletList lst) = @@ -392,9 +415,9 @@ inlineToJATS opts (Quoted SingleQuote lst) = do inlineToJATS opts (Quoted DoubleQuote lst) = do contents <- inlinesToJATS opts lst return $ char '“' <> contents <> char '”' -inlineToJATS _ (Code a str) = +inlineToJATS opts (Code a str) = return $ inTags False tag attr $ literal (escapeStringForXML str) - where (lang, attr) = codeAttr a + where (lang, attr) = codeAttr opts a tag = if T.null lang then "monospace" else "code" inlineToJATS _ il@(RawInline f x) | f == "jats" = return $ literal x @@ -417,7 +440,8 @@ inlineToJATS opts (Note contents) = do let notenum = case notes of (n, _):_ -> n + 1 [] -> 1 - thenote <- inTags True "fn" [("id","fn" <> tshow notenum)] + thenote <- inTags True "fn" [("id", "fn" <> tshow notenum)] + . (inTagsSimple "label" (literal $ tshow notenum) <>) <$> wrappedBlocksToJATS (not . isPara) opts (walk demoteHeaderAndRefs contents) modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } @@ -425,18 +449,34 @@ inlineToJATS opts (Note contents) = do ("rid", "fn" <> tshow notenum)] $ text (show notenum) inlineToJATS opts (Cite _ lst) = - -- TODO revisit this after examining the jats.csl pipeline inlinesToJATS opts lst -inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils -inlineToJATS opts (Span (ident,_,kvs) ils) = do +inlineToJATS opts (Span (ident,classes,kvs) ils) = do contents <- inlinesToJATS opts ils - let attr = [("id",ident) | not (T.null ident)] ++ - [("xml:lang",l) | ("lang",l) <- kvs] ++ - [(k,v) | (k,v) <- kvs - , k `elem` ["content-type", "rationale", - "rid", "specific-use"]] - return $ selfClosingTag "milestone-start" attr <> contents <> - selfClosingTag "milestone-end" [] + let commonAttr = [("id", escapeNCName ident) | not (T.null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["alt", "specific-use"]] + -- A named-content element is a good fit for spans, but requires a + -- content-type attribute to be present. We use either the explicit + -- attribute or the first class as content type. If neither is + -- available, then we fall back to using a @styled-content@ element. + let (tag, specificAttr) = + case lookup "content-type" kvs <|> listToMaybe classes of + Just ct -> ( "named-content" + , ("content-type", ct) : + [(k, v) | (k, v) <- kvs + , k `elem` ["rid", "vocab", "vocab-identifier", + "vocab-term", "vocab-term-identifier"]]) + -- Fall back to styled-content + Nothing -> ("styled-content" + , [(k, v) | (k,v) <- kvs + , k `elem` ["style", "style-type", "style-detail", + "toggle"]]) + let attr = commonAttr ++ specificAttr + -- unwrap if wrapping element would have no attributes + return $ + if null attr + then contents + else inTags False tag attr contents inlineToJATS _ (Math t str) = do let addPref (Xml.Attr q v) | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v @@ -470,17 +510,20 @@ inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _)) | escapeURI t == email = return $ inTagsSimple "email" $ literal (escapeStringForXML email) inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do - let attr = [("id", ident) | not (T.null ident)] ++ - [("alt", stringify txt) | not (null txt)] ++ - [("rid", src)] ++ - [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] + let attr = mconcat + [ [("id", escapeNCName ident) | not (T.null ident)] + , [("alt", stringify txt) | not (null txt)] + , [("rid", escapeNCName src)] + , [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] + , [("ref-type", "bibr") | "ref-" `T.isPrefixOf` src] + ] if null txt then return $ selfClosingTag "xref" attr else do contents <- inlinesToJATS opts txt return $ inTags False "xref" attr contents inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("ext-link-type", "uri"), ("xlink:href", src)] ++ [("xlink:title", tit) | not (T.null tit)] ++ @@ -498,7 +541,7 @@ inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do let subtype = fromMaybe "" $ lookup "mime-subtype" kvs `mplus` (T.drop 1 . T.dropWhile (/='/') <$> mbMT) - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ @@ -529,7 +572,7 @@ demoteHeaderAndRefs (Div ("refs",cls,kvs) bs) = demoteHeaderAndRefs x = x parseDate :: Text -> Maybe Day -parseDate s = msum (map (\fs -> parsetimeWith fs $ T.unpack s) formats) :: Maybe Day +parseDate s = msum (map (`parsetimeWith` T.unpack s) formats) where parsetimeWith = parseTimeM True defaultTimeLocale formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", "%e %B %Y", "%b. %e, %Y", "%B %e, %Y", diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs new file mode 100644 index 000000000..5b19fd034 --- /dev/null +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -0,0 +1,164 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.JATS.References + Copyright : © 2021 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb@zeitkraut.de> + Stability : alpha + Portability : portable + +Creation of a bibliography list using @<element-citation>@ elements in +reference items. +-} +module Text.Pandoc.Writers.JATS.References + ( referencesToJATS + , referenceToJATS + ) where + +import Citeproc.Pandoc () +import Citeproc.Types + ( Date (..), DateParts (..), ItemId (..), Name (..), Reference (..) + , Val (..) , lookupVariable, valToText + ) +import Data.Text (Text) +import Text.DocLayout (Doc, empty, isEmpty, literal, vcat) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Builder (Inlines) +import Text.Pandoc.Options (WriterOptions) +import Text.Pandoc.Shared (tshow) +import Text.Pandoc.Writers.JATS.Types +import Text.Pandoc.XML (escapeNCName, escapeStringForXML, inTags) +import qualified Data.Text as T + +referencesToJATS :: PandocMonad m + => WriterOptions + -> [Reference Inlines] + -> JATS m (Doc Text) +referencesToJATS opts = + fmap (inTags True "ref-list" [] . vcat) . mapM (referenceToJATS opts) + +referenceToJATS :: PandocMonad m + => WriterOptions + -> Reference Inlines + -> JATS m (Doc Text) +referenceToJATS _opts ref = do + let refType = referenceType ref + let pubType = [("publication-type", refType) | not (T.null refType)] + let ident = escapeNCName $ "ref-" <> unItemId (referenceId ref) + let wrap = inTags True "ref" [("id", ident)] + . inTags True "element-citation" pubType + return . wrap . vcat $ + [ authors + , "title" `varInTag` + if refType == "book" + then "source" + else "article-title" + , if refType == "book" + then empty + else "container-title" `varInTag` "source" + , editors + , "publisher" `varInTag` "publisher-name" + , "publisher-place" `varInTag` "publisher-loc" + , yearTag + , accessed + , "volume" `varInTag` "volume" + , "issue" `varInTag` "issue" + , "page-first" `varInTag` "fpage" + , "page-last" `varInTag` "lpage" + , "pages" `varInTag` "page-range" + , "ISBN" `varInTag` "isbn" + , "ISSN" `varInTag` "issn" + , varInTagWith "doi" "pub-id" [("pub-id-type", "doi")] + , varInTagWith "pmid" "pub-id" [("pub-id-type", "pmid")] + ] + where + varInTag var tagName = varInTagWith var tagName [] + + varInTagWith var tagName tagAttribs = + case lookupVariable var ref >>= valToText of + Nothing -> mempty + Just val -> inTags' tagName tagAttribs . literal $ + escapeStringForXML val + + authors = case lookupVariable "author" ref of + Just (NamesVal names) -> + inTags True "person-group" [("person-group-type", "author")] . vcat $ + map toNameElements names + _ -> empty + + editors = case lookupVariable "editor" ref of + Just (NamesVal names) -> + inTags True "person-group" [("person-group-type", "editor")] . vcat $ + map toNameElements names + _ -> empty + + yearTag = + case lookupVariable "issued" ref of + Just (DateVal date) -> toDateElements date + _ -> empty + + accessed = + case lookupVariable "accessed" ref of + Just (DateVal d) -> inTags' "date-in-citation" + [("content-type", "access-date")] + (toDateElements d) + _ -> empty + +toDateElements :: Date -> Doc Text +toDateElements date = + case dateParts date of + dp@(DateParts (y:m:d:_)):_ -> yearElement y dp <> + monthElement m <> + dayElement d + dp@(DateParts (y:m:_)):_ -> yearElement y dp <> monthElement m + dp@(DateParts (y:_)):_ -> yearElement y dp + _ -> empty + +yearElement :: Int -> DateParts -> Doc Text +yearElement year dp = + inTags' "year" [("iso-8601-date", iso8601 dp)] $ literal (fourDigits year) + +monthElement :: Int -> Doc Text +monthElement month = inTags' "month" [] . literal $ twoDigits month + +dayElement :: Int -> Doc Text +dayElement day = inTags' "day" [] . literal $ twoDigits day + +iso8601 :: DateParts -> Text +iso8601 = T.intercalate "-" . \case + DateParts (y:m:d:_) -> [fourDigits y, twoDigits m, twoDigits d] + DateParts (y:m:_) -> [fourDigits y, twoDigits m] + DateParts (y:_) -> [fourDigits y] + _ -> [] + +twoDigits :: Int -> Text +twoDigits n = T.takeEnd 2 $ '0' `T.cons` tshow n + +fourDigits :: Int -> Text +fourDigits n = T.takeEnd 4 $ "000" <> tshow n + +toNameElements :: Name -> Doc Text +toNameElements name = + if not (isEmpty nameTags) + then inTags' "name" [] nameTags + else nameLiteral name `inNameTag` "string-name" + where + inNameTag mVal tag = case mVal of + Nothing -> empty + Just val -> inTags' tag [] . literal $ escapeStringForXML val + surnamePrefix = maybe mempty (`T.snoc` ' ') $ + nameNonDroppingParticle name + givenSuffix = maybe mempty (T.cons ' ') $ + nameDroppingParticle name + nameTags = mconcat + [ ((surnamePrefix <>) <$> nameFamily name) `inNameTag` "surname" + , ((<> givenSuffix) <$> nameGiven name) `inNameTag` "given-names" + , nameSuffix name `inNameTag` "suffix" + ] + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes. +inTags' :: Text -> [(Text, Text)] -> Doc Text -> Doc Text +inTags' = inTags False diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs index a4d42832d..70569bdcd 100644 --- a/src/Text/Pandoc/Writers/JATS/Table.hs +++ b/src/Text/Pandoc/Writers/JATS/Table.hs @@ -3,7 +3,7 @@ {-# LANGUAGE TupleSections #-} {- | Module : Text.Pandoc.Writers.JATS.Table - Copyright : © 2020 Albert Krewinkel + Copyright : © 2020-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb@zeitkraut.de> @@ -24,7 +24,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions) import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.JATS.Types -import Text.Pandoc.XML (inTags, inTagsIndented, selfClosingTag) +import Text.Pandoc.XML (escapeNCName, inTags, inTagsIndented, selfClosingTag) import qualified Data.Text as T import qualified Text.Pandoc.Writers.AnnotatedTable as Ann @@ -34,13 +34,19 @@ tableToJATS :: PandocMonad m -> JATS m (Doc Text) tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do let (Caption _maybeShortCaption captionBlocks) = caption + -- Only paragraphs are allowed in captions, all other blocks must be + -- wrapped in @<p>@ elements. + let needsWrapping = \case + Plain{} -> False + Para{} -> False + _ -> True tbl <- captionlessTable opts attr colspecs thead tbodies tfoot captionDoc <- if null captionBlocks then return empty else do blockToJATS <- asks jatsBlockWriter - inTagsIndented "caption" . vcat <$> - mapM (blockToJATS opts) captionBlocks + inTagsIndented "caption" <$> + blockToJATS needsWrapping opts captionBlocks return $ inTags True "table-wrap" [] $ captionDoc $$ tbl captionlessTable :: PandocMonad m @@ -216,7 +222,7 @@ cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) = toAttribs :: Attr -> [Text] -> [(Text, Text)] toAttribs (ident, _classes, kvs) knownAttribs = - (if T.null ident then id else (("id", ident) :)) $ + (if T.null ident then id else (("id", escapeNCName ident) :)) $ filter ((`elem` knownAttribs) . fst) kvs tableCellToJats :: PandocMonad m @@ -230,7 +236,7 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do inlinesToJats <- asks jatsInlinesWriter let cellContents = \case [Plain inlines] -> inlinesToJats opts inlines - blocks -> vcat <$> mapM (blockToJats opts) blocks + blocks -> blockToJats needsWrapInCell opts blocks let tag' = case ctype of BodyCell -> "td" HeaderCell -> "th" @@ -246,3 +252,17 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do . maybeCons (colspanAttrib colspan) $ toAttribs attr validAttribs inTags False tag' attribs <$> cellContents item + +-- | Whether the JATS produced from this block should be wrapped in a +-- @<p>@ element when put directly below a @<td>@ element. +needsWrapInCell :: Block -> Bool +needsWrapInCell = \case + Plain{} -> False -- should be unwrapped anyway + Para{} -> False + BulletList{} -> False + OrderedList{} -> False + DefinitionList{} -> False + HorizontalRule -> False + CodeBlock{} -> False + RawBlock{} -> False -- responsibility of the user + _ -> True diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs index 8162f3bc0..8d8673cf6 100644 --- a/src/Text/Pandoc/Writers/JATS/Types.hs +++ b/src/Text/Pandoc/Writers/JATS/Types.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Writers.JATS.Types - Copyright : Copyright (C) 2017-2020 John MacFarlane + Copyright : Copyright (C) 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -17,11 +17,12 @@ module Text.Pandoc.Writers.JATS.Types ) where +import Citeproc.Types (Reference) import Control.Monad.Reader (ReaderT) import Control.Monad.State (StateT) import Data.Text (Text) import Text.DocLayout (Doc) -import Text.Pandoc.Definition (Block, Inline) +import Text.Pandoc.Builder (Block, Inline, Inlines) import Text.Pandoc.Options (WriterOptions) -- | JATS tag set variant @@ -36,10 +37,20 @@ newtype JATSState = JATSState { jatsNotes :: [(Int, Doc Text)] } +-- | Environment containing all information relevant for rendering. data JATSEnv m = JATSEnv - { jatsTagSet :: JATSTagSet + { jatsTagSet :: JATSTagSet -- ^ The tag set that's being ouput + + , jatsBlockWriter :: (Block -> Bool) + -> WriterOptions -> [Block] -> JATS m (Doc Text) + -- ^ Converts a block list to JATS, wrapping top-level blocks into a + -- @<p>@ element if the property evaluates to @True@. + -- See #7227. + , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text) - , jatsBlockWriter :: WriterOptions -> Block -> JATS m (Doc Text) + -- ^ Converts an inline list to JATS. + + , jatsReferences :: [Reference Inlines] -- ^ List of references } -- | JATS writer type diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index 6bc048a61..1351814e9 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -3,7 +3,7 @@ {-# LANGUAGE PatternGuards #-} {- | Module : Text.Pandoc.Writers.Jira - Copyright : © 2010-2020 Albert Krewinkel, John MacFarlane + Copyright : © 2010-2021 Albert Krewinkel, John MacFarlane License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -39,11 +39,17 @@ writeJira :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeJira opts = runDefaultConverter (writerWrapText opts) (pandocToJira opts) -- | State to keep track of footnotes. -newtype ConverterState = ConverterState { stNotes :: [Text] } +data ConverterState = ConverterState + { stNotes :: [Text] -- ^ Footnotes to be appended to the end of the text + , stInPanel :: Bool -- ^ whether we are in a @{panel}@ block + } -- | Initial converter state. startState :: ConverterState -startState = ConverterState { stNotes = [] } +startState = ConverterState + { stNotes = [] + , stInPanel = False + } -- | Converter monad type JiraConverter m = ReaderT WrapOption (StateT ConverterState m) @@ -126,13 +132,19 @@ toJiraCode :: PandocMonad m -> Text -> JiraConverter m [Jira.Block] toJiraCode (ident, classes, _attribs) code = do - let lang = case find (\c -> T.toLower c `elem` knownLanguages) classes of - Nothing -> Jira.Language "java" - Just l -> Jira.Language l - let addAnchor b = if T.null ident - then b - else [Jira.Para (singleton (Jira.Anchor ident))] <> b - return . addAnchor . singleton $ Jira.Code lang mempty code + return . addAnchor ident . singleton $ + case find (\c -> T.toLower c `elem` knownLanguages) classes of + Nothing -> Jira.NoFormat mempty code + Just l -> Jira.Code (Jira.Language l) mempty code + +-- | Prepends an anchor with the given identifier. +addAnchor :: Text -> [Jira.Block] -> [Jira.Block] +addAnchor ident = + if T.null ident + then id + else \case + Jira.Para xs : bs -> (Jira.Para (Jira.Anchor ident : xs) : bs) + bs -> (Jira.Para (singleton (Jira.Anchor ident)) : bs) -- | Creates a Jira definition list toJiraDefinitionList :: PandocMonad m @@ -149,11 +161,16 @@ toJiraDefinitionList defItems = do toJiraPanel :: PandocMonad m => Attr -> [Block] -> JiraConverter m [Jira.Block] -toJiraPanel attr blocks = do - jiraBlocks <- toJiraBlocks blocks - return $ if attr == nullAttr - then jiraBlocks - else singleton (Jira.Panel [] jiraBlocks) +toJiraPanel (ident, classes, attribs) blocks = do + inPanel <- gets stInPanel + if inPanel || ("panel" `notElem` classes && null attribs) + then addAnchor ident <$> toJiraBlocks blocks + else do + modify $ \st -> st{ stInPanel = True } + jiraBlocks <- toJiraBlocks blocks + modify $ \st -> st{ stInPanel = inPanel } + let params = map (uncurry Jira.Parameter) attribs + return $ singleton (Jira.Panel params $ addAnchor ident jiraBlocks) -- | Creates a Jira header toJiraHeader :: PandocMonad m @@ -263,6 +280,8 @@ toJiraLink (_, classes, _) (url, _) alias = do | Just email <- T.stripPrefix "mailto:" url' = (Jira.Email, email) | "user-account" `elem` classes = (Jira.User, dropTilde url) | "attachment" `elem` classes = (Jira.Attachment, url) + | "smart-card" `elem` classes = (Jira.SmartCard, url) + | "smart-link" `elem` classes = (Jira.SmartLink, url) | otherwise = (Jira.External, url) dropTilde txt = case T.uncons txt of Just ('~', username) -> username @@ -292,7 +311,13 @@ quotedToJira qtype xs = do spanToJira :: PandocMonad m => Attr -> [Inline] -> JiraConverter m [Jira.Inline] -spanToJira (_, _classes, _) = toJiraInlines +spanToJira (ident, _classes, attribs) inls = + let wrap = case lookup "color" attribs of + Nothing -> id + Just color -> singleton . Jira.ColorInline (Jira.ColorName color) + in wrap <$> case ident of + "" -> toJiraInlines inls + _ -> (Jira.Anchor ident :) <$> toJiraInlines inls registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline] registerNotes contents = do @@ -308,7 +333,7 @@ registerNotes contents = do knownLanguages :: [Text] knownLanguages = [ "actionscript", "ada", "applescript", "bash", "c", "c#", "c++" - , "css", "erlang", "go", "groovy", "haskell", "html", "javascript" + , "css", "erlang", "go", "groovy", "haskell", "html", "java", "javascript" , "json", "lua", "nyan", "objc", "perl", "php", "python", "r", "ruby" , "scala", "sql", "swift", "visualbasic", "xml", "yaml" ] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d665269ab..063e347fb 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,11 +1,12 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -18,12 +19,9 @@ module Text.Pandoc.Writers.LaTeX ( writeLaTeX , writeBeamer ) where -import Control.Applicative ((<|>)) import Control.Monad.State.Strict -import Data.Monoid (Any(..)) -import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, - isPunctuation, ord) -import Data.List (foldl', intersperse, nubBy, (\\), uncons) +import Data.Char (isDigit) +import Data.List (intersperse, nubBy, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) import qualified Data.Map as M import Data.Text (Text) @@ -31,79 +29,30 @@ import qualified Data.Text as T import Network.URI (unEscapeString) import Text.DocTemplates (FromContext(lookupContext), renderTemplate, Val(..), Context(..)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import Text.Collate.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, - styleToLaTeX, toListingsLanguage) + styleToLaTeX) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared import Text.Pandoc.Slides -import Text.Pandoc.Walk +import Text.Pandoc.Walk (query, walk, walkM) +import Text.Pandoc.Writers.LaTeX.Caption (getCaption) +import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX) +import Text.Pandoc.Writers.LaTeX.Citation (citationsToNatbib, + citationsToBiblatex) +import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState) +import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossia, toBabel) +import Text.Pandoc.Writers.LaTeX.Util (stringToLaTeX, StringContext(..), + toLabel, inCmd, + wrapDiv, hypertarget, labelFor, + getListingsLanguage, mbBraced) import Text.Pandoc.Writers.Shared -import Text.Printf (printf) -import qualified Data.Text.Normalize as Normalize - -data WriterState = - WriterState { stInNote :: Bool -- true if we're in a note - , stInQuote :: Bool -- true if in a blockquote - , stExternalNotes :: Bool -- true if in context where - -- we need to store footnotes - , stInMinipage :: Bool -- true if in minipage - , stInHeading :: Bool -- true if in a section heading - , stInItem :: Bool -- true if in \item[..] - , stNotes :: [Doc Text] -- notes in a minipage - , stOLLevel :: Int -- level of ordered list nesting - , stOptions :: WriterOptions -- writer options, so they don't have to be parameter - , stVerbInNote :: Bool -- true if document has verbatim text in note - , stTable :: Bool -- true if document has a table - , stStrikeout :: Bool -- true if document has strikeout - , stUrl :: Bool -- true if document has visible URL link - , stGraphics :: Bool -- true if document contains images - , stLHS :: Bool -- true if document has literate haskell code - , stHasChapters :: Bool -- true if document has chapters - , stCsquotes :: Bool -- true if document uses csquotes - , stHighlighting :: Bool -- true if document has highlighted code - , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit - , stInternalLinks :: [Text] -- list of internal link targets - , stBeamer :: Bool -- produce beamer - , stEmptyLine :: Bool -- true if no content on line - , stHasCslRefs :: Bool -- has a Div with class refs - , stIsFirstInDefinition :: Bool -- first block in a defn list - } - -startingState :: WriterOptions -> WriterState -startingState options = WriterState { - stInNote = False - , stInQuote = False - , stExternalNotes = False - , stInHeading = False - , stInMinipage = False - , stInItem = False - , stNotes = [] - , stOLLevel = 1 - , stOptions = options - , stVerbInNote = False - , stTable = False - , stStrikeout = False - , stUrl = False - , stGraphics = False - , stLHS = False - , stHasChapters = case writerTopLevelDivision options of - TopLevelPart -> True - TopLevelChapter -> True - _ -> False - , stCsquotes = False - , stHighlighting = False - , stIncremental = writerIncremental options - , stInternalLinks = [] - , stBeamer = False - , stEmptyLine = True - , stHasCslRefs = False - , stIsFirstInDefinition = False } +import qualified Text.Pandoc.Writers.AnnotatedTable as Ann -- | Convert Pandoc to LaTeX. writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -117,8 +66,6 @@ writeBeamer options document = evalStateT (pandocToLaTeX options document) $ (startingState options){ stBeamer = True } -type LW m = StateT WriterState m - pandocToLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> LW m Text pandocToLaTeX options (Pandoc meta blocks) = do @@ -199,6 +146,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do let dirs = query (extract "dir") blocks + let nociteIds = query (\case + Cite cs _ -> map citationId cs + _ -> []) + $ lookupMetaInlines "nocite" meta + let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (tshow (writerTOCDepth options - @@ -212,6 +164,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "documentclass" documentClass $ defField "verbatim-in-note" (stVerbInNote st) $ defField "tables" (stTable st) $ + defField "multirow" (stMultiRow st) $ defField "strikeout" (stStrikeout st) $ defField "url" (stUrl st) $ defField "numbersections" (writerNumberSections options) $ @@ -220,6 +173,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "has-chapters" (stHasChapters st) $ defField "has-frontmatter" (documentClass `elem` frontmatterClasses) $ defField "listings" (writerListings options || stLHS st) $ + defField "zero-width-non-joiner" (stZwnj st) $ defField "beamer" beamer $ (if stHighlighting st then case writerHighlightStyle options of @@ -230,9 +184,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do else id) $ (case writerCiteMethod options of Natbib -> defField "biblio-title" biblioTitle . - defField "natbib" True + defField "natbib" True . + defField "nocite-ids" nociteIds Biblatex -> defField "biblio-title" biblioTitle . - defField "biblatex" True + defField "biblatex" True . + defField "nocite-ids" nociteIds _ -> id) $ defField "colorlinks" (any hasStringValue ["citecolor", "urlcolor", "linkcolor", "toccolor", @@ -297,154 +253,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do Nothing -> main Just tpl -> renderTemplate tpl context' -data StringContext = TextString - | URLString - | CodeString - deriving (Eq) - --- escape things as needed for LaTeX -stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text -stringToLaTeX context zs = do - opts <- gets stOptions - return $ T.pack $ - foldr (go opts context) mempty $ T.unpack $ - if writerPreferAscii opts - then Normalize.normalize Normalize.NFD zs - else zs - where - go :: WriterOptions -> StringContext -> Char -> String -> String - go opts ctx x xs = - let ligatures = isEnabled Ext_smart opts && ctx == TextString - isUrl = ctx == URLString - mbAccentCmd = - if writerPreferAscii opts && ctx == TextString - then uncons xs >>= \(c,_) -> M.lookup c accents - else Nothing - emits s = - case mbAccentCmd of - Just cmd -> - cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent - Nothing -> s <> xs - emitc c = - case mbAccentCmd of - Just cmd -> - cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent - Nothing -> c : xs - emitcseq cs = - case xs of - c:_ | isLetter c - , ctx == TextString - -> cs <> " " <> xs - | isSpace c -> cs <> "{}" <> xs - | ctx == TextString - -> cs <> xs - _ -> cs <> "{}" <> xs - emitquote cs = - case xs of - '`':_ -> cs <> "\\," <> xs -- add thin space - '\'':_ -> cs <> "\\," <> xs -- add thin space - _ -> cs <> xs - in case x of - '?' | ligatures -> -- avoid ?` ligature - case xs of - '`':_ -> emits "?{}" - _ -> emitc x - '!' | ligatures -> -- avoid !` ligature - case xs of - '`':_ -> emits "!{}" - _ -> emitc x - '{' -> emits "\\{" - '}' -> emits "\\}" - '`' | ctx == CodeString -> emitcseq "\\textasciigrave" - '$' | not isUrl -> emits "\\$" - '%' -> emits "\\%" - '&' -> emits "\\&" - '_' | not isUrl -> emits "\\_" - '#' -> emits "\\#" - '-' | not isUrl -> case xs of - -- prevent adjacent hyphens from forming ligatures - ('-':_) -> emits "-\\/" - _ -> emitc '-' - '~' | not isUrl -> emitcseq "\\textasciitilde" - '^' -> emits "\\^{}" - '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows - | otherwise -> emitcseq "\\textbackslash" - '|' | not isUrl -> emitcseq "\\textbar" - '<' -> emitcseq "\\textless" - '>' -> emitcseq "\\textgreater" - '[' -> emits "{[}" -- to avoid interpretation as - ']' -> emits "{]}" -- optional arguments - '\'' | ctx == CodeString -> emitcseq "\\textquotesingle" - '\160' -> emits "~" - '\x200B' -> emits "\\hspace{0pt}" -- zero-width space - '\x202F' -> emits "\\," - '\x2026' -> emitcseq "\\ldots" - '\x2018' | ligatures -> emitquote "`" - '\x2019' | ligatures -> emitquote "'" - '\x201C' | ligatures -> emitquote "``" - '\x201D' | ligatures -> emitquote "''" - '\x2014' | ligatures -> emits "---" - '\x2013' | ligatures -> emits "--" - _ | writerPreferAscii opts - -> case x of - 'ı' -> emitcseq "\\i" - 'ȷ' -> emitcseq "\\j" - 'å' -> emitcseq "\\aa" - 'Å' -> emitcseq "\\AA" - 'ß' -> emitcseq "\\ss" - 'ø' -> emitcseq "\\o" - 'Ø' -> emitcseq "\\O" - 'Ł' -> emitcseq "\\L" - 'ł' -> emitcseq "\\l" - 'æ' -> emitcseq "\\ae" - 'Æ' -> emitcseq "\\AE" - 'œ' -> emitcseq "\\oe" - 'Œ' -> emitcseq "\\OE" - '£' -> emitcseq "\\pounds" - '€' -> emitcseq "\\euro" - '©' -> emitcseq "\\copyright" - _ -> emitc x - | otherwise -> emitc x - -accents :: M.Map Char String -accents = M.fromList - [ ('\779' , "\\H") - , ('\768' , "\\`") - , ('\769' , "\\'") - , ('\770' , "\\^") - , ('\771' , "\\~") - , ('\776' , "\\\"") - , ('\775' , "\\.") - , ('\772' , "\\=") - , ('\781' , "\\|") - , ('\817' , "\\b") - , ('\807' , "\\c") - , ('\783' , "\\G") - , ('\777' , "\\h") - , ('\803' , "\\d") - , ('\785' , "\\f") - , ('\778' , "\\r") - , ('\865' , "\\t") - , ('\782' , "\\U") - , ('\780' , "\\v") - , ('\774' , "\\u") - , ('\808' , "\\k") - , ('\785' , "\\newtie") - , ('\8413', "\\textcircled") - ] - -toLabel :: PandocMonad m => Text -> LW m Text -toLabel z = go `fmap` stringToLaTeX URLString z - where - go = T.concatMap $ \x -> case x of - _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x - | x `elemText` "_-+=:;." -> T.singleton x - | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) - --- | Puts contents into LaTeX command. -inCmd :: Text -> Doc Text -> Doc Text -inCmd cmd contents = char '\\' <> literal cmd <> braces contents - toSlides :: PandocMonad m => [Block] -> LW m [Block] toSlides bs = do opts <- gets stOptions @@ -483,7 +291,12 @@ blockToLaTeX :: PandocMonad m => Block -- ^ Block to convert -> LW m (Doc Text) blockToLaTeX Null = return empty -blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do +blockToLaTeX (Div attr@(identifier,"block":dclasses,_) + (Header _ _ ils : bs)) = do + let blockname + | "example" `elem` dclasses = "exampleblock" + | "alert" `elem` dclasses = "alertblock" + | otherwise = "block" ref <- toLabel identifier let anchor = if T.null identifier then empty @@ -491,8 +304,8 @@ blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do braces (literal ref) <> braces empty title' <- inlineListToLaTeX ils contents <- blockListToLaTeX bs - wrapDiv attr $ ("\\begin{block}" <> braces title' <> anchor) $$ - contents $$ "\\end{block}" + wrapDiv attr $ ("\\begin" <> braces blockname <> braces title' <> anchor) $$ + contents $$ "\\end" <> braces blockname blockToLaTeX (Div (identifier,"slide":dclasses,dkvs) (Header _ (_,hclasses,hkvs) ils : bs)) = do -- note: [fragile] is required or verbatim breaks @@ -553,17 +366,16 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do Just s -> braces (literal s)) $$ inner $+$ "\\end{CSLReferences}" - else if "csl-entry" `elem` classes - then vcat <$> mapM cslEntryToLaTeX bs - else blockListToLaTeX bs + else blockListToLaTeX bs modify $ \st -> st{ stIncremental = oldIncremental } linkAnchor' <- hypertarget True identifier empty - -- see #2704 for the motivation for adding \leavevmode: + -- see #2704 for the motivation for adding \leavevmode + -- and #7078 for \vadjust pre let linkAnchor = case bs of Para _ : _ | not (isEmpty linkAnchor') - -> "\\leavevmode" <> linkAnchor' <> "%" + -> "\\leavevmode\\vadjust pre{" <> linkAnchor' <> "}%" _ -> linkAnchor' wrapNotes txt = if beamer && "notes" `elem` classes then "\\note" <> braces txt -- speaker notes @@ -575,7 +387,7 @@ blockToLaTeX (Plain lst) = blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,tgt)]) | Just tit <- T.stripPrefix "fig:" tgt = do - (capt, captForLof, footnotes) <- getCaption True txt + (capt, captForLof, footnotes) <- getCaption inlineListToLaTeX True txt lab <- labelFor ident let caption = "\\caption" <> captForLof <> braces capt <> lab img <- inlineToLaTeX (Image attr txt (src,tit)) @@ -776,181 +588,14 @@ blockToLaTeX (Header level (id',classes,_) lst) = do hdr <- sectionHeader classes id' level lst modify $ \s -> s{stInHeading = False} return hdr -blockToLaTeX (Table _ blkCapt specs thead tbody tfoot) = do - let (caption, aligns, widths, heads, rows) = toLegacyTable blkCapt specs thead tbody tfoot - -- simple tables have to have simple cells: - let isSimple [Plain _] = True - isSimple [Para _] = True - isSimple [] = True - isSimple _ = False - let widths' = if all (== 0) widths && not (all (all isSimple) rows) - then replicate (length aligns) - (1 / fromIntegral (length aligns)) - else widths - (captionText, captForLof, captNotes) <- getCaption False caption - let toHeaders hs = do contents <- tableRowToLaTeX True aligns hs - return ("\\toprule" $$ contents $$ "\\midrule") - let removeNote (Note _) = Span ("", [], []) [] - removeNote x = x - firsthead <- if isEmpty captionText || all null heads - then return empty - else ($$ text "\\endfirsthead") <$> toHeaders heads - head' <- if all null heads - then return "\\toprule" - -- avoid duplicate notes in head and firsthead: - else toHeaders (if isEmpty firsthead - then heads - else walk removeNote heads) - let capt = if isEmpty captionText - then empty - else "\\caption" <> captForLof <> braces captionText - <> "\\tabularnewline" - rows' <- mapM (tableRowToLaTeX False aligns) rows - let colDescriptors = - (if all (== 0) widths' - then hcat . map literal - else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $ - zipWith (toColDescriptor (length widths')) aligns widths' - modify $ \s -> s{ stTable = True } - notes <- notesToLaTeX <$> gets stNotes - return $ "\\begin{longtable}[]" <> - braces ("@{}" <> colDescriptors <> "@{}") - -- the @{} removes extra space at beginning and end - $$ capt - $$ firsthead - $$ head' - $$ "\\endhead" - $$ vcat rows' - $$ "\\bottomrule" - $$ "\\end{longtable}" - $$ captNotes - $$ notes - -getCaption :: PandocMonad m - => Bool -> [Inline] -> LW m (Doc Text, Doc Text, Doc Text) -getCaption externalNotes txt = do - oldExternalNotes <- gets stExternalNotes - modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] } - capt <- inlineListToLaTeX txt - footnotes <- if externalNotes - then notesToLaTeX <$> gets stNotes - else return empty - modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] } - -- We can't have footnotes in the list of figures/tables, so remove them: - let getNote (Note _) = Any True - getNote _ = Any False - let hasNotes = getAny . query getNote - captForLof <- if hasNotes txt - then brackets <$> inlineListToLaTeX (walk deNote txt) - else return empty - return (capt, captForLof, footnotes) - -toColDescriptor :: Int -> Alignment -> Double -> Text -toColDescriptor _numcols align 0 = - case align of - AlignLeft -> "l" - AlignRight -> "r" - AlignCenter -> "c" - AlignDefault -> "l" -toColDescriptor numcols align width = - T.pack $ printf - ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" - align' - ((numcols - 1) * 2) - width - where - align' :: String - align' = case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" +blockToLaTeX (Table attr blkCapt specs thead tbodies tfoot) = + tableToLaTeX inlineListToLaTeX blockListToLaTeX + (Ann.toTable attr blkCapt specs thead tbodies tfoot) blockListToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text) blockListToLaTeX lst = vsep `fmap` mapM (\b -> setEmptyLine True >> blockToLaTeX b) lst -tableRowToLaTeX :: PandocMonad m - => Bool - -> [Alignment] - -> [[Block]] - -> LW m (Doc Text) -tableRowToLaTeX header aligns cols = do - cells <- mapM (tableCellToLaTeX header) $ zip aligns cols - return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace" - --- For simple latex tables (without minipages or parboxes), --- we need to go to some lengths to get line breaks working: --- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. -fixLineBreaks :: Block -> Block -fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils -fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils -fixLineBreaks x = x - -fixLineBreaks' :: [Inline] -> [Inline] -fixLineBreaks' ils = case splitBy (== LineBreak) ils of - [] -> [] - [xs] -> xs - chunks -> RawInline "tex" "\\vtop{" : - concatMap tohbox chunks <> - [RawInline "tex" "}"] - where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <> - [RawInline "tex" "}"] - --- We also change display math to inline math, since display --- math breaks in simple tables. -displayMathToInline :: Inline -> Inline -displayMathToInline (Math DisplayMath x) = Math InlineMath x -displayMathToInline x = x - -tableCellToLaTeX :: PandocMonad m - => Bool -> (Alignment, [Block]) - -> LW m (Doc Text) -tableCellToLaTeX header (align, blocks) = do - beamer <- gets stBeamer - externalNotes <- gets stExternalNotes - inMinipage <- gets stInMinipage - -- See #5367 -- footnotehyper/footnote don't work in beamer, - -- so we need to produce the notes outside the table... - modify $ \st -> st{ stExternalNotes = beamer } - let isPlainOrPara Para{} = True - isPlainOrPara Plain{} = True - isPlainOrPara _ = False - result <- - if all isPlainOrPara blocks - then - blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks - else do - modify $ \st -> st{ stInMinipage = True } - cellContents <- blockListToLaTeX blocks - modify $ \st -> st{ stInMinipage = inMinipage } - let valign = text $ if header then "[b]" else "[t]" - let halign = case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" - return $ "\\begin{minipage}" <> valign <> - braces "\\linewidth" <> halign <> cr <> - cellContents <> cr <> - "\\end{minipage}" - modify $ \st -> st{ stExternalNotes = externalNotes } - return result - - -notesToLaTeX :: [Doc Text] -> Doc Text -notesToLaTeX [] = empty -notesToLaTeX ns = (case length ns of - n | n > 1 -> "\\addtocounter" <> - braces "footnote" <> - braces (text $ show $ 1 - n) - | otherwise -> empty) - $$ - vcat (intersperse - ("\\addtocounter" <> braces "footnote" <> braces "1") - $ map (\x -> "\\footnotetext" <> braces x) - $ reverse ns) - listItemToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text) listItemToLaTeX lst -- we need to put some text before a header if it's the first @@ -1077,81 +722,6 @@ sectionHeader classes ident level lst = do braces txtNoNotes else empty -mapAlignment :: Text -> Text -mapAlignment a = case a of - "top" -> "T" - "top-baseline" -> "t" - "bottom" -> "b" - "center" -> "c" - _ -> a - -wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text) -wrapDiv (_,classes,kvs) t = do - beamer <- gets stBeamer - let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir - lang <- toLang $ lookup "lang" kvs - let wrapColumns = if beamer && "columns" `elem` classes - then \contents -> - let valign = maybe "T" mapAlignment (lookup "align" kvs) - totalwidth = maybe [] (\x -> ["totalwidth=" <> x]) - (lookup "totalwidth" kvs) - onlytextwidth = filter ("onlytextwidth" ==) classes - options = text $ T.unpack $ T.intercalate "," $ - valign : totalwidth ++ onlytextwidth - in inCmd "begin" "columns" <> brackets options - $$ contents - $$ inCmd "end" "columns" - else id - wrapColumn = if beamer && "column" `elem` classes - then \contents -> - let valign = - maybe "" - (brackets . text . T.unpack . mapAlignment) - (lookup "align" kvs) - w = maybe "0.48" fromPct (lookup "width" kvs) - in inCmd "begin" "column" <> - valign <> - braces (literal w <> "\\textwidth") - $$ contents - $$ inCmd "end" "column" - else id - fromPct xs = - case T.unsnoc xs of - Just (ds, '%') -> case safeRead ds of - Just digits -> showFl (digits / 100 :: Double) - Nothing -> xs - _ -> xs - wrapDir = case lookup "dir" kvs of - Just "rtl" -> align "RTL" - Just "ltr" -> align "LTR" - _ -> id - wrapLang txt = case lang of - Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if T.null o - then "" - else brackets $ literal o - in inCmd "begin" (literal l) <> ops - $$ blankline <> txt <> blankline - $$ inCmd "end" (literal l) - Nothing -> txt - return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t - -hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) -hypertarget _ "" x = return x -hypertarget addnewline ident x = do - ref <- literal `fmap` toLabel ident - return $ text "\\hypertarget" - <> braces ref - <> braces ((if addnewline && not (isEmpty x) - then "%" <> cr - else empty) <> x) - -labelFor :: PandocMonad m => Text -> LW m (Doc Text) -labelFor "" = return empty -labelFor ident = do - ref <- literal `fmap` toLabel ident - return $ text "\\label" <> braces ref - -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: PandocMonad m => [Inline] -- ^ Inlines to convert @@ -1176,27 +746,6 @@ inlineListToLaTeX lst = hcat <$> fixInitialLineBreaks xs fixInitialLineBreaks xs = xs -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted _ = False - -cslEntryToLaTeX :: PandocMonad m - => Block - -> LW m (Doc Text) -cslEntryToLaTeX (Para xs) = - mconcat <$> mapM go xs - where - go (Span ("",["csl-block"],[]) ils) = - (cr <>) . inCmd "CSLBlock" <$> inlineListToLaTeX ils - go (Span ("",["csl-left-margin"],[]) ils) = - inCmd "CSLLeftMargin" <$> inlineListToLaTeX ils - go (Span ("",["csl-right-inline"],[]) ils) = - (cr <>) . inCmd "CSLRightInline" <$> inlineListToLaTeX ils - go (Span ("",["csl-indent"],[]) ils) = - (cr <>) . inCmd "CSLIndent" <$> inlineListToLaTeX ils - go il = inlineToLaTeX il -cslEntryToLaTeX x = blockToLaTeX x - -- | Convert inline element to LaTeX inlineToLaTeX :: PandocMonad m => Inline -- ^ Inline to convert @@ -1204,23 +753,38 @@ inlineToLaTeX :: PandocMonad m inlineToLaTeX (Span (id',classes,kvs) ils) = do linkAnchor <- hypertarget False id' empty lang <- toLang $ lookup "lang" kvs - let cmds = ["textup" | "csl-no-emph" `elem` classes] ++ - ["textnormal" | "csl-no-strong" `elem` classes || - "csl-no-smallcaps" `elem` classes] ++ - ["RL" | ("dir", "rtl") `elem` kvs] ++ - ["LR" | ("dir", "ltr") `elem` kvs] ++ - (case lang of - Just lng -> let (l, o) = toPolyglossia lng - ops = if T.null o then "" else "[" <> o <> "]" - in ["text" <> l <> ops] - Nothing -> []) + let classToCmd "csl-no-emph" = Just "textup" + classToCmd "csl-no-strong" = Just "textnormal" + classToCmd "csl-no-smallcaps" = Just "textnormal" + classToCmd "csl-block" = Just "CSLBlock" + classToCmd "csl-left-margin" = Just "CSLLeftMargin" + classToCmd "csl-right-inline" = Just "CSLRightInline" + classToCmd "csl-indent" = Just "CSLIndent" + classToCmd _ = Nothing + kvToCmd ("dir","rtl") = Just "RL" + kvToCmd ("dir","ltr") = Just "LR" + kvToCmd _ = Nothing + langCmds = + case lang of + Just lng -> let (l, o) = toPolyglossia lng + ops = if T.null o then "" else "[" <> o <> "]" + in ["text" <> l <> ops] + Nothing -> [] + let cmds = mapMaybe classToCmd classes ++ mapMaybe kvToCmd kvs ++ langCmds contents <- inlineListToLaTeX ils - return $ (if T.null id' - then empty - else "\\protect" <> linkAnchor) <> - (if null cmds - then braces contents - else foldr inCmd contents cmds) + return $ + (case classes of + ["csl-block"] -> (cr <>) + ["csl-left-margin"] -> (cr <>) + ["csl-right-inline"] -> (cr <>) + ["csl-indent"] -> (cr <>) + _ -> id) $ + (if T.null id' + then empty + else "\\protect" <> linkAnchor) <> + (if null cmds + then braces contents + else foldr inCmd contents cmds) inlineToLaTeX (Emph lst) = inCmd "emph" <$> inlineListToLaTeX lst inlineToLaTeX (Underline lst) = inCmd "underline" <$> inlineListToLaTeX lst inlineToLaTeX (Strong lst) = inCmd "textbf" <$> inlineListToLaTeX lst @@ -1242,8 +806,8 @@ inlineToLaTeX (Cite cits lst) = do st <- get let opts = stOptions st case writerCiteMethod opts of - Natbib -> citationsToNatbib cits - Biblatex -> citationsToBiblatex cits + Natbib -> citationsToNatbib inlineListToLaTeX cits + Biblatex -> citationsToBiblatex inlineListToLaTeX cits _ -> inlineListToLaTeX lst inlineToLaTeX (Code (_,classes,kvs) str) = do @@ -1267,7 +831,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of (c:_) -> c [] -> '!' - let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str + let isEscapable '\\' = True + isEscapable '{' = True + isEscapable '}' = True + isEscapable '%' = True + isEscapable '~' = True + isEscapable '_' = True + isEscapable '&' = True + isEscapable '#' = True + isEscapable '^' = True + isEscapable _ = False + let escChar c | isEscapable c = T.pack ['\\',c] + | otherwise = T.singleton c + let str' = T.concatMap escChar str -- we always put lstinline in a dummy 'passthrough' command -- (defined in the default template) so that we don't have -- to change the way we escape characters depending on whether @@ -1317,6 +893,10 @@ inlineToLaTeX (Quoted qt lst) = do if isEnabled Ext_smart opts then char '`' <> inner <> char '\'' else char '\x2018' <> inner <> char '\x2019' + where + isQuoted (Span _ (x:_)) = isQuoted x + isQuoted (Quoted _ _) = True + isQuoted _ = False inlineToLaTeX (Str str) = do setEmptyLine False liftM literal $ stringToLaTeX TextString str @@ -1339,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do inlineToLaTeX LineBreak = do emptyLine <- gets stEmptyLine setEmptyLine True - return $ (if emptyLine then "~" else "") <> "\\\\" <> cr + return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr inlineToLaTeX SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of @@ -1454,153 +1034,6 @@ protectCode x = [x] setEmptyLine :: PandocMonad m => Bool -> LW m () setEmptyLine b = modify $ \st -> st{ stEmptyLine = b } -citationsToNatbib :: PandocMonad m => [Citation] -> LW m (Doc Text) -citationsToNatbib - [one] - = citeCommand c p s k - where - Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } - = one - c = case m of - AuthorInText -> "citet" - SuppressAuthor -> "citeyearpar" - NormalCitation -> "citep" - -citationsToNatbib cits - | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits - = citeCommand "citep" p s ks - where - noPrefix = all (null . citationPrefix) - noSuffix = all (null . citationSuffix) - ismode m = all ((==) m . citationMode) - p = citationPrefix $ - head cits - s = citationSuffix $ - last cits - ks = T.intercalate ", " $ map citationId cits - -citationsToNatbib (c:cs) | citationMode c == AuthorInText = do - author <- citeCommand "citeauthor" [] [] (citationId c) - cits <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs) - return $ author <+> cits - -citationsToNatbib cits = do - cits' <- mapM convertOne cits - return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" - where - combineTwo a b | isEmpty a = b - | otherwise = a <> text "; " <> b - convertOne Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } - = case m of - AuthorInText -> citeCommand "citealt" p s k - SuppressAuthor -> citeCommand "citeyear" p s k - NormalCitation -> citeCommand "citealp" p s k - -citeCommand :: PandocMonad m - => Text -> [Inline] -> [Inline] -> Text -> LW m (Doc Text) -citeCommand c p s k = do - args <- citeArguments p s k - return $ literal ("\\" <> c) <> args - -type Prefix = [Inline] -type Suffix = [Inline] -type CiteId = Text -data CiteGroup = CiteGroup Prefix Suffix [CiteId] - -citeArgumentsList :: PandocMonad m - => CiteGroup -> LW m (Doc Text) -citeArgumentsList (CiteGroup _ _ []) = return empty -citeArgumentsList (CiteGroup pfxs sfxs ids) = do - pdoc <- inlineListToLaTeX pfxs - sdoc <- inlineListToLaTeX sfxs' - return $ optargs pdoc sdoc <> - braces (literal (T.intercalate "," (reverse ids))) - where sfxs' = stripLocatorBraces $ case sfxs of - (Str t : r) -> case T.uncons t of - Just (x, xs) - | T.null xs - , isPunctuation x -> dropWhile (== Space) r - | isPunctuation x -> Str xs : r - _ -> sfxs - _ -> sfxs - optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of - (True, True ) -> empty - (True, False) -> brackets sdoc - (_ , _ ) -> brackets pdoc <> brackets sdoc - -citeArguments :: PandocMonad m - => [Inline] -> [Inline] -> Text -> LW m (Doc Text) -citeArguments p s k = citeArgumentsList (CiteGroup p s [k]) - --- strip off {} used to define locator in pandoc-citeproc; see #5722 -stripLocatorBraces :: [Inline] -> [Inline] -stripLocatorBraces = walk go - where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs - go x = x - -citationsToBiblatex :: PandocMonad m => [Citation] -> LW m (Doc Text) -citationsToBiblatex - [one] - = citeCommand cmd p s k - where - Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } = one - cmd = case m of - SuppressAuthor -> "autocite*" - AuthorInText -> "textcite" - NormalCitation -> "autocite" - -citationsToBiblatex (c:cs) - | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs) - = do - let cmd = case citationMode c of - SuppressAuthor -> "\\autocite*" - AuthorInText -> "\\textcite" - NormalCitation -> "\\autocite" - return $ text cmd <> - braces (literal (T.intercalate "," (map citationId (c:cs)))) - | otherwise - = do - let cmd = case citationMode c of - SuppressAuthor -> "\\autocites*" - AuthorInText -> "\\textcites" - NormalCitation -> "\\autocites" - - groups <- mapM citeArgumentsList (reverse (foldl' grouper [] (c:cs))) - - return $ text cmd <> mconcat groups - - where grouper prev cit = case prev of - ((CiteGroup oPfx oSfx ids):rest) - | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest - _ -> CiteGroup pfx sfx [cid] : prev - where pfx = citationPrefix cit - sfx = citationSuffix cit - cid = citationId cit - -citationsToBiblatex _ = return empty - --- Determine listings language from list of class attributes. -getListingsLanguage :: [Text] -> Maybe Text -getListingsLanguage xs - = foldr ((<|>) . toListingsLanguage) Nothing xs - -mbBraced :: Text -> Text -mbBraced x = if not (T.all isAlphaNum x) - then "{" <> x <> "}" - else x - -- Extract a key from divs and spans extract :: Text -> Block -> [Text] extract key (Div attr _) = lookKey key attr @@ -1617,175 +1050,3 @@ extractInline _ _ = [] -- Look up a key in an attribute and give a list of its values lookKey :: Text -> Attr -> [Text] lookKey key (_,_,kvs) = maybe [] T.words $ lookup key kvs - --- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: Lang -> (Text, Text) -toPolyglossiaEnv l = - case toPolyglossia l of - ("arabic", o) -> ("Arabic", o) - x -> x - --- Takes a list of the constituents of a BCP 47 language code and --- converts it to a Polyglossia (language, options) tuple --- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: Lang -> (Text, Text) -toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") -toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") -toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") -toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") -toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") -toPolyglossia (Lang "de" _ _ vars) - | "1901" `elem` vars = ("german", "spelling=old") -toPolyglossia (Lang "de" _ "AT" vars) - | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") -toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") -toPolyglossia (Lang "de" _ "CH" vars) - | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") -toPolyglossia (Lang "de" _ _ _) = ("german", "") -toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") -toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") -toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") -toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") -toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") -toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") -toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") -toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") -toPolyglossia (Lang "la" _ _ vars) - | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ "BR" _) = ("portuguese", "variant=brazilian") -toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") - --- Takes a list of the constituents of a BCP 47 language code and --- converts it to a Babel language string. --- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf --- List of supported languages (slightly outdated): --- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: Lang -> Text -toBabel (Lang "de" _ "AT" vars) - | "1901" `elem` vars = "austrian" - | otherwise = "naustrian" -toBabel (Lang "de" _ "CH" vars) - | "1901" `elem` vars = "swissgerman" - | otherwise = "nswissgerman" -toBabel (Lang "de" _ _ vars) - | "1901" `elem` vars = "german" - | otherwise = "ngerman" -toBabel (Lang "dsb" _ _ _) = "lowersorbian" -toBabel (Lang "el" _ _ vars) - | "polyton" `elem` vars = "polutonikogreek" -toBabel (Lang "en" _ "AU" _) = "australian" -toBabel (Lang "en" _ "CA" _) = "canadian" -toBabel (Lang "en" _ "GB" _) = "british" -toBabel (Lang "en" _ "NZ" _) = "newzealand" -toBabel (Lang "en" _ "UK" _) = "british" -toBabel (Lang "en" _ "US" _) = "american" -toBabel (Lang "fr" _ "CA" _) = "canadien" -toBabel (Lang "fra" _ _ vars) - | "aca" `elem` vars = "acadian" -toBabel (Lang "grc" _ _ _) = "polutonikogreek" -toBabel (Lang "hsb" _ _ _) = "uppersorbian" -toBabel (Lang "la" _ _ vars) - | "x-classic" `elem` vars = "classiclatin" -toBabel (Lang "pt" _ "BR" _) = "brazilian" -toBabel (Lang "sl" _ _ _) = "slovene" -toBabel x = commonFromBcp47 x - --- Takes a list of the constituents of a BCP 47 language code --- and converts it to a string shared by Babel and Polyglossia. --- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: Lang -> Text -commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" -commonFromBcp47 (Lang "zh" "Latn" _ vars) - | "pinyin" `elem` vars = "pinyin" -commonFromBcp47 (Lang l _ _ _) = fromIso l - where - fromIso "af" = "afrikaans" - fromIso "am" = "amharic" - fromIso "ar" = "arabic" - fromIso "as" = "assamese" - fromIso "ast" = "asturian" - fromIso "bg" = "bulgarian" - fromIso "bn" = "bengali" - fromIso "bo" = "tibetan" - fromIso "br" = "breton" - fromIso "ca" = "catalan" - fromIso "cy" = "welsh" - fromIso "cs" = "czech" - fromIso "cop" = "coptic" - fromIso "da" = "danish" - fromIso "dv" = "divehi" - fromIso "el" = "greek" - fromIso "en" = "english" - fromIso "eo" = "esperanto" - fromIso "es" = "spanish" - fromIso "et" = "estonian" - fromIso "eu" = "basque" - fromIso "fa" = "farsi" - fromIso "fi" = "finnish" - fromIso "fr" = "french" - fromIso "fur" = "friulan" - fromIso "ga" = "irish" - fromIso "gd" = "scottish" - fromIso "gez" = "ethiopic" - fromIso "gl" = "galician" - fromIso "he" = "hebrew" - fromIso "hi" = "hindi" - fromIso "hr" = "croatian" - fromIso "hu" = "magyar" - fromIso "hy" = "armenian" - fromIso "ia" = "interlingua" - fromIso "id" = "indonesian" - fromIso "ie" = "interlingua" - fromIso "is" = "icelandic" - fromIso "it" = "italian" - fromIso "jp" = "japanese" - fromIso "km" = "khmer" - fromIso "kmr" = "kurmanji" - fromIso "kn" = "kannada" - fromIso "ko" = "korean" - fromIso "la" = "latin" - fromIso "lo" = "lao" - fromIso "lt" = "lithuanian" - fromIso "lv" = "latvian" - fromIso "ml" = "malayalam" - fromIso "mn" = "mongolian" - fromIso "mr" = "marathi" - fromIso "nb" = "norsk" - fromIso "nl" = "dutch" - fromIso "nn" = "nynorsk" - fromIso "no" = "norsk" - fromIso "nqo" = "nko" - fromIso "oc" = "occitan" - fromIso "pa" = "panjabi" - fromIso "pl" = "polish" - fromIso "pms" = "piedmontese" - fromIso "pt" = "portuguese" - fromIso "rm" = "romansh" - fromIso "ro" = "romanian" - fromIso "ru" = "russian" - fromIso "sa" = "sanskrit" - fromIso "se" = "samin" - fromIso "sk" = "slovak" - fromIso "sq" = "albanian" - fromIso "sr" = "serbian" - fromIso "sv" = "swedish" - fromIso "syr" = "syriac" - fromIso "ta" = "tamil" - fromIso "te" = "telugu" - fromIso "th" = "thai" - fromIso "ti" = "ethiopic" - fromIso "tk" = "turkmen" - fromIso "tr" = "turkish" - fromIso "uk" = "ukrainian" - fromIso "ur" = "urdu" - fromIso "vi" = "vietnamese" - fromIso _ = "" diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs new file mode 100644 index 000000000..ab4d365cc --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs @@ -0,0 +1,47 @@ +{- | + Module : Text.Pandoc.Writers.LaTeX.Caption + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Write figure or table captions as LaTeX. +-} +module Text.Pandoc.Writers.LaTeX.Caption + ( getCaption + ) where + +import Control.Monad.State.Strict +import Data.Monoid (Any(..)) +import Data.Text (Text) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.DocLayout (Doc, brackets, empty) +import Text.Pandoc.Shared +import Text.Pandoc.Walk +import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) +import Text.Pandoc.Writers.LaTeX.Types + ( LW, WriterState (stExternalNotes, stNotes) ) + +getCaption :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Bool -> [Inline] + -> LW m (Doc Text, Doc Text, Doc Text) +getCaption inlineListToLaTeX externalNotes txt = do + oldExternalNotes <- gets stExternalNotes + modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] } + capt <- inlineListToLaTeX txt + footnotes <- if externalNotes + then notesToLaTeX <$> gets stNotes + else return empty + modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] } + -- We can't have footnotes in the list of figures/tables, so remove them: + let getNote (Note _) = Any True + getNote _ = Any False + let hasNotes = getAny . query getNote + captForLof <- if hasNotes txt + then brackets <$> inlineListToLaTeX (walk deNote txt) + else return empty + return (capt, captForLof, footnotes) diff --git a/src/Text/Pandoc/Writers/LaTeX/Citation.hs b/src/Text/Pandoc/Writers/LaTeX/Citation.hs new file mode 100644 index 000000000..f48a43d7a --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Citation.hs @@ -0,0 +1,181 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Citation + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Citation + ( citationsToNatbib, + citationsToBiblatex + ) where + +import Data.Text (Text) +import Data.Char (isPunctuation) +import qualified Data.Text as T +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Data.List (foldl') +import Text.DocLayout (Doc, brackets, empty, (<+>), text, isEmpty, literal, + braces) +import Text.Pandoc.Walk +import Text.Pandoc.Writers.LaTeX.Types ( LW ) + +citationsToNatbib :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Citation] + -> LW m (Doc Text) +citationsToNatbib inlineListToLaTeX [one] + = citeCommand inlineListToLaTeX c p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = one + c = case m of + AuthorInText -> "citet" + SuppressAuthor -> "citeyearpar" + NormalCitation -> "citep" + +citationsToNatbib inlineListToLaTeX cits + | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits + = citeCommand inlineListToLaTeX "citep" p s ks + where + noPrefix = all (null . citationPrefix) + noSuffix = all (null . citationSuffix) + ismode m = all ((==) m . citationMode) + p = citationPrefix $ + head cits + s = citationSuffix $ + last cits + ks = T.intercalate ", " $ map citationId cits + +citationsToNatbib inlineListToLaTeX (c:cs) + | citationMode c == AuthorInText = do + author <- citeCommand inlineListToLaTeX "citeauthor" [] [] (citationId c) + cits <- citationsToNatbib inlineListToLaTeX + (c { citationMode = SuppressAuthor } : cs) + return $ author <+> cits + +citationsToNatbib inlineListToLaTeX cits = do + cits' <- mapM convertOne cits + return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" + where + citeCommand' = citeCommand inlineListToLaTeX + combineTwo a b | isEmpty a = b + | otherwise = a <> text "; " <> b + convertOne Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = case m of + AuthorInText -> citeCommand' "citealt" p s k + SuppressAuthor -> citeCommand' "citeyear" p s k + NormalCitation -> citeCommand' "citealp" p s k + +citeCommand :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Text + -> [Inline] + -> [Inline] + -> Text + -> LW m (Doc Text) +citeCommand inlineListToLaTeX c p s k = do + args <- citeArguments inlineListToLaTeX p s k + return $ literal ("\\" <> c) <> args + +type Prefix = [Inline] +type Suffix = [Inline] +type CiteId = Text +data CiteGroup = CiteGroup Prefix Suffix [CiteId] + +citeArgumentsList :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> CiteGroup + -> LW m (Doc Text) +citeArgumentsList _inlineListToLaTeX (CiteGroup _ _ []) = return empty +citeArgumentsList inlineListToLaTeX (CiteGroup pfxs sfxs ids) = do + pdoc <- inlineListToLaTeX pfxs + sdoc <- inlineListToLaTeX sfxs' + return $ optargs pdoc sdoc <> + braces (literal (T.intercalate "," (reverse ids))) + where sfxs' = stripLocatorBraces $ case sfxs of + (Str t : r) -> case T.uncons t of + Just (x, xs) + | T.null xs + , isPunctuation x -> dropWhile (== Space) r + | isPunctuation x -> Str xs : r + _ -> sfxs + _ -> sfxs + optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of + (True, True ) -> empty + (True, False) -> brackets sdoc + (_ , _ ) -> brackets pdoc <> brackets sdoc + +citeArguments :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Inline] + -> [Inline] + -> Text + -> LW m (Doc Text) +citeArguments inlineListToLaTeX p s k = + citeArgumentsList inlineListToLaTeX (CiteGroup p s [k]) + +-- strip off {} used to define locator in pandoc-citeproc; see #5722 +stripLocatorBraces :: [Inline] -> [Inline] +stripLocatorBraces = walk go + where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs + go x = x + +citationsToBiblatex :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Citation] -> LW m (Doc Text) +citationsToBiblatex inlineListToLaTeX [one] + = citeCommand inlineListToLaTeX cmd p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } = one + cmd = case m of + SuppressAuthor -> "autocite*" + AuthorInText -> "textcite" + NormalCitation -> "autocite" + +citationsToBiblatex inlineListToLaTeX (c:cs) + | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs) + = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocite*" + AuthorInText -> "\\textcite" + NormalCitation -> "\\autocite" + return $ text cmd <> + braces (literal (T.intercalate "," (map citationId (c:cs)))) + | otherwise + = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocites*" + AuthorInText -> "\\textcites" + NormalCitation -> "\\autocites" + + groups <- mapM (citeArgumentsList inlineListToLaTeX) + (reverse (foldl' grouper [] (c:cs))) + + return $ text cmd <> mconcat groups + + where grouper prev cit = case prev of + ((CiteGroup oPfx oSfx ids):rest) + | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest + _ -> CiteGroup pfx sfx [cid] : prev + where pfx = citationPrefix cit + sfx = citationSuffix cit + cid = citationId cit + +citationsToBiblatex _ _ = return empty diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs new file mode 100644 index 000000000..0ba68b74e --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -0,0 +1,192 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Lang + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Lang + ( toPolyglossiaEnv, + toPolyglossia, + toBabel + ) where +import Data.Text (Text) +import Text.Collate.Lang (Lang(..)) + + +-- In environments \Arabic instead of \arabic is used +toPolyglossiaEnv :: Lang -> (Text, Text) +toPolyglossiaEnv l = + case toPolyglossia l of + ("arabic", o) -> ("Arabic", o) + x -> x + +-- Takes a list of the constituents of a BCP47 language code and +-- converts it to a Polyglossia (language, options) tuple +-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf +toPolyglossia :: Lang -> (Text, Text) +toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "JO") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LB") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LY") _ _ _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ (Just "MA") _ _ _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ (Just "MR") _ _ _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ (Just "PS") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "SY") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "TN") _ _ _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars _ _) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") vars _ _) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ (Just "CH") vars _ _) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ _ vars _ _) + | "polyton" `elem` vars = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars _ _) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") + +-- Takes a list of the constituents of a BCP47 language code and +-- converts it to a Babel language string. +-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf +-- List of supported languages (slightly outdated): +-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf +toBabel :: Lang -> Text +toBabel (Lang "de" _ (Just "AT") vars _ _) + | "1901" `elem` vars = "austrian" + | otherwise = "naustrian" +toBabel (Lang "de" _ (Just "CH") vars _ _) + | "1901" `elem` vars = "swissgerman" + | otherwise = "nswissgerman" +toBabel (Lang "de" _ _ vars _ _) + | "1901" `elem` vars = "german" + | otherwise = "ngerman" +toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" +toBabel (Lang "el" _ _ vars _ _) + | "polyton" `elem` vars = "polutonikogreek" +toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" +toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" +toBabel (Lang "en" _ (Just "GB") _ _ _) = "british" +toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand" +toBabel (Lang "en" _ (Just "UK") _ _ _) = "british" +toBabel (Lang "en" _ (Just "US") _ _ _) = "american" +toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien" +toBabel (Lang "fra" _ _ vars _ _) + | "aca" `elem` vars = "acadian" +toBabel (Lang "grc" _ _ _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars _ _) + | "x-classic" `elem` vars = "classiclatin" +toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian" +toBabel (Lang "sl" _ _ _ _ _) = "slovene" +toBabel x = commonFromBcp47 x + +-- Takes a list of the constituents of a BCP47 language code +-- and converts it to a string shared by Babel and Polyglossia. +-- https://tools.ietf.org/html/bcp47#section-2.1 +commonFromBcp47 :: Lang -> Text +commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _) = "serbianc" +commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l + where + fromIso "af" = "afrikaans" + fromIso "am" = "amharic" + fromIso "ar" = "arabic" + fromIso "as" = "assamese" + fromIso "ast" = "asturian" + fromIso "bg" = "bulgarian" + fromIso "bn" = "bengali" + fromIso "bo" = "tibetan" + fromIso "br" = "breton" + fromIso "ca" = "catalan" + fromIso "cy" = "welsh" + fromIso "cs" = "czech" + fromIso "cop" = "coptic" + fromIso "da" = "danish" + fromIso "dv" = "divehi" + fromIso "el" = "greek" + fromIso "en" = "english" + fromIso "eo" = "esperanto" + fromIso "es" = "spanish" + fromIso "et" = "estonian" + fromIso "eu" = "basque" + fromIso "fa" = "farsi" + fromIso "fi" = "finnish" + fromIso "fr" = "french" + fromIso "fur" = "friulan" + fromIso "ga" = "irish" + fromIso "gd" = "scottish" + fromIso "gez" = "ethiopic" + fromIso "gl" = "galician" + fromIso "he" = "hebrew" + fromIso "hi" = "hindi" + fromIso "hr" = "croatian" + fromIso "hu" = "magyar" + fromIso "hy" = "armenian" + fromIso "ia" = "interlingua" + fromIso "id" = "indonesian" + fromIso "ie" = "interlingua" + fromIso "is" = "icelandic" + fromIso "it" = "italian" + fromIso "ja" = "japanese" + fromIso "km" = "khmer" + fromIso "kmr" = "kurmanji" + fromIso "kn" = "kannada" + fromIso "ko" = "korean" + fromIso "la" = "latin" + fromIso "lo" = "lao" + fromIso "lt" = "lithuanian" + fromIso "lv" = "latvian" + fromIso "ml" = "malayalam" + fromIso "mn" = "mongolian" + fromIso "mr" = "marathi" + fromIso "nb" = "norsk" + fromIso "nl" = "dutch" + fromIso "nn" = "nynorsk" + fromIso "no" = "norsk" + fromIso "nqo" = "nko" + fromIso "oc" = "occitan" + fromIso "pa" = "panjabi" + fromIso "pl" = "polish" + fromIso "pms" = "piedmontese" + fromIso "pt" = "portuguese" + fromIso "rm" = "romansh" + fromIso "ro" = "romanian" + fromIso "ru" = "russian" + fromIso "sa" = "sanskrit" + fromIso "se" = "samin" + fromIso "sk" = "slovak" + fromIso "sq" = "albanian" + fromIso "sr" = "serbian" + fromIso "sv" = "swedish" + fromIso "syr" = "syriac" + fromIso "ta" = "tamil" + fromIso "te" = "telugu" + fromIso "th" = "thai" + fromIso "ti" = "ethiopic" + fromIso "tk" = "turkmen" + fromIso "tr" = "turkish" + fromIso "uk" = "ukrainian" + fromIso "ur" = "urdu" + fromIso "vi" = "vietnamese" + fromIso _ = "" diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs new file mode 100644 index 000000000..f225ef0c5 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs @@ -0,0 +1,34 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Notes + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Output tables as LaTeX. +-} +module Text.Pandoc.Writers.LaTeX.Notes + ( notesToLaTeX + ) where + +import Data.List (intersperse) +import Text.DocLayout ( Doc, braces, empty, text, vcat, ($$)) +import Data.Text (Text) + +notesToLaTeX :: [Doc Text] -> Doc Text +notesToLaTeX = \case + [] -> empty + ns -> (case length ns of + n | n > 1 -> "\\addtocounter" <> + braces "footnote" <> + braces (text $ show $ 1 - n) + | otherwise -> empty) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs new file mode 100644 index 000000000..27a8a0257 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -0,0 +1,307 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Table + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Output LaTeX formatted tables. +-} +module Text.Pandoc.Writers.LaTeX.Table + ( tableToLaTeX + ) where +import Control.Monad.State.Strict +import Data.List (intersperse) +import qualified Data.List.NonEmpty as NonEmpty +import Data.List.NonEmpty (NonEmpty ((:|))) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.DocLayout + ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest + , text, vcat, ($$) ) +import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow) +import Text.Pandoc.Walk (walk, query) +import Data.Monoid (Any(..)) +import Text.Pandoc.Writers.LaTeX.Caption (getCaption) +import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) +import Text.Pandoc.Writers.LaTeX.Types + ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stMultiRow + , stNotes, stTable) ) +import Text.Printf (printf) +import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.Writers.AnnotatedTable as Ann + +tableToLaTeX :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> ([Block] -> LW m (Doc Text)) + -> Ann.Table + -> LW m (Doc Text) +tableToLaTeX inlnsToLaTeX blksToLaTeX tbl = do + let (Ann.Table _attr caption _specs thead tbodies tfoot) = tbl + CaptionDocs capt captNotes <- captionToLaTeX inlnsToLaTeX caption + let removeNote (Note _) = Span ("", [], []) [] + removeNote x = x + firsthead <- if isEmpty capt || isEmptyHead thead + then return empty + else ($$ text "\\endfirsthead") <$> + headToLaTeX blksToLaTeX thead + head' <- if isEmptyHead thead + then return "\\toprule" + -- avoid duplicate notes in head and firsthead: + else headToLaTeX blksToLaTeX + (if isEmpty firsthead + then thead + else walk removeNote thead) + rows' <- mapM (rowToLaTeX blksToLaTeX BodyCell) $ + mconcat (map bodyRows tbodies) <> footRows tfoot + modify $ \s -> s{ stTable = True } + notes <- notesToLaTeX <$> gets stNotes + return + $ "\\begin{longtable}[]" <> + braces ("@{}" <> colDescriptors tbl <> "@{}") + -- the @{} removes extra space at beginning and end + $$ capt + $$ firsthead + $$ head' + $$ "\\endhead" + $$ vcat rows' + $$ "\\bottomrule" + $$ "\\end{longtable}" + $$ captNotes + $$ notes + +-- | Creates column descriptors for the table. +colDescriptors :: Ann.Table -> Doc Text +colDescriptors (Ann.Table _attr _caption specs thead tbodies tfoot) = + let (aligns, widths) = unzip specs + + defaultWidthsOnly = all (== ColWidthDefault) widths + isSimpleTable = all (all isSimpleCell) $ mconcat + [ headRows thead + , concatMap bodyRows tbodies + , footRows tfoot + ] + + relativeWidths = if defaultWidthsOnly + then replicate (length specs) + (1 / fromIntegral (length specs)) + else map toRelWidth widths + in if defaultWidthsOnly && isSimpleTable + then hcat $ map (literal . colAlign) aligns + else (cr <>) . nest 2 . vcat . map literal $ + zipWith (toColDescriptor (length specs)) + aligns + relativeWidths + where + toColDescriptor :: Int -> Alignment -> Double -> Text + toColDescriptor numcols align width = + T.pack $ printf + ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" + (T.unpack (alignCommand align)) + ((numcols - 1) * 2) + width + + isSimpleCell (Ann.Cell _ _ (Cell _attr _align _rowspan _colspan blocks)) = + case blocks of + [Para _] -> True + [Plain _] -> True + [] -> True + _ -> False + + toRelWidth ColWidthDefault = 0 + toRelWidth (ColWidth w) = w + +alignCommand :: Alignment -> Text +alignCommand = \case + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + +colAlign :: Alignment -> Text +colAlign = \case + AlignLeft -> "l" + AlignRight -> "r" + AlignCenter -> "c" + AlignDefault -> "l" + +data CaptionDocs = + CaptionDocs + { captionCommand :: Doc Text + , captionNotes :: Doc Text + } + +captionToLaTeX :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Caption + -> LW m CaptionDocs +captionToLaTeX inlnsToLaTeX (Caption _maybeShort longCaption) = do + let caption = blocksToInlines longCaption + (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption + return $ CaptionDocs + { captionNotes = captNotes + , captionCommand = if isEmpty captionText + then empty + else "\\caption" <> captForLof <> + braces captionText <> "\\tabularnewline" + } + +type BlocksWriter m = [Block] -> LW m (Doc Text) + +headToLaTeX :: PandocMonad m + => BlocksWriter m + -> Ann.TableHead + -> LW m (Doc Text) +headToLaTeX blocksWriter (Ann.TableHead _attr headerRows) = do + rowsContents <- mapM (rowToLaTeX blocksWriter HeaderCell . headerRowCells) + headerRows + return ("\\toprule" $$ vcat rowsContents $$ "\\midrule") + +-- | Converts a row of table cells into a LaTeX row. +rowToLaTeX :: PandocMonad m + => BlocksWriter m + -> CellType + -> [Ann.Cell] + -> LW m (Doc Text) +rowToLaTeX blocksWriter celltype row = do + cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row) + return $ hsep (intersperse "&" cellsDocs) <> " \\\\" + +-- | Pads row with empty cells to adjust for rowspans above this row. +fillRow :: [Ann.Cell] -> [Ann.Cell] +fillRow = go 0 + where + go _ [] = [] + go n (acell@(Ann.Cell _spec (Ann.ColNumber colnum) cell):cells) = + let (Cell _ _ _ (ColSpan colspan) _) = cell + in map mkEmptyCell [n .. colnum - 1] ++ + acell : go (colnum + colspan) cells + + mkEmptyCell :: Int -> Ann.Cell + mkEmptyCell colnum = + Ann.Cell ((AlignDefault, ColWidthDefault):|[]) + (Ann.ColNumber colnum) + B.emptyCell + +isEmptyHead :: Ann.TableHead -> Bool +isEmptyHead (Ann.TableHead _attr []) = True +isEmptyHead (Ann.TableHead _attr rows) = all (null . headerRowCells) rows + +-- | Gets all cells in a header row. +headerRowCells :: Ann.HeaderRow -> [Ann.Cell] +headerRowCells (Ann.HeaderRow _attr _rownum cells) = cells + +-- | Gets all cells in a body row. +bodyRowCells :: Ann.BodyRow -> [Ann.Cell] +bodyRowCells (Ann.BodyRow _attr _rownum rowhead cells) = rowhead <> cells + +-- | Gets a list of rows of the table body, where a row is a simple +-- list of cells. +bodyRows :: Ann.TableBody -> [[Ann.Cell]] +bodyRows (Ann.TableBody _attr _rowheads headerRows rows) = + map headerRowCells headerRows <> map bodyRowCells rows + +-- | Gets a list of rows of the table head, where a row is a simple +-- list of cells. +headRows :: Ann.TableHead -> [[Ann.Cell]] +headRows (Ann.TableHead _attr rows) = map headerRowCells rows + +-- | Gets a list of rows from the foot, where a row is a simple list +-- of cells. +footRows :: Ann.TableFoot -> [[Ann.Cell]] +footRows (Ann.TableFoot _attr rows) = map headerRowCells rows + +-- For simple latex tables (without minipages or parboxes), +-- we need to go to some lengths to get line breaks working: +-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. +fixLineBreaks :: Block -> Block +fixLineBreaks = walk fixLineBreaks' + +fixLineBreaks' :: [Inline] -> [Inline] +fixLineBreaks' ils = case splitBy (== LineBreak) ils of + [] -> [] + [xs] -> xs + chunks -> RawInline "tex" "\\vtop{" : + concatMap tohbox chunks <> + [RawInline "tex" "}"] + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <> + [RawInline "tex" "}"] + +-- We also change display math to inline math, since display +-- math breaks in simple tables. +displayMathToInline :: Inline -> Inline +displayMathToInline (Math DisplayMath x) = Math InlineMath x +displayMathToInline x = x + +cellToLaTeX :: PandocMonad m + => BlocksWriter m + -> CellType + -> Ann.Cell + -> LW m (Doc Text) +cellToLaTeX blockListToLaTeX celltype annotatedCell = do + let (Ann.Cell specs _colnum cell) = annotatedCell + let hasWidths = snd (NonEmpty.head specs) /= ColWidthDefault + let specAlign = fst (NonEmpty.head specs) + let (Cell _attr align' rowspan colspan blocks) = cell + let align = case align' of + AlignDefault -> specAlign + _ -> align' + beamer <- gets stBeamer + externalNotes <- gets stExternalNotes + inMinipage <- gets stInMinipage + -- See #5367 -- footnotehyper/footnote don't work in beamer, + -- so we need to produce the notes outside the table... + modify $ \st -> st{ stExternalNotes = beamer } + let isPlainOrPara = \case + Para{} -> True + Plain{} -> True + _ -> False + let hasLineBreak LineBreak = Any True + hasLineBreak _ = Any False + let hasLineBreaks = getAny $ query hasLineBreak blocks + result <- + if not hasWidths || (celltype /= HeaderCell + && all isPlainOrPara blocks + && not hasLineBreaks) + then + blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks + else do + modify $ \st -> st{ stInMinipage = True } + cellContents <- blockListToLaTeX blocks + modify $ \st -> st{ stInMinipage = inMinipage } + let valign = text $ case celltype of + HeaderCell -> "[b]" + BodyCell -> "[t]" + let halign = literal $ alignCommand align + return $ "\\begin{minipage}" <> valign <> + braces "\\linewidth" <> halign <> cr <> + cellContents <> + (if hasLineBreaks then "\\strut" else mempty) + <> cr <> + "\\end{minipage}" + modify $ \st -> st{ stExternalNotes = externalNotes } + when (rowspan /= RowSpan 1) $ + modify (\st -> st{ stMultiRow = True }) + let inMultiColumn x = case colspan of + (ColSpan 1) -> x + (ColSpan n) -> "\\multicolumn" + <> braces (literal (tshow n)) + <> braces (literal $ colAlign align) + <> braces x + let inMultiRow x = case rowspan of + (RowSpan 1) -> x + (RowSpan n) -> let nrows = literal (tshow n) + in "\\multirow" <> braces nrows + <> braces "*" <> braces x + return . inMultiColumn . inMultiRow $ result + +data CellType + = HeaderCell + | BodyCell + deriving Eq diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs new file mode 100644 index 000000000..c06b7e923 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs @@ -0,0 +1,83 @@ +module Text.Pandoc.Writers.LaTeX.Types + ( LW + , WriterState (..) + , startingState + ) where + +import Control.Monad.State.Strict (StateT) +import Data.Text (Text) +import Text.DocLayout (Doc) +import Text.Pandoc.Options + ( WriterOptions (writerIncremental, writerTopLevelDivision) + , TopLevelDivision (..) + ) + +-- | LaTeX writer type. The type constructor @m@ will typically be an +-- instance of PandocMonad. +type LW m = StateT WriterState m + +data WriterState = + WriterState + { stInNote :: Bool -- ^ true if we're in a note + , stInQuote :: Bool -- ^ true if in a blockquote + , stExternalNotes :: Bool -- ^ true if in context where + -- we need to store footnotes + , stInMinipage :: Bool -- ^ true if in minipage + , stInHeading :: Bool -- ^ true if in a section heading + , stInItem :: Bool -- ^ true if in \item[..] + , stNotes :: [Doc Text] -- ^ notes in a minipage + , stOLLevel :: Int -- ^ level of ordered list nesting + , stOptions :: WriterOptions -- ^ writer options, so they don't have to + -- be parameter + , stVerbInNote :: Bool -- ^ true if document has verbatim text in note + , stTable :: Bool -- ^ true if document has a table + , stMultiRow :: Bool -- ^ true if document has multirow cells + , stStrikeout :: Bool -- ^ true if document has strikeout + , stUrl :: Bool -- ^ true if document has visible URL link + , stGraphics :: Bool -- ^ true if document contains images + , stLHS :: Bool -- ^ true if document has literate haskell code + , stHasChapters :: Bool -- ^ true if document has chapters + , stCsquotes :: Bool -- ^ true if document uses csquotes + , stHighlighting :: Bool -- ^ true if document has highlighted code + , stIncremental :: Bool -- ^ true if beamer lists should be + , stZwnj :: Bool -- ^ true if document has a ZWNJ character + , stInternalLinks :: [Text] -- ^ list of internal link targets + , stBeamer :: Bool -- ^ produce beamer + , stEmptyLine :: Bool -- ^ true if no content on line + , stHasCslRefs :: Bool -- ^ has a Div with class refs + , stIsFirstInDefinition :: Bool -- ^ first block in a defn list + } + +startingState :: WriterOptions -> WriterState +startingState options = + WriterState + { stInNote = False + , stInQuote = False + , stExternalNotes = False + , stInHeading = False + , stInMinipage = False + , stInItem = False + , stNotes = [] + , stOLLevel = 1 + , stOptions = options + , stVerbInNote = False + , stTable = False + , stMultiRow = False + , stStrikeout = False + , stUrl = False + , stGraphics = False + , stLHS = False + , stHasChapters = case writerTopLevelDivision options of + TopLevelPart -> True + TopLevelChapter -> True + _ -> False + , stCsquotes = False + , stHighlighting = False + , stIncremental = writerIncremental options + , stZwnj = False + , stInternalLinks = [] + , stBeamer = False + , stEmptyLine = True + , stHasCslRefs = False + , stIsFirstInDefinition = False + } diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs new file mode 100644 index 000000000..c34338121 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs @@ -0,0 +1,275 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Util + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Util ( + stringToLaTeX + , StringContext(..) + , toLabel + , inCmd + , wrapDiv + , hypertarget + , labelFor + , getListingsLanguage + , mbBraced + ) +where + +import Control.Applicative ((<|>)) +import Control.Monad (when) +import Text.Pandoc.Class (PandocMonad, toLang) +import Text.Pandoc.Options (WriterOptions(..), isEnabled) +import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..)) +import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv) +import Text.Pandoc.Highlighting (toListingsLanguage) +import Text.DocLayout +import Text.Pandoc.Definition +import Text.Pandoc.ImageSize (showFl) +import Control.Monad.State.Strict (gets, modify) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Extensions (Extension(Ext_smart)) +import Data.Char (isLetter, isSpace, isDigit, isAscii, ord, isAlphaNum) +import Text.Printf (printf) +import Text.Pandoc.Shared (safeRead, elemText) +import qualified Data.Text.Normalize as Normalize +import Data.List (uncons) + +data StringContext = TextString + | URLString + | CodeString + deriving (Eq) + +-- escape things as needed for LaTeX +stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text +stringToLaTeX context zs = do + opts <- gets stOptions + when ('\x200c' `elemText` zs) $ + modify (\s -> s { stZwnj = True }) + return $ T.pack $ + foldr (go opts context) mempty $ T.unpack $ + if writerPreferAscii opts + then Normalize.normalize Normalize.NFD zs + else zs + where + go :: WriterOptions -> StringContext -> Char -> String -> String + go opts ctx x xs = + let ligatures = isEnabled Ext_smart opts && ctx == TextString + isUrl = ctx == URLString + mbAccentCmd = + if writerPreferAscii opts && ctx == TextString + then uncons xs >>= \(c,_) -> lookupAccent c + else Nothing + emits s = + case mbAccentCmd of + Just cmd -> + cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent + Nothing -> s <> xs + emitc c = + case mbAccentCmd of + Just cmd -> + cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent + Nothing -> c : xs + emitcseq cs = + case xs of + c:_ | isLetter c + , ctx == TextString + -> cs <> " " <> xs + | isSpace c -> cs <> "{}" <> xs + | ctx == TextString + -> cs <> xs + _ -> cs <> "{}" <> xs + emitquote cs = + case xs of + '`':_ -> cs <> "\\," <> xs -- add thin space + '\'':_ -> cs <> "\\," <> xs -- add thin space + _ -> cs <> xs + in case x of + '?' | ligatures -> -- avoid ?` ligature + case xs of + '`':_ -> emits "?{}" + _ -> emitc x + '!' | ligatures -> -- avoid !` ligature + case xs of + '`':_ -> emits "!{}" + _ -> emitc x + '{' -> emits "\\{" + '}' -> emits "\\}" + '`' | ctx == CodeString -> emitcseq "\\textasciigrave" + '$' | not isUrl -> emits "\\$" + '%' -> emits "\\%" + '&' -> emits "\\&" + '_' | not isUrl -> emits "\\_" + '#' -> emits "\\#" + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures + ('-':_) -> emits "-\\/" + _ -> emitc '-' + '~' | not isUrl -> emitcseq "\\textasciitilde" + '^' -> emits "\\^{}" + '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows + | otherwise -> emitcseq "\\textbackslash" + '|' | not isUrl -> emitcseq "\\textbar" + '<' -> emitcseq "\\textless" + '>' -> emitcseq "\\textgreater" + '[' -> emits "{[}" -- to avoid interpretation as + ']' -> emits "{]}" -- optional arguments + '\'' | ctx == CodeString -> emitcseq "\\textquotesingle" + '\160' -> emits "~" + '\x200B' -> emits "\\hspace{0pt}" -- zero-width space + '\x202F' -> emits "\\," + '\x2026' -> emitcseq "\\ldots" + '\x2018' | ligatures -> emitquote "`" + '\x2019' | ligatures -> emitquote "'" + '\x201C' | ligatures -> emitquote "``" + '\x201D' | ligatures -> emitquote "''" + '\x2014' | ligatures -> emits "---" + '\x2013' | ligatures -> emits "--" + _ | writerPreferAscii opts + -> case x of + 'ı' -> emitcseq "\\i" + 'ȷ' -> emitcseq "\\j" + 'å' -> emitcseq "\\aa" + 'Å' -> emitcseq "\\AA" + 'ß' -> emitcseq "\\ss" + 'ø' -> emitcseq "\\o" + 'Ø' -> emitcseq "\\O" + 'Ł' -> emitcseq "\\L" + 'ł' -> emitcseq "\\l" + 'æ' -> emitcseq "\\ae" + 'Æ' -> emitcseq "\\AE" + 'œ' -> emitcseq "\\oe" + 'Œ' -> emitcseq "\\OE" + '£' -> emitcseq "\\pounds" + '€' -> emitcseq "\\euro" + '©' -> emitcseq "\\copyright" + _ -> emitc x + | otherwise -> emitc x + +lookupAccent :: Char -> Maybe String +lookupAccent '\779' = Just "\\H" +lookupAccent '\768' = Just "\\`" +lookupAccent '\769' = Just "\\'" +lookupAccent '\770' = Just "\\^" +lookupAccent '\771' = Just "\\~" +lookupAccent '\776' = Just "\\\"" +lookupAccent '\775' = Just "\\." +lookupAccent '\772' = Just "\\=" +lookupAccent '\781' = Just "\\|" +lookupAccent '\817' = Just "\\b" +lookupAccent '\807' = Just "\\c" +lookupAccent '\783' = Just "\\G" +lookupAccent '\777' = Just "\\h" +lookupAccent '\803' = Just "\\d" +lookupAccent '\785' = Just "\\f" +lookupAccent '\778' = Just "\\r" +lookupAccent '\865' = Just "\\t" +lookupAccent '\782' = Just "\\U" +lookupAccent '\780' = Just "\\v" +lookupAccent '\774' = Just "\\u" +lookupAccent '\808' = Just "\\k" +lookupAccent '\8413' = Just "\\textcircled" +lookupAccent _ = Nothing + +toLabel :: PandocMonad m => Text -> LW m Text +toLabel z = go `fmap` stringToLaTeX URLString z + where + go = T.concatMap $ \x -> case x of + _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x + | x `elemText` "_-+=:;." -> T.singleton x + | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) + +-- | Puts contents into LaTeX command. +inCmd :: Text -> Doc Text -> Doc Text +inCmd cmd contents = char '\\' <> literal cmd <> braces contents + +mapAlignment :: Text -> Text +mapAlignment a = case a of + "top" -> "T" + "top-baseline" -> "t" + "bottom" -> "b" + "center" -> "c" + _ -> a + +wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text) +wrapDiv (_,classes,kvs) t = do + beamer <- gets stBeamer + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + lang <- toLang $ lookup "lang" kvs + let wrapColumns = if beamer && "columns" `elem` classes + then \contents -> + let valign = maybe "T" mapAlignment (lookup "align" kvs) + totalwidth = maybe [] (\x -> ["totalwidth=" <> x]) + (lookup "totalwidth" kvs) + onlytextwidth = filter ("onlytextwidth" ==) classes + options = text $ T.unpack $ T.intercalate "," $ + valign : totalwidth ++ onlytextwidth + in inCmd "begin" "columns" <> brackets options + $$ contents + $$ inCmd "end" "columns" + else id + wrapColumn = if beamer && "column" `elem` classes + then \contents -> + let valign = + maybe "" + (brackets . text . T.unpack . mapAlignment) + (lookup "align" kvs) + w = maybe "0.48" fromPct (lookup "width" kvs) + in inCmd "begin" "column" <> + valign <> + braces (literal w <> "\\textwidth") + $$ contents + $$ inCmd "end" "column" + else id + fromPct xs = + case T.unsnoc xs of + Just (ds, '%') -> case safeRead ds of + Just digits -> showFl (digits / 100 :: Double) + Nothing -> xs + _ -> xs + wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lang of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if T.null o + then "" + else brackets $ literal o + in inCmd "begin" (literal l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (literal l) + Nothing -> txt + return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t + +hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) +hypertarget _ "" x = return x +hypertarget addnewline ident x = do + ref <- literal `fmap` toLabel ident + return $ text "\\hypertarget" + <> braces ref + <> braces ((if addnewline && not (isEmpty x) + then "%" <> cr + else empty) <> x) + +labelFor :: PandocMonad m => Text -> LW m (Doc Text) +labelFor "" = return empty +labelFor ident = do + ref <- literal `fmap` toLabel ident + return $ text "\\label" <> braces ref + +-- Determine listings language from list of class attributes. +getListingsLanguage :: [Text] -> Maybe Text +getListingsLanguage xs + = foldr ((<|>) . toListingsLanguage) Nothing xs + +mbBraced :: Text -> Text +mbBraced x = if not (T.all isAlphaNum x) + then "{" <> x <> "}" + else x diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 4eb0db042..87b2d8d21 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -16,6 +16,7 @@ Conversion of 'Pandoc' documents to roff man page format. module Text.Pandoc.Writers.Man ( writeMan ) where import Control.Monad.State.Strict import Data.List (intersperse) +import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -175,8 +176,7 @@ blockToMan opts (BulletList items) = do return (vcat contents) blockToMan opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs - let indent = 1 + - maximum (map T.length markers) + let indent = 1 + maybe 0 maximum (nonEmpty (map T.length markers)) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ zip markers items return (vcat contents) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 6aec6b244..fda2bbcef 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -5,7 +5,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -14,7 +14,7 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. -Markdown: <http://daringfireball.net/projects/markdown/> +Markdown: <https://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown ( writeMarkdown, @@ -22,15 +22,14 @@ module Text.Pandoc.Writers.Markdown ( writePlain) where import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Char (isAlphaNum) import Data.Default -import Data.List (find, intersperse, sortOn, transpose) +import Data.List (intersperse, sortOn, transpose) +import Data.List.NonEmpty (nonEmpty, NonEmpty(..)) import qualified Data.Map as M -import Data.Maybe (fromMaybe, mapMaybe) +import Data.Maybe (fromMaybe, mapMaybe, isNothing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import Network.HTTP (urlEncode) import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition @@ -44,59 +43,11 @@ import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Val(..), Context(..), FromContext(..)) import Text.Pandoc.Walk import Text.Pandoc.Writers.HTML (writeHtml5String) -import Text.Pandoc.Writers.Math (texMathToInlines) -import Text.Pandoc.XML (toHtml5Entities) -import Data.Coerce (coerce) - -type Notes = [[Block]] -type Ref = (Text, Target, Attr) -type Refs = [Ref] - -type MD m = ReaderT WriterEnv (StateT WriterState m) - -evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a -evalMD md env st = evalStateT (runReaderT md env) st - -data WriterEnv = WriterEnv { envInList :: Bool - , envVariant :: MarkdownVariant - , envRefShortcutable :: Bool - , envBlockLevel :: Int - , envEscapeSpaces :: Bool - } - -data MarkdownVariant = - PlainText - | Commonmark - | Markdown - deriving (Show, Eq) - -instance Default WriterEnv - where def = WriterEnv { envInList = False - , envVariant = Markdown - , envRefShortcutable = True - , envBlockLevel = 0 - , envEscapeSpaces = False - } - -data WriterState = WriterState { stNotes :: Notes - , stPrevRefs :: Refs - , stRefs :: Refs - , stKeys :: M.Map Key - (M.Map (Target, Attr) Int) - , stLastIdx :: Int - , stIds :: Set.Set Text - , stNoteNum :: Int - } - -instance Default WriterState - where def = WriterState{ stNotes = [] - , stPrevRefs = [] - , stRefs = [] - , stKeys = M.empty - , stLastIdx = 0 - , stIds = Set.empty - , stNoteNum = 1 - } +import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown, linkAttributes, attrsToMarkdown) +import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), + WriterState(..), + WriterEnv(..), + Ref, Refs, MD, evalMD) -- | Convert Pandoc to Markdown. writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -116,7 +67,16 @@ writePlain opts document = -- | Convert Pandoc to Commonmark. writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCommonMark opts document = - evalMD (pandocToMarkdown opts document) def{ envVariant = Commonmark } def + evalMD (pandocToMarkdown opts' document) def{ envVariant = Commonmark } def + where + opts' = opts{ writerExtensions = + -- These extensions can't be enabled or disabled + -- for commonmark because they're part of the core; + -- we set them here so that escapeText will behave + -- properly. + enableExtension Ext_all_symbols_escapable $ + enableExtension Ext_intraword_underscores $ + writerExtensions opts } pandocTitleBlock :: Doc Text -> [Doc Text] -> Doc Text -> Doc Text pandocTitleBlock tit auths dat = @@ -174,23 +134,24 @@ valToYaml :: Val Text -> Doc Text valToYaml (ListVal xs) = vcat $ map (\v -> hang 2 "- " (valToYaml v)) xs valToYaml (MapVal c) = contextToYaml c +valToYaml (BoolVal True) = "true" +valToYaml (BoolVal False) = "false" valToYaml (SimpleVal x) | isEmpty x = empty | otherwise = if hasNewlines x then hang 0 ("|" <> cr) x - else if fst $ foldr needsDoubleQuotes (False, True) x + else if isNothing $ foldM needsDoubleQuotes True x then "\"" <> fmap escapeInDoubleQuotes x <> "\"" else x where - needsDoubleQuotes t (positive, isFirst) + needsDoubleQuotes isFirst t = if T.any isBadAnywhere t || (isFirst && T.any isYamlPunct (T.take 1 t)) - then (True, False) - else (positive, False) + then Nothing + else Just False isBadAnywhere '#' = True isBadAnywhere ':' = True - isBadAnywhere '`' = False isBadAnywhere _ = False hasNewlines NewLine = True hasNewlines BlankLines{} = True @@ -295,75 +256,11 @@ noteToMarkdown opts num blocks = do then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents --- | Escape special characters for Markdown. -escapeText :: WriterOptions -> Text -> Text -escapeText opts = T.pack . go . T.unpack - where - go [] = [] - go (c:cs) = - case c of - '<' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '<' : go cs - | otherwise -> "<" ++ go cs - '>' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '>' : go cs - | otherwise -> ">" ++ go cs - '@' | isEnabled Ext_citations opts -> - case cs of - (d:_) - | isAlphaNum d || d == '_' - -> '\\':'@':go cs - _ -> '@':go cs - _ | c `elem` ['\\','`','*','_','[',']','#'] -> - '\\':c:go cs - '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs - '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs - '~' | isEnabled Ext_subscript opts || - isEnabled Ext_strikeout opts -> '\\':'~':go cs - '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs - '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs - '"' | isEnabled Ext_smart opts -> '\\':'"':go cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':go cs - _ -> '-':go cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':go rest - _ -> '.':go cs - _ -> case cs of - '_':x:xs - | isEnabled Ext_intraword_underscores opts - , isAlphaNum c - , isAlphaNum x -> c : '_' : x : go xs - _ -> c : go cs - -attrsToMarkdown :: Attr -> Doc Text -attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] - where attribId = case attribs of - ("",_,_) -> empty - (i,_,_) -> "#" <> escAttr i - attribClasses = case attribs of - (_,[],_) -> empty - (_,cs,_) -> hsep $ - map (escAttr . ("."<>)) - cs - attribKeys = case attribs of - (_,_,[]) -> empty - (_,_,ks) -> hsep $ - map (\(k,v) -> escAttr k - <> "=\"" <> - escAttr v <> "\"") ks - escAttr = mconcat . map escAttrChar . T.unpack - escAttrChar '"' = literal "\\\"" - escAttrChar '\\' = literal "\\\\" - escAttrChar c = literal $ T.singleton c - -linkAttributes :: WriterOptions -> Attr -> Doc Text -linkAttributes opts attr = - if isEnabled Ext_link_attributes opts && attr /= nullAttr - then attrsToMarkdown attr - else empty +-- | (Code) blocks with a single class and no attributes can just use it +-- standalone, no need to bother with curly braces. +classOrAttrsToMarkdown :: Attr -> Doc Text +classOrAttrsToMarkdown ("",[cls],[]) = literal cls +classOrAttrsToMarkdown attrs = attrsToMarkdown attrs -- | Ordered list start parser for use in Para below. olMarker :: Parser Text ParserState () @@ -424,9 +321,12 @@ blockToMarkdown' opts (Div attrs ils) = do case () of _ | isEnabled Ext_fenced_divs opts && attrs /= nullAttr -> - nowrap (literal ":::" <+> attrsToMarkdown attrs) $$ - chomp contents $$ - literal ":::" <> blankline + let attrsToMd = if variant == Commonmark + then attrsToMarkdown + else classOrAttrsToMarkdown + in nowrap (literal ":::" <+> attrsToMd attrs) $$ + chomp contents $$ + literal ":::" <> blankline | isEnabled Ext_native_divs opts || (isEnabled Ext_raw_html opts && (variant == Commonmark || @@ -468,7 +368,7 @@ blockToMarkdown' opts (Plain inlines) = do -- title beginning with fig: indicates figure blockToMarkdown' opts (Para [Image attr alt (src,tgt@(T.stripPrefix "fig:" -> Just tit))]) | isEnabled Ext_raw_html opts && - not (isEnabled Ext_link_attributes opts) && + not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr = -- use raw HTML (<> blankline) . literal . T.strip <$> writeHtml5String opts{ writerTemplate = Nothing } @@ -492,25 +392,24 @@ blockToMarkdown' opts b@(RawBlock f str) = do let renderEmpty = mempty <$ report (BlockNotRendered b) case variant of PlainText -> renderEmpty - _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra", - "markdown_mmd", "markdown_strict"] -> - return $ literal str <> literal "\n" - | isEnabled Ext_raw_attribute opts -> rawAttribBlock - | f `elem` ["html", "html5", "html4"] -> - case () of - _ | isEnabled Ext_markdown_attribute opts -> return $ - literal (addMarkdownAttribute str) <> literal "\n" - | isEnabled Ext_raw_html opts -> return $ - literal str <> literal "\n" - | isEnabled Ext_raw_attribute opts -> rawAttribBlock - | otherwise -> renderEmpty - | f `elem` ["latex", "tex"] -> - case () of - _ | isEnabled Ext_raw_tex opts -> return $ - literal str <> literal "\n" - | isEnabled Ext_raw_attribute opts -> rawAttribBlock - | otherwise -> renderEmpty - | otherwise -> renderEmpty + Commonmark + | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] + -> return $ literal str <> literal "\n" + Markdown + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + -> return $ literal str <> literal "\n" + _ | isEnabled Ext_raw_attribute opts -> rawAttribBlock + | f `elem` ["html", "html5", "html4"] + , isEnabled Ext_markdown_attribute opts + -> return $ literal (addMarkdownAttribute str) <> literal "\n" + | f `elem` ["html", "html5", "html4"] + , isEnabled Ext_raw_html opts + -> return $ literal str <> literal "\n" + | f `elem` ["latex", "tex"] + , isEnabled Ext_raw_tex opts + -> return $ literal str <> literal "\n" + _ -> renderEmpty blockToMarkdown' opts HorizontalRule = return $ blankline <> literal (T.replicate (writerColumns opts) "-") <> blankline blockToMarkdown' opts (Header level attr inlines) = do @@ -534,7 +433,8 @@ blockToMarkdown' opts (Header level attr inlines) = do && id' == autoId -> empty (id',_,_) | isEnabled Ext_mmd_header_identifiers opts -> space <> brackets (literal id') - _ | isEnabled Ext_header_attributes opts -> + _ | isEnabled Ext_header_attributes opts || + isEnabled Ext_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty contents <- inlineListToMarkdown opts $ @@ -584,19 +484,21 @@ blockToMarkdown' opts (CodeBlock attribs str) = do | isEnabled Ext_fenced_code_blocks opts -> tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline _ -> nest (writerTabStop opts) (literal str) <> blankline - where endline c = literal $ case [T.length ln - | ln <- map trim (T.lines str) - , T.pack [c,c,c] `T.isPrefixOf` ln - , T.all (== c) ln] of - [] -> T.replicate 3 $ T.singleton c - xs -> T.replicate (maximum xs + 1) $ T.singleton c - backticks = endline '`' - tildes = endline '~' - attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ " " <> attrsToMarkdown attribs - else case attribs of - (_,cls:_,_) -> " " <> literal cls - _ -> empty + where + endlineLen c = maybe 3 ((+1) . maximum) $ nonEmpty $ + [T.length ln + | ln <- map trim (T.lines str) + , T.pack [c,c,c] `T.isPrefixOf` ln + , T.all (== c) ln] + endline c = literal $ T.replicate (endlineLen c) $ T.singleton c + backticks = endline '`' + tildes = endline '~' + attrs = if isEnabled Ext_fenced_code_attributes opts || + isEnabled Ext_attributes opts + then nowrap $ " " <> classOrAttrsToMarkdown attribs + else case attribs of + (_,cls:_,_) -> " " <> literal cls + _ -> empty blockToMarkdown' opts (BlockQuote blocks) = do variant <- asks envVariant -- if we're writing literate haskell, put a space before the bird tracks @@ -609,7 +511,7 @@ blockToMarkdown' opts (BlockQuote blocks) = do return $ prefixed leader contents <> blankline blockToMarkdown' opts t@(Table _ blkCapt specs thead tbody tfoot) = do let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot - let numcols = maximum (length aligns : length widths : + let numcols = maximum (length aligns :| length widths : map length (headers:rows)) caption' <- inlineListToMarkdown opts caption let caption'' @@ -672,7 +574,10 @@ blockToMarkdown' opts (BulletList items) = do contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ (if isTightList items then vcat else vsep) contents <> blankline blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do - let start' = if isEnabled Ext_startnum opts then start else 1 + variant <- asks envVariant + let start' = if variant == Commonmark || isEnabled Ext_startnum opts + then start + else 1 let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim let attribs = (start', sty', delim') @@ -708,7 +613,8 @@ pipeTable headless aligns rawHeaders rawRows = do blockFor AlignCenter x y = cblock (x + 2) (sp <> y <> sp) <> lblock 0 empty blockFor AlignRight x y = rblock (x + 2) (y <> sp) <> lblock 0 empty blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty - let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) + let widths = map (max 3 . maybe 3 maximum . nonEmpty . map offset) $ + transpose (rawHeaders : rawRows) let torow cs = nowrap $ literal "|" <> hcat (intersperse (literal "|") $ zipWith3 blockFor aligns widths (map chomp cs)) @@ -742,11 +648,11 @@ pandocTable opts multiline headless aligns widths rawHeaders rawRows = do -- Number of characters per column necessary to output every cell -- without requiring a line break. -- The @+2@ is needed for specifying the alignment. - let numChars = (+ 2) . maximum . map offset + let numChars = (+ 2) . maybe 0 maximum . nonEmpty . map offset -- Number of characters per column necessary to output every cell -- without requiring a line break *inside a word*. -- The @+2@ is needed for specifying the alignment. - let minNumChars = (+ 2) . maximum . map minOffset + let minNumChars = (+ 2) . maybe 0 maximum . nonEmpty . map minOffset let columns = transpose (rawHeaders : rawRows) -- minimal column width without wrapping a single word let relWidth w col = @@ -885,6 +791,9 @@ blockListToMarkdown opts blocks = do b1 : commentSep : fixBlocks (b2:bs) fixBlocks (Plain ils : bs@(RawBlock{}:_)) = Plain ils : fixBlocks bs + fixBlocks (Plain ils : bs@(Div{}:_)) + | isEnabled Ext_fenced_divs opts = + Para ils : fixBlocks bs fixBlocks (Plain ils : bs) | inlist = Plain ils : fixBlocks bs fixBlocks (Plain ils : bs) = @@ -908,488 +817,7 @@ blockListToMarkdown opts blocks = do | otherwise = RawBlock "markdown" " \n" mconcat <$> mapM (blockToMarkdown opts) (fixBlocks blocks) -getKey :: Doc Text -> Key -getKey = toKey . render Nothing - -findUsableIndex :: [Text] -> Int -> Int -findUsableIndex lbls i = if tshow i `elem` lbls - then findUsableIndex lbls (i + 1) - else i - -getNextIndex :: PandocMonad m => MD m Int -getNextIndex = do - prevRefs <- gets stPrevRefs - refs <- gets stRefs - i <- (+ 1) <$> gets stLastIdx - modify $ \s -> s{ stLastIdx = i } - let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs - return $ findUsableIndex refLbls i - --- | Get reference for target; if none exists, create unique one and return. --- Prefer label if possible; otherwise, generate a unique key. -getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text -getReference attr label target = do - refs <- gets stRefs - case find (\(_,t,a) -> t == target && a == attr) refs of - Just (ref, _, _) -> return ref - Nothing -> do - keys <- gets stKeys - let key = getKey label - let rawkey = coerce key - case M.lookup key keys of - Nothing -> do -- no other refs with this label - (lab', idx) <- if T.null rawkey || - T.length rawkey > 999 || - T.any (\c -> c == '[' || c == ']') rawkey - then do - i <- getNextIndex - return (tshow i, i) - else - return (render Nothing label, 0) - modify (\s -> s{ - stRefs = (lab', target, attr) : refs, - stKeys = M.insert (getKey label) - (M.insert (target, attr) idx mempty) - (stKeys s) }) - return lab' - - Just km -> -- we have refs with this label - case M.lookup (target, attr) km of - Just i -> do - let lab' = render Nothing $ - label <> if i == 0 - then mempty - else literal (tshow i) - -- make sure it's in stRefs; it may be - -- a duplicate that was printed in a previous - -- block: - when ((lab', target, attr) `notElem` refs) $ - modify (\s -> s{ - stRefs = (lab', target, attr) : refs }) - return lab' - Nothing -> do -- but this one is to a new target - i <- getNextIndex - let lab' = tshow i - modify (\s -> s{ - stRefs = (lab', target, attr) : refs, - stKeys = M.insert key - (M.insert (target, attr) i km) - (stKeys s) }) - return lab' - --- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text) -inlineListToMarkdown opts lst = do - inlist <- asks envInList - go (if inlist then avoidBadWrapsInList lst else lst) - where go [] = return empty - go (i:is) = case i of - Link {} -> case is of - -- If a link is followed by another link, or '[', '(' or ':' - -- then we don't shortcut - Link {}:_ -> unshortcutable - Space:Link {}:_ -> unshortcutable - Space:(Str(thead -> Just '[')):_ -> unshortcutable - Space:(RawInline _ (thead -> Just '[')):_ -> unshortcutable - Space:(Cite _ _):_ -> unshortcutable - SoftBreak:Link {}:_ -> unshortcutable - SoftBreak:(Str(thead -> Just '[')):_ -> unshortcutable - SoftBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable - SoftBreak:(Cite _ _):_ -> unshortcutable - LineBreak:Link {}:_ -> unshortcutable - LineBreak:(Str(thead -> Just '[')):_ -> unshortcutable - LineBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable - LineBreak:(Cite _ _):_ -> unshortcutable - (Cite _ _):_ -> unshortcutable - Str (thead -> Just '['):_ -> unshortcutable - Str (thead -> Just '('):_ -> unshortcutable - Str (thead -> Just ':'):_ -> unshortcutable - (RawInline _ (thead -> Just '[')):_ -> unshortcutable - (RawInline _ (thead -> Just '(')):_ -> unshortcutable - (RawInline _ (thead -> Just ':')):_ -> unshortcutable - (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable - _ -> shortcutable - _ -> shortcutable - where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) - unshortcutable = do - iMark <- local - (\env -> env { envRefShortcutable = False }) - (inlineToMarkdown opts i) - fmap (iMark <>) (go is) - thead = fmap fst . T.uncons - -isSp :: Inline -> Bool -isSp Space = True -isSp SoftBreak = True -isSp _ = False - -avoidBadWrapsInList :: [Inline] -> [Inline] -avoidBadWrapsInList [] = [] -avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s = - Str (" >" <> cs) : avoidBadWrapsInList xs -avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))] - | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]] -avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs) - | T.null cs && isSp s && c `elem` ['-','*','+'] = - Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str cs:Space:xs) - | isSp s && isOrderedListMarker cs = - Str (" " <> cs) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList [s, Str cs] - | isSp s && isOrderedListMarker cs = [Str $ " " <> cs] -avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs - -isOrderedListMarker :: Text -> Bool -isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) && - isRight (runParser (anyOrderedListMarker >> eof) - defaultParserState "" xs) - -isRight :: Either a b -> Bool -isRight (Right _) = True -isRight (Left _) = False - --- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text) -inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = - case lookup "data-emoji" kvs of - Just emojiname | isEnabled Ext_emoji opts -> - return $ ":" <> literal emojiname <> ":" - _ -> inlineToMarkdown opts (Str s) -inlineToMarkdown opts (Span attrs ils) = do - variant <- asks envVariant - contents <- inlineListToMarkdown opts ils - return $ case variant of - PlainText -> contents - _ | attrs == nullAttr -> contents - | isEnabled Ext_bracketed_spans opts -> - let attrs' = if attrs /= nullAttr - then attrsToMarkdown attrs - else empty - in "[" <> contents <> "]" <> attrs' - | isEnabled Ext_raw_html opts || - isEnabled Ext_native_spans opts -> - tagWithAttrs "span" attrs <> contents <> literal "</span>" - | otherwise -> contents -inlineToMarkdown _ (Emph []) = return empty -inlineToMarkdown opts (Emph lst) = do - variant <- asks envVariant - contents <- inlineListToMarkdown opts lst - return $ case variant of - PlainText - | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_" - | otherwise -> contents - _ -> "*" <> contents <> "*" -inlineToMarkdown _ (Underline []) = return empty -inlineToMarkdown opts (Underline lst) = do - variant <- asks envVariant - contents <- inlineListToMarkdown opts lst - case variant of - PlainText -> return contents - _ | isEnabled Ext_bracketed_spans opts -> - return $ "[" <> contents <> "]" <> "{.ul}" - | isEnabled Ext_native_spans opts -> - return $ tagWithAttrs "span" ("", ["underline"], []) - <> contents - <> literal "</span>" - | isEnabled Ext_raw_html opts -> - return $ "<u>" <> contents <> "</u>" - | otherwise -> inlineToMarkdown opts (Emph lst) -inlineToMarkdown _ (Strong []) = return empty -inlineToMarkdown opts (Strong lst) = do - variant <- asks envVariant - case variant of - PlainText -> - inlineListToMarkdown opts $ - if isEnabled Ext_gutenberg opts - then capitalize lst - else lst - _ -> do - contents <- inlineListToMarkdown opts lst - return $ "**" <> contents <> "**" -inlineToMarkdown _ (Strikeout []) = return empty -inlineToMarkdown opts (Strikeout lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_strikeout opts - then "~~" <> contents <> "~~" - else if isEnabled Ext_raw_html opts - then "<s>" <> contents <> "</s>" - else contents -inlineToMarkdown _ (Superscript []) = return empty -inlineToMarkdown opts (Superscript lst) = - local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do - contents <- inlineListToMarkdown opts lst - if isEnabled Ext_superscript opts - then return $ "^" <> contents <> "^" - else if isEnabled Ext_raw_html opts - then return $ "<sup>" <> contents <> "</sup>" - else - case traverse toSuperscriptInline lst of - Just xs' | not (writerPreferAscii opts) - -> inlineListToMarkdown opts xs' - _ -> do - let rendered = render Nothing contents - return $ - case mapM toSuperscript (T.unpack rendered) of - Just r -> literal $ T.pack r - Nothing -> literal $ "^(" <> rendered <> ")" -inlineToMarkdown _ (Subscript []) = return empty -inlineToMarkdown opts (Subscript lst) = - local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do - contents <- inlineListToMarkdown opts lst - if isEnabled Ext_subscript opts - then return $ "~" <> contents <> "~" - else if isEnabled Ext_raw_html opts - then return $ "<sub>" <> contents <> "</sub>" - else - case traverse toSubscriptInline lst of - Just xs' | not (writerPreferAscii opts) - -> inlineListToMarkdown opts xs' - _ -> do - let rendered = render Nothing contents - return $ - case mapM toSuperscript (T.unpack rendered) of - Just r -> literal $ T.pack r - Nothing -> literal $ "_(" <> rendered <> ")" -inlineToMarkdown opts (SmallCaps lst) = do - variant <- asks envVariant - if variant /= PlainText && - (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) - then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst) - else inlineListToMarkdown opts $ capitalize lst -inlineToMarkdown opts (Quoted SingleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_smart opts - then "'" <> contents <> "'" - else - if writerPreferAscii opts - then "‘" <> contents <> "’" - else "‘" <> contents <> "’" -inlineToMarkdown opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_smart opts - then "\"" <> contents <> "\"" - else - if writerPreferAscii opts - then "“" <> contents <> "”" - else "“" <> contents <> "”" -inlineToMarkdown opts (Code attr str) = do - let tickGroups = filter (T.any (== '`')) $ T.group str - let longest = if null tickGroups - then 0 - else maximum $ map T.length tickGroups - let marker = T.replicate (longest + 1) "`" - let spacer = if longest == 0 then "" else " " - let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr - then attrsToMarkdown attr - else empty - variant <- asks envVariant - case variant of - PlainText -> return $ literal str - _ -> return $ literal - (marker <> spacer <> str <> spacer <> marker) <> attrs -inlineToMarkdown opts (Str str) = do - variant <- asks envVariant - let str' = (if writerPreferAscii opts - then toHtml5Entities - else id) . - (if isEnabled Ext_smart opts - then unsmartify opts - else id) . - (if variant == PlainText - then id - else escapeText opts) $ str - return $ literal str' -inlineToMarkdown opts (Math InlineMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> inlineToMarkdown opts - (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$" <> literal str <> "$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\(" <> literal str <> "\\)" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\(" <> literal str <> "\\\\)" - | otherwise -> do - variant <- asks envVariant - texMathToInlines InlineMath str >>= - inlineListToMarkdown opts . - (if variant == PlainText then makeMathPlainer else id) -inlineToMarkdown opts (Math DisplayMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` - inlineToMarkdown opts (Image nullAttr [Str str] - (url <> T.pack (urlEncode $ T.unpack str), str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$$" <> literal str <> "$$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\[" <> literal str <> "\\]" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\[" <> literal str <> "\\\\]" - | otherwise -> (\x -> cr <> x <> cr) `fmap` - (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) -inlineToMarkdown opts il@(RawInline f str) = do - let tickGroups = filter (T.any (== '`')) $ T.group str - let numticks = if null tickGroups - then 1 - else 1 + maximum (map T.length tickGroups) - variant <- asks envVariant - let Format fmt = f - let rawAttribInline = return $ - literal (T.replicate numticks "`") <> literal str <> - literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}" - let renderEmpty = mempty <$ report (InlineNotRendered il) - case variant of - PlainText -> renderEmpty - _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra", - "markdown_mmd", "markdown_strict"] -> - return $ literal str - | isEnabled Ext_raw_attribute opts -> rawAttribInline - | f `elem` ["html", "html5", "html4"] -> - case () of - _ | isEnabled Ext_raw_html opts -> return $ literal str - | isEnabled Ext_raw_attribute opts -> rawAttribInline - | otherwise -> renderEmpty - | f `elem` ["latex", "tex"] -> - case () of - _ | isEnabled Ext_raw_tex opts -> return $ literal str - | isEnabled Ext_raw_attribute opts -> rawAttribInline - | otherwise -> renderEmpty - | otherwise -> renderEmpty -inlineToMarkdown opts LineBreak = do - variant <- asks envVariant - if variant == PlainText || isEnabled Ext_hard_line_breaks opts - then return cr - else return $ - if isEnabled Ext_escaped_line_breaks opts - then "\\" <> cr - else " " <> cr -inlineToMarkdown _ Space = do - escapeSpaces <- asks envEscapeSpaces - return $ if escapeSpaces then "\\ " else space -inlineToMarkdown opts SoftBreak = do - escapeSpaces <- asks envEscapeSpaces - let space' = if escapeSpaces then "\\ " else space - return $ case writerWrapText opts of - WrapNone -> space' - WrapAuto -> space' - WrapPreserve -> cr -inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst -inlineToMarkdown opts (Cite (c:cs) lst) - | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst - | otherwise = - if citationMode c == AuthorInText - then do - suffs <- inlineListToMarkdown opts $ citationSuffix c - rest <- mapM convertOne cs - let inbr = suffs <+> joincits rest - br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ literal ("@" <> citationId c) <+> br - else do - cits <- mapM convertOne (c:cs) - return $ literal "[" <> joincits cits <> literal "]" - where - joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) - convertOne Citation { citationId = k - , citationPrefix = pinlines - , citationSuffix = sinlines - , citationMode = m } - = do - pdoc <- inlineListToMarkdown opts pinlines - sdoc <- inlineListToMarkdown opts sinlines - let k' = literal (modekey m <> "@" <> k) - r = case sinlines of - Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc - _ -> k' <+> sdoc - return $ pdoc <+> r - modekey SuppressAuthor = "-" - modekey _ = "" -inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do - variant <- asks envVariant - linktext <- inlineListToMarkdown opts txt - let linktitle = if T.null tit - then empty - else literal $ " \"" <> tit <> "\"" - let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) - let useAuto = isURI src && - case txt of - [Str s] | escapeURI s == srcSuffix -> True - _ -> False - let useRefLinks = writerReferenceLinks opts && not useAuto - shortcutable <- asks envRefShortcutable - let useShortcutRefLinks = shortcutable && - isEnabled Ext_shortcut_reference_links opts - reftext <- if useRefLinks - then literal <$> getReference attr linktext (src, tit) - else return mempty - case variant of - PlainText - | useAuto -> return $ literal srcSuffix - | otherwise -> return linktext - _ | useAuto -> return $ "<" <> literal srcSuffix <> ">" - | useRefLinks -> - let first = "[" <> linktext <> "]" - second = if getKey linktext == getKey reftext - then if useShortcutRefLinks - then "" - else "[]" - else "[" <> reftext <> "]" - in return $ first <> second - | isEnabled Ext_raw_html opts - , not (isEnabled Ext_link_attributes opts) - , attr /= nullAttr -> -- use raw HTML to render attributes - literal . T.strip <$> - writeHtml5String opts{ writerTemplate = Nothing } - (Pandoc nullMeta [Plain [lnk]]) - | otherwise -> return $ - "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <> - linkAttributes opts attr -inlineToMarkdown opts img@(Image attr alternate (source, tit)) - | isEnabled Ext_raw_html opts && - not (isEnabled Ext_link_attributes opts) && - attr /= nullAttr = -- use raw HTML - literal . T.strip <$> - writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) - | otherwise = do - variant <- asks envVariant - let txt = if null alternate || alternate == [Str source] - -- to prevent autolinks - then [Str ""] - else alternate - linkPart <- inlineToMarkdown opts (Link attr txt (source, tit)) - return $ case variant of - PlainText -> "[" <> linkPart <> "]" - _ -> "!" <> linkPart -inlineToMarkdown opts (Note contents) = do - modify (\st -> st{ stNotes = contents : stNotes st }) - st <- get - let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1) - if isEnabled Ext_footnotes opts - then return $ "[^" <> ref <> "]" - else return $ "[" <> ref <> "]" - -makeMathPlainer :: [Inline] -> [Inline] -makeMathPlainer = walk go - where - go (Emph xs) = Span nullAttr xs - go x = x - lineBreakToSpace :: Inline -> Inline lineBreakToSpace LineBreak = Space lineBreakToSpace SoftBreak = Space lineBreakToSpace x = x - -toSubscriptInline :: Inline -> Maybe Inline -toSubscriptInline Space = Just Space -toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils -toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s) -toSubscriptInline LineBreak = Just LineBreak -toSubscriptInline SoftBreak = Just SoftBreak -toSubscriptInline _ = Nothing - -toSuperscriptInline :: Inline -> Maybe Inline -toSuperscriptInline Space = Just Space -toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils -toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s) -toSuperscriptInline LineBreak = Just LineBreak -toSuperscriptInline SoftBreak = Just SoftBreak -toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs new file mode 100644 index 000000000..cd5f5b896 --- /dev/null +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -0,0 +1,616 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Writers.Markdown.Inline + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.Markdown.Inline ( + inlineListToMarkdown, + linkAttributes, + attrsToMarkdown + ) where +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.Char (isAlphaNum, isDigit) +import Data.List (find, intersperse) +import Data.List.NonEmpty (nonEmpty) +import qualified Data.Map as M +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Network.HTTP (urlEncode) +import Text.Pandoc.Class.PandocMonad (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) +import Text.DocLayout +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Walk +import Text.Pandoc.Writers.HTML (writeHtml5String) +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.XML (toHtml5Entities) +import Data.Coerce (coerce) +import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), + WriterState(..), + WriterEnv(..), MD) + +-- | Escape special characters for Markdown. +escapeText :: WriterOptions -> Text -> Text +escapeText opts = T.pack . go . T.unpack + where + startsWithSpace (' ':_) = True + startsWithSpace ('\t':_) = True + startsWithSpace [] = True + startsWithSpace _ = False + go [] = [] + go (c:cs) = + case c of + '<' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '<' : go cs + | otherwise -> "<" ++ go cs + '>' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '>' : go cs + | otherwise -> ">" ++ go cs + '@' | isEnabled Ext_citations opts -> + case cs of + (d:_) + | isAlphaNum d || d == '_' || d == '{' + -> '\\':'@':go cs + _ -> '@':go cs + '#' | isEnabled Ext_space_in_atx_header opts + , startsWithSpace cs + -> '\\':'#':go cs + _ | c `elem` ['\\','`','*','_','[',']'] -> + '\\':c:go cs + '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs + '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs + '~' | isEnabled Ext_subscript opts || + isEnabled Ext_strikeout opts -> '\\':'~':go cs + '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs + '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs + '"' | isEnabled Ext_smart opts -> '\\':'"':go cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':go cs + _ -> '-':go cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':go rest + _ -> '.':go cs + _ -> case cs of + '_':x:xs + | isEnabled Ext_intraword_underscores opts + , isAlphaNum c + , isAlphaNum x -> c : '_' : x : go xs + '#':xs -> c : '#' : go xs + '>':xs -> c : '>' : go xs + _ -> c : go cs + +attrsToMarkdown :: Attr -> Doc Text +attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] + where attribId = case attribs of + ("",_,_) -> empty + (i,_,_) -> "#" <> escAttr i + attribClasses = case attribs of + (_,[],_) -> empty + (_,cs,_) -> hsep $ + map (escAttr . ("."<>)) + cs + attribKeys = case attribs of + (_,_,[]) -> empty + (_,_,ks) -> hsep $ + map (\(k,v) -> escAttr k + <> "=\"" <> + escAttr v <> "\"") ks + escAttr = mconcat . map escAttrChar . T.unpack + escAttrChar '"' = literal "\\\"" + escAttrChar '\\' = literal "\\\\" + escAttrChar c = literal $ T.singleton c + +linkAttributes :: WriterOptions -> Attr -> Doc Text +linkAttributes opts attr = + if (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr + then attrsToMarkdown attr + else empty + +getKey :: Doc Text -> Key +getKey = toKey . render Nothing + +findUsableIndex :: [Text] -> Int -> Int +findUsableIndex lbls i = if tshow i `elem` lbls + then findUsableIndex lbls (i + 1) + else i + +getNextIndex :: PandocMonad m => MD m Int +getNextIndex = do + prevRefs <- gets stPrevRefs + refs <- gets stRefs + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs + return $ findUsableIndex refLbls i + +-- | Get reference for target; if none exists, create unique one and return. +-- Prefer label if possible; otherwise, generate a unique key. +getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text +getReference attr label target = do + refs <- gets stRefs + case find (\(_,t,a) -> t == target && a == attr) refs of + Just (ref, _, _) -> return ref + Nothing -> do + keys <- gets stKeys + let key = getKey label + let rawkey = coerce key + case M.lookup key keys of + Nothing -> do -- no other refs with this label + (lab', idx) <- if T.null rawkey || + T.length rawkey > 999 || + T.any (\c -> c == '[' || c == ']') rawkey + then do + i <- getNextIndex + return (tshow i, i) + else + return (render Nothing label, 0) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) idx mempty) + (stKeys s) }) + return lab' + + Just km -> -- we have refs with this label + case M.lookup (target, attr) km of + Just i -> do + let lab' = render Nothing $ + label <> if i == 0 + then mempty + else literal (tshow i) + -- make sure it's in stRefs; it may be + -- a duplicate that was printed in a previous + -- block: + when ((lab', target, attr) `notElem` refs) $ + modify (\s -> s{ + stRefs = (lab', target, attr) : refs }) + return lab' + Nothing -> do -- but this one is to a new target + i <- getNextIndex + let lab' = tshow i + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert key + (M.insert (target, attr) i km) + (stKeys s) }) + return lab' + +-- | Convert list of Pandoc inline elements to markdown. +inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text) +inlineListToMarkdown opts lst = do + inlist <- asks envInList + go (if inlist then avoidBadWrapsInList lst else lst) + where go [] = return empty + go (x@Math{}:y@(Str t):zs) + | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058 + = liftM2 (<>) (inlineToMarkdown opts x) + (go (RawInline (Format "html") "<!-- -->" : y : zs)) + go (i:is) = case i of + Link {} -> case is of + -- If a link is followed by another link, or '[', '(' or ':' + -- then we don't shortcut + Link {}:_ -> unshortcutable + Space:Link {}:_ -> unshortcutable + Space:(Str(thead -> Just '[')):_ -> unshortcutable + Space:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + SoftBreak:Link {}:_ -> unshortcutable + SoftBreak:(Str(thead -> Just '[')):_ -> unshortcutable + SoftBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:Link {}:_ -> unshortcutable + LineBreak:(Str(thead -> Just '[')):_ -> unshortcutable + LineBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str (thead -> Just '['):_ -> unshortcutable + Str (thead -> Just '('):_ -> unshortcutable + Str (thead -> Just ':'):_ -> unshortcutable + (RawInline _ (thead -> Just '[')):_ -> unshortcutable + (RawInline _ (thead -> Just '(')):_ -> unshortcutable + (RawInline _ (thead -> Just ':')):_ -> unshortcutable + (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable + _ -> shortcutable + _ -> shortcutable + where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) + unshortcutable = do + iMark <- local + (\env -> env { envRefShortcutable = False }) + (inlineToMarkdown opts i) + fmap (iMark <>) (go is) + thead = fmap fst . T.uncons + +isSp :: Inline -> Bool +isSp Space = True +isSp SoftBreak = True +isSp _ = False + +avoidBadWrapsInList :: [Inline] -> [Inline] +avoidBadWrapsInList [] = [] +avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s = + Str (" >" <> cs) : avoidBadWrapsInList xs +avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))] + | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]] +avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs) + | T.null cs && isSp s && c `elem` ['-','*','+'] = + Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str cs:Space:xs) + | isSp s && isOrderedListMarker cs = + Str (" " <> cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList [s, Str cs] + | isSp s && isOrderedListMarker cs = [Str $ " " <> cs] +avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + +isOrderedListMarker :: Text -> Bool +isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) && + isRight (runParser (anyOrderedListMarker >> eof) + defaultParserState "" xs) + where + isRight (Right _) = True + isRight (Left _) = False + +-- | Convert Pandoc inline element to markdown. +inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text) +inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = + case lookup "data-emoji" kvs of + Just emojiname | isEnabled Ext_emoji opts -> + return $ ":" <> literal emojiname <> ":" + _ -> inlineToMarkdown opts (Str s) +inlineToMarkdown opts (Span attrs ils) = do + variant <- asks envVariant + contents <- inlineListToMarkdown opts ils + return $ case attrs of + (_,["csl-block"],_) -> (cr <>) + (_,["csl-left-margin"],_) -> (cr <>) + (_,["csl-indent"],_) -> (cr <>) + _ -> id + $ case variant of + PlainText -> contents + _ | attrs == nullAttr -> contents + | isEnabled Ext_bracketed_spans opts -> + let attrs' = if attrs /= nullAttr + then attrsToMarkdown attrs + else empty + in "[" <> contents <> "]" <> attrs' + | isEnabled Ext_raw_html opts || + isEnabled Ext_native_spans opts -> + tagWithAttrs "span" attrs <> contents <> literal "</span>" + | otherwise -> contents +inlineToMarkdown _ (Emph []) = return empty +inlineToMarkdown opts (Emph lst) = do + variant <- asks envVariant + contents <- inlineListToMarkdown opts lst + return $ case variant of + PlainText + | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_" + | otherwise -> contents + _ -> "*" <> contents <> "*" +inlineToMarkdown _ (Underline []) = return empty +inlineToMarkdown opts (Underline lst) = do + variant <- asks envVariant + contents <- inlineListToMarkdown opts lst + case variant of + PlainText -> return contents + _ | isEnabled Ext_bracketed_spans opts -> + return $ "[" <> contents <> "]" <> "{.ul}" + | isEnabled Ext_native_spans opts -> + return $ tagWithAttrs "span" ("", ["underline"], []) + <> contents + <> literal "</span>" + | isEnabled Ext_raw_html opts -> + return $ "<u>" <> contents <> "</u>" + | otherwise -> inlineToMarkdown opts (Emph lst) +inlineToMarkdown _ (Strong []) = return empty +inlineToMarkdown opts (Strong lst) = do + variant <- asks envVariant + case variant of + PlainText -> + inlineListToMarkdown opts $ + if isEnabled Ext_gutenberg opts + then capitalize lst + else lst + _ -> do + contents <- inlineListToMarkdown opts lst + return $ "**" <> contents <> "**" +inlineToMarkdown _ (Strikeout []) = return empty +inlineToMarkdown opts (Strikeout lst) = do + contents <- inlineListToMarkdown opts lst + return $ if isEnabled Ext_strikeout opts + then "~~" <> contents <> "~~" + else if isEnabled Ext_raw_html opts + then "<s>" <> contents <> "</s>" + else contents +inlineToMarkdown _ (Superscript []) = return empty +inlineToMarkdown opts (Superscript lst) = + local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do + contents <- inlineListToMarkdown opts lst + if isEnabled Ext_superscript opts + then return $ "^" <> contents <> "^" + else if isEnabled Ext_raw_html opts + then return $ "<sup>" <> contents <> "</sup>" + else + case traverse toSuperscriptInline lst of + Just xs' | not (writerPreferAscii opts) + -> inlineListToMarkdown opts xs' + _ -> do + let rendered = render Nothing contents + return $ + case mapM toSuperscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "^(" <> rendered <> ")" +inlineToMarkdown _ (Subscript []) = return empty +inlineToMarkdown opts (Subscript lst) = + local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do + contents <- inlineListToMarkdown opts lst + if isEnabled Ext_subscript opts + then return $ "~" <> contents <> "~" + else if isEnabled Ext_raw_html opts + then return $ "<sub>" <> contents <> "</sub>" + else + case traverse toSubscriptInline lst of + Just xs' | not (writerPreferAscii opts) + -> inlineListToMarkdown opts xs' + _ -> do + let rendered = render Nothing contents + return $ + case mapM toSuperscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "_(" <> rendered <> ")" +inlineToMarkdown opts (SmallCaps lst) = do + variant <- asks envVariant + if variant /= PlainText && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) + then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst) + else inlineListToMarkdown opts $ capitalize lst +inlineToMarkdown opts (Quoted SingleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ if isEnabled Ext_smart opts + then "'" <> contents <> "'" + else + if writerPreferAscii opts + then "‘" <> contents <> "’" + else "‘" <> contents <> "’" +inlineToMarkdown opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ if isEnabled Ext_smart opts + then "\"" <> contents <> "\"" + else + if writerPreferAscii opts + then "“" <> contents <> "”" + else "“" <> contents <> "”" +inlineToMarkdown opts (Code attr str) = do + let tickGroups = filter (T.any (== '`')) $ T.group str + let longest = maybe 0 maximum $ nonEmpty $ map T.length tickGroups + let marker = T.replicate (longest + 1) "`" + let spacer = if longest == 0 then "" else " " + let attrsEnabled = isEnabled Ext_inline_code_attributes opts || + isEnabled Ext_attributes opts + let attrs = if attrsEnabled && attr /= nullAttr + then attrsToMarkdown attr + else empty + variant <- asks envVariant + case variant of + PlainText -> return $ literal str + _ -> return $ literal + (marker <> spacer <> str <> spacer <> marker) <> attrs +inlineToMarkdown opts (Str str) = do + variant <- asks envVariant + let str' = (if writerPreferAscii opts + then toHtml5Entities + else id) . + (if isEnabled Ext_smart opts + then unsmartify opts + else id) . + (if variant == PlainText + then id + else escapeText opts) $ str + return $ literal str' +inlineToMarkdown opts (Math InlineMath str) = + case writerHTMLMathMethod opts of + WebTeX url -> inlineToMarkdown opts + (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) + _ | isEnabled Ext_tex_math_dollars opts -> + return $ "$" <> literal str <> "$" + | isEnabled Ext_tex_math_single_backslash opts -> + return $ "\\(" <> literal str <> "\\)" + | isEnabled Ext_tex_math_double_backslash opts -> + return $ "\\\\(" <> literal str <> "\\\\)" + | otherwise -> do + variant <- asks envVariant + texMathToInlines InlineMath str >>= + inlineListToMarkdown opts . + (if variant == PlainText then makeMathPlainer else id) +inlineToMarkdown opts (Math DisplayMath str) = + case writerHTMLMathMethod opts of + WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` + inlineToMarkdown opts (Image nullAttr [Str str] + (url <> T.pack (urlEncode $ T.unpack str), str)) + _ | isEnabled Ext_tex_math_dollars opts -> + return $ "$$" <> literal str <> "$$" + | isEnabled Ext_tex_math_single_backslash opts -> + return $ "\\[" <> literal str <> "\\]" + | isEnabled Ext_tex_math_double_backslash opts -> + return $ "\\\\[" <> literal str <> "\\\\]" + | otherwise -> (\x -> cr <> x <> cr) `fmap` + (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) +inlineToMarkdown opts il@(RawInline f str) = do + let tickGroups = filter (T.any (== '`')) $ T.group str + let numticks = 1 + maybe 0 maximum (nonEmpty (map T.length tickGroups)) + variant <- asks envVariant + let Format fmt = f + let rawAttribInline = return $ + literal (T.replicate numticks "`") <> literal str <> + literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}" + let renderEmpty = mempty <$ report (InlineNotRendered il) + case variant of + PlainText -> renderEmpty + Commonmark + | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] + -> return $ literal str + Markdown + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + -> return $ literal str + _ | isEnabled Ext_raw_attribute opts -> rawAttribInline + | f `elem` ["html", "html5", "html4"] + , isEnabled Ext_raw_html opts + -> return $ literal str + | f `elem` ["latex", "tex"] + , isEnabled Ext_raw_tex opts + -> return $ literal str + _ -> renderEmpty + + +inlineToMarkdown opts LineBreak = do + variant <- asks envVariant + if variant == PlainText || isEnabled Ext_hard_line_breaks opts + then return cr + else return $ + if isEnabled Ext_escaped_line_breaks opts + then "\\" <> cr + else " " <> cr +inlineToMarkdown _ Space = do + escapeSpaces <- asks envEscapeSpaces + return $ if escapeSpaces then "\\ " else space +inlineToMarkdown opts SoftBreak = do + escapeSpaces <- asks envEscapeSpaces + let space' = if escapeSpaces then "\\ " else space + return $ case writerWrapText opts of + WrapNone -> space' + WrapAuto -> space' + WrapPreserve -> cr +inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (Cite (c:cs) lst) + | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst + | otherwise = + if citationMode c == AuthorInText + then do + suffs <- inlineListToMarkdown opts $ citationSuffix c + rest <- mapM convertOne cs + let inbr = suffs <+> joincits rest + br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' + return $ literal ("@" <> maybeInBraces (citationId c)) <+> br + else do + cits <- mapM convertOne (c:cs) + return $ literal "[" <> joincits cits <> literal "]" + where + maybeInBraces key = + case readWith (citeKey False >> spaces >> eof) + defaultParserState ("@" <> key) of + Left _ -> "{" <> key <> "}" + Right _ -> key + joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) + convertOne Citation { citationId = k + , citationPrefix = pinlines + , citationSuffix = sinlines + , citationMode = m } + = do + pdoc <- inlineListToMarkdown opts pinlines + sdoc <- inlineListToMarkdown opts sinlines + let k' = literal (modekey m <> "@" <> maybeInBraces k) + r = case sinlines of + Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc + _ -> k' <+> sdoc + return $ pdoc <+> r + modekey SuppressAuthor = "-" + modekey _ = "" +inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do + variant <- asks envVariant + linktext <- inlineListToMarkdown opts txt + let linktitle = if T.null tit + then empty + else literal $ " \"" <> tit <> "\"" + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) + let useAuto = isURI src && + case txt of + [Str s] | escapeURI s == srcSuffix -> True + _ -> False + let useRefLinks = writerReferenceLinks opts && not useAuto + shortcutable <- asks envRefShortcutable + let useShortcutRefLinks = shortcutable && + isEnabled Ext_shortcut_reference_links opts + reftext <- if useRefLinks + then literal <$> getReference attr linktext (src, tit) + else return mempty + case variant of + PlainText + | useAuto -> return $ literal srcSuffix + | otherwise -> return linktext + _ | useAuto -> return $ "<" <> literal srcSuffix <> ">" + | useRefLinks -> + let first = "[" <> linktext <> "]" + second = if getKey linktext == getKey reftext + then if useShortcutRefLinks + then "" + else "[]" + else "[" <> reftext <> "]" + in return $ first <> second + | isEnabled Ext_raw_html opts + , not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) + , attr /= nullAttr -> -- use raw HTML to render attributes + literal . T.strip <$> + writeHtml5String opts{ writerTemplate = Nothing } + (Pandoc nullMeta [Plain [lnk]]) + | otherwise -> return $ + "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <> + linkAttributes opts attr +inlineToMarkdown opts img@(Image attr alternate (source, tit)) + | isEnabled Ext_raw_html opts && + not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && + attr /= nullAttr = -- use raw HTML + literal . T.strip <$> + writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) + | otherwise = do + variant <- asks envVariant + let txt = if null alternate || alternate == [Str source] + -- to prevent autolinks + then [Str ""] + else alternate + linkPart <- inlineToMarkdown opts (Link attr txt (source, tit)) + return $ case variant of + PlainText -> "[" <> linkPart <> "]" + _ -> "!" <> linkPart +inlineToMarkdown opts (Note contents) = do + modify (\st -> st{ stNotes = contents : stNotes st }) + st <- get + let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1) + if isEnabled Ext_footnotes opts + then return $ "[^" <> ref <> "]" + else return $ "[" <> ref <> "]" + +makeMathPlainer :: [Inline] -> [Inline] +makeMathPlainer = walk go + where + go (Emph xs) = Span nullAttr xs + go x = x + +toSubscriptInline :: Inline -> Maybe Inline +toSubscriptInline Space = Just Space +toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils +toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s) +toSubscriptInline LineBreak = Just LineBreak +toSubscriptInline SoftBreak = Just SoftBreak +toSubscriptInline _ = Nothing + +toSuperscriptInline :: Inline -> Maybe Inline +toSuperscriptInline Space = Just Space +toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils +toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s) +toSuperscriptInline LineBreak = Just LineBreak +toSuperscriptInline SoftBreak = Just SoftBreak +toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/Markdown/Types.hs b/src/Text/Pandoc/Writers/Markdown/Types.hs new file mode 100644 index 000000000..a1d0d14e4 --- /dev/null +++ b/src/Text/Pandoc/Writers/Markdown/Types.hs @@ -0,0 +1,81 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.Markdown.Types + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.Markdown.Types ( + MarkdownVariant(..), + WriterState(..), + WriterEnv(..), + Notes, + Ref, + Refs, + MD, + evalMD + ) where +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.Default +import qualified Data.Map as M +import qualified Data.Set as Set +import Data.Text (Text) +import Text.Pandoc.Parsing (Key) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition + +type Notes = [[Block]] +type Ref = (Text, Target, Attr) +type Refs = [Ref] + +type MD m = ReaderT WriterEnv (StateT WriterState m) + +evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a +evalMD md env st = evalStateT (runReaderT md env) st + +data WriterEnv = WriterEnv { envInList :: Bool + , envVariant :: MarkdownVariant + , envRefShortcutable :: Bool + , envBlockLevel :: Int + , envEscapeSpaces :: Bool + } + +data MarkdownVariant = + PlainText + | Commonmark + | Markdown + deriving (Show, Eq) + +instance Default WriterEnv + where def = WriterEnv { envInList = False + , envVariant = Markdown + , envRefShortcutable = True + , envBlockLevel = 0 + , envEscapeSpaces = False + } + +data WriterState = WriterState { stNotes :: Notes + , stPrevRefs :: Refs + , stRefs :: Refs + , stKeys :: M.Map Key + (M.Map (Target, Attr) Int) + , stLastIdx :: Int + , stIds :: Set.Set Text + , stNoteNum :: Int + } + +instance Default WriterState + where def = WriterState{ stNotes = [] + , stPrevRefs = [] + , stRefs = [] + , stKeys = M.empty + , stLastIdx = 0 + , stIds = Set.empty + , stNoteNum = 1 + } + + diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index d1912caa6..5029be69f 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 96914d3c6..97c23f24d 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Ms - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -23,6 +23,7 @@ module Text.Pandoc.Writers.Ms ( writeMs ) where import Control.Monad.State.Strict import Data.Char (isLower, isUpper, ord) import Data.List (intercalate, intersperse) +import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as Map import Data.Maybe (catMaybes) import Data.Text (Text) @@ -244,13 +245,17 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) = aligncode AlignDefault = "l" in do caption' <- inlineListToMs' opts caption - let iwidths = if all (== 0) widths - then repeat "" - else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths + let isSimple = all (== 0) widths + let totalWidth = 70 -- 78n default width - 8n indent = 70n let coldescriptions = literal $ T.unwords - (zipWith (\align width -> aligncode align <> width) - alignments iwidths) <> "." + (zipWith (\align width -> aligncode align <> + if width == 0 + then "" + else T.pack $ + printf "w(%0.1fn)" + (totalWidth * width)) + alignments widths) <> "." colheadings <- mapM (blockListToMs opts) headers let makeRow cols = literal "T{" $$ vcat (intersperse (literal "T}\tT{") cols) $$ @@ -259,12 +264,26 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) = then empty else makeRow colheadings $$ char '_' body <- mapM (\row -> do - cols <- mapM (blockListToMs opts) row + cols <- mapM (\(cell, w) -> + (if isSimple + then id + else (literal (".nr LL " <> + T.pack (printf "%0.1fn" + (w * totalWidth))) $$)) <$> + blockListToMs opts cell) (zip row widths) return $ makeRow cols) rows setFirstPara return $ literal ".PP" $$ caption' $$ + literal ".na" $$ -- we don't want justification in table cells + (if isSimple + then "" + else ".nr LLold \\n[LL]") $$ literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$ - colheadings' $$ vcat body $$ literal ".TE" + colheadings' $$ vcat body $$ literal ".TE" $$ + (if isSimple + then "" + else ".nr LL \\n[LLold]") $$ + literal ".ad" blockToMs opts (BulletList items) = do contents <- mapM (bulletListItemToMs opts) items @@ -272,8 +291,7 @@ blockToMs opts (BulletList items) = do return (vcat contents) blockToMs opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs - let indent = 2 + - maximum (map T.length markers) + let indent = 2 + maybe 0 maximum (nonEmpty (map T.length markers)) contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $ zip markers items setFirstPara diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index bf3265107..d5100f43f 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -31,6 +31,7 @@ import Control.Monad.State.Strict import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace) import Data.Default import Data.List (intersperse, transpose) +import Data.List.NonEmpty (nonEmpty, NonEmpty(..)) import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) @@ -158,7 +159,8 @@ simpleTable caption headers rows = do caption' <- inlineListToMuse caption headers' <- mapM blockListToMuse headers rows' <- mapM (mapM blockListToMuse) rows - let widthsInChars = maximum . map offset <$> transpose (headers' : rows') + let widthsInChars = maybe 0 maximum . nonEmpty . map offset <$> + transpose (headers' : rows') let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks where sep' = lblock (T.length sep) $ literal sep let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars @@ -238,7 +240,7 @@ blockToMuse (DefinitionList items) = do label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label let ind = offset' label' -- using Text.DocLayout.offset results in round trip failures hang ind (nowrap label') . vcat <$> mapM descriptionToMuse defs - where offset' d = maximum (0: map T.length + where offset' d = maximum (0 :| map T.length (T.lines $ render Nothing d)) descriptionToMuse :: PandocMonad m => [Block] @@ -269,7 +271,8 @@ blockToMuse (Table _ blkCapt specs thead tbody tfoot) = (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot blocksToDoc opts blocks = local (\env -> env { envOptions = opts }) $ blockListToMuse blocks - numcols = maximum (length aligns : length widths : map length (headers:rows)) + numcols = maximum + (length aligns :| length widths : map length (headers:rows)) isSimple = onlySimpleTableCells (headers : rows) && all (== 0) widths blockToMuse (Div _ bs) = flatBlockListToMuse bs blockToMuse Null = return empty diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 4d4dfca15..9c2ce805d 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index e41fb7176..e4eb4fd25 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -13,9 +13,10 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Codec.Archive.Zip -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B +import Data.Maybe (fromMaybe) import Data.Generics (everywhere', mkT) import Data.List (isPrefixOf) import qualified Data.Map as Map @@ -23,10 +24,11 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import Text.Collate.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) @@ -34,13 +36,14 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.DocLayout import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, - fixDisplayMath) -import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) + fixDisplayMath, getLang) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.XML +import Text.Pandoc.XML.Light import Text.TeXMath -import Text.XML.Light +import qualified Text.XML.Light as XL newtype ODTState = ODTState { stEntries :: [Entry] } @@ -66,7 +69,7 @@ pandocToODT :: PandocMonad m pandocToODT opts doc@(Pandoc meta _) = do let title = docTitle meta let authors = docAuthors meta - utctime <- P.getCurrentTime + utctime <- P.getTimestamp lang <- toLang (getLang opts meta) refArchive <- case writerReferenceDoc opts of @@ -172,24 +175,27 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive updateStyleWithLang Nothing arch = return arch updateStyleWithLang (Just lang) arch = do epochtime <- floor `fmap` lift P.getPOSIXTime - return arch{ zEntries = [if eRelativePath e == "styles.xml" - then case parseXMLDoc - (toStringLazy (fromEntry e)) of - Nothing -> e - Just d -> - toEntry "styles.xml" epochtime - ( fromStringLazy - . ppTopElement - . addLang lang $ d ) - else e - | e <- zEntries arch] } + entries <- mapM (\e -> if eRelativePath e == "styles.xml" + then case parseXMLElement + (toTextLazy (fromEntry e)) of + Left msg -> throwError $ + PandocXMLError "styles.xml" msg + Right d -> return $ + toEntry "styles.xml" epochtime + ( fromTextLazy + . TL.fromStrict + . ppTopElement + . addLang lang $ d ) + else return e) (zEntries arch) + return arch{ zEntries = entries } +-- TODO FIXME avoid this generic traversal! addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) - = Attr n (T.unpack $ langLanguage lang) + = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (T.unpack $ langRegion lang) + = Attr n (fromMaybe "" $ langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements @@ -235,8 +241,8 @@ transformPicMath _ (Math t math) = do case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do - let conf = useShortEmptyTags (const False) defaultConfigPP - let mathml = ppcTopElement conf r + let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP + let mathml = XL.ppcTopElement conf r epochtime <- floor `fmap` lift P.getPOSIXTime let dirname = "Formula-" ++ show (length entries) ++ "/" let fname = dirname ++ "content.xml" diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index ac991b594..0533d6c12 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.OOXML - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -29,32 +29,32 @@ import Control.Monad.Except (throwError) import Text.Pandoc.Error import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Lazy.Char8 as BL8 import Data.Maybe (mapMaybe) import qualified Data.Text as T +import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.XML.Light as XML +import Text.Pandoc.XML.Light -mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode :: Node t => Text -> [(Text,Text)] -> t -> Element mknode s attrs = add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) -mktnode :: String -> [(String,String)] -> T.Text -> Element -mktnode s attrs = mknode s attrs . T.unpack +mktnode :: Text -> [(Text,Text)] -> T.Text -> Element +mktnode s attrs = mknode s attrs -nodename :: String -> QName +nodename :: Text -> QName nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } - where (name, prefix) = case break (==':') s of - (xs,[]) -> (xs, Nothing) - (ys, _:zs) -> (zs, Just ys) + where (name, prefix) = case T.break (==':') s of + (xs,ys) -> case T.uncons ys of + Nothing -> (xs, Nothing) + Just (_,zs) -> (zs, Just xs) toLazy :: B.ByteString -> BL.ByteString toLazy = BL.fromChunks . (:[]) renderXml :: Element -> BL.ByteString -renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> - UTF8.fromStringLazy (showElement elt) +renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt)) parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element parseXml refArchive distArchive relpath = @@ -62,32 +62,32 @@ parseXml refArchive distArchive relpath = findEntryByPath relpath distArchive of Nothing -> throwError $ PandocSomeError $ T.pack relpath <> " missing in reference file" - Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of - Nothing -> throwError $ PandocSomeError $ - T.pack relpath <> " corrupt in reference file" - Just d -> return d + Just e -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of + Left msg -> + throwError $ PandocXMLError (T.pack relpath) msg + Right d -> return d -- Copied from Util -attrToNSPair :: XML.Attr -> Maybe (String, String) -attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair :: Attr -> Maybe (Text, Text) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) attrToNSPair _ = Nothing elemToNameSpaces :: Element -> NameSpaces elemToNameSpaces = mapMaybe attrToNSPair . elAttribs -elemName :: NameSpaces -> String -> String -> QName +elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) -isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = let ns' = ns ++ elemToNameSpaces element in qName (elName element) == name && qURI (elName element) == lookup prefix ns' -type NameSpaces = [(String, String)] +type NameSpaces = [(Text, Text)] -- | Scales the image to fit the page -- sizes are passed in emu diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 810a94775..8c9229fc0 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -2,7 +2,7 @@ {-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013-2020 John MacFarlane + Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 071a5542f..5f3224c2f 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} @@ -17,6 +18,7 @@ module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Control.Arrow ((***), (>>>)) import Control.Monad.State.Strict hiding (when) import Data.Char (chr) +import Data.Foldable (find) import Data.List (sortOn, sortBy, foldl') import qualified Data.Map as Map import Data.Maybe (fromMaybe, isNothing) @@ -24,7 +26,7 @@ import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import Text.Collate.Lang (Lang (..), parseLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm, setTranslations, toLang) import Text.Pandoc.Definition @@ -35,6 +37,7 @@ import Text.DocLayout import Text.Pandoc.Shared (linesToPara, tshow, blocksToInlines) import Text.Pandoc.Templates (renderTemplate) import qualified Text.Pandoc.Translations as Term (Term(Figure, Table)) +import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import qualified Text.Pandoc.Writers.AnnotatedTable as Ann @@ -54,6 +57,11 @@ plainToPara x = x type OD m = StateT WriterState m +data ReferenceType + = HeaderRef + | TableRef + | ImageRef + data WriterState = WriterState { stNotes :: [Doc Text] , stTableStyles :: [Doc Text] @@ -69,6 +77,7 @@ data WriterState = , stImageId :: Int , stTableCaptionId :: Int , stImageCaptionId :: Int + , stIdentTypes :: [(Text,ReferenceType)] } defaultWriterState :: WriterState @@ -86,6 +95,7 @@ defaultWriterState = , stImageId = 1 , stTableCaptionId = 1 , stImageCaptionId = 1 + , stIdentTypes = [] } when :: Bool -> Doc Text -> Doc Text @@ -227,7 +237,7 @@ handleSpaces s = case T.uncons s of -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOpenDocument opts (Pandoc meta blocks) = do - let defLang = Lang "en" "US" "" [] + let defLang = Lang "en" (Just "US") Nothing [] [] [] lang <- case lookupMetaString "lang" meta of "" -> pure defLang s -> fromMaybe defLang <$> toLang (Just s) @@ -243,6 +253,12 @@ writeOpenDocument opts (Pandoc meta blocks) = do meta ((body, metadata),s) <- flip runStateT defaultWriterState $ do + let collectInlineIdent (Image (ident,_,_) _ _) = [(ident,ImageRef)] + collectInlineIdent _ = [] + let collectBlockIdent (Header _ (ident,_,_) _) = [(ident,HeaderRef)] + collectBlockIdent (Table (ident,_,_) _ _ _ _ _) = [(ident,TableRef)] + collectBlockIdent _ = [] + modify $ \s -> s{ stIdentTypes = query collectBlockIdent blocks ++ query collectInlineIdent blocks } m <- metaToContext opts (blocksToOpenDocument opts) (fmap chomp . inlinesToOpenDocument opts) @@ -357,36 +373,32 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m (Doc Text) -blockToOpenDocument o bs - | Plain b <- bs = if null b - then return empty - else inParagraphTags =<< inlinesToOpenDocument o b - | Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] <- bs - = figure attr c s t - | Para b <- bs = if null b && - not (isEnabled Ext_empty_paragraphs o) - then return empty - else inParagraphTags =<< inlinesToOpenDocument o b - | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b - | Div attr xs <- bs = mkDiv attr xs - | Header i (ident,_,_) b - <- bs = setFirstPara >> (inHeaderTags i ident - =<< inlinesToOpenDocument o b) - | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b - | DefinitionList b <- bs = setFirstPara >> defList b - | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b - | OrderedList a b <- bs = setFirstPara >> orderedList a b - | CodeBlock _ s <- bs = setFirstPara >> preformatted s - | Table a bc s th tb tf <- bs = setFirstPara >> table (Ann.toTable a bc s th tb tf) - | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" - [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock f s <- bs = if f == Format "opendocument" - then return $ text $ T.unpack s - else do - report $ BlockNotRendered bs - return empty - | Null <- bs = return empty - | otherwise = return empty +blockToOpenDocument o = \case + Plain b -> if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] -> figure attr c s t + Para b -> if null b && + not (isEnabled Ext_empty_paragraphs o) + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + LineBlock b -> blockToOpenDocument o $ linesToPara b + Div attr xs -> mkDiv attr xs + Header i (ident,_,_) b -> do + setFirstPara + inHeaderTags i ident =<< inlinesToOpenDocument o b + BlockQuote b -> setFirstPara >> mkBlockQuote b + DefinitionList b -> setFirstPara >> defList b + BulletList b -> setFirstPara >> bulletListToOpenDocument o b + OrderedList a b -> setFirstPara >> orderedList a b + CodeBlock _ s -> setFirstPara >> preformatted s + Table a bc s th tb tf -> setFirstPara >> table (Ann.toTable a bc s th tb tf) + HorizontalRule -> setFirstPara >> return (selfClosingTag "text:p" + [ ("text:style-name", "Horizontal_20_Line") ]) + b@(RawBlock f s) -> if f == Format "opendocument" + then return $ text $ T.unpack s + else empty <$ report (BlockNotRendered b) + Null -> return empty where defList b = do setInDefinitionList True r <- vcat <$> mapM (deflistItemToOpenDocument o) b @@ -411,7 +423,7 @@ blockToOpenDocument o bs inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)] <$> orderedListToOpenDocument o pn b table :: PandocMonad m => Ann.Table -> OD m (Doc Text) - table (Ann.Table _ (Caption _ c) colspecs thead tbodies _) = do + table (Ann.Table (ident, _, _) (Caption _ c) colspecs thead tbodies _) = do tn <- length <$> gets stTableStyles pn <- length <$> gets stParaStyles let genIds = map chr [65..] @@ -433,7 +445,7 @@ blockToOpenDocument o bs then return empty else inlinesToOpenDocument o (blocksToInlines c) >>= if isEnabled Ext_native_numbering o - then numberedTableCaption + then numberedTableCaption ident else unNumberedCaption "TableCaption" th <- colHeadsToOpenDocument o (map fst paraHStyles) thead tr <- mapM (tableBodyToOpenDocument o (map fst paraStyles)) tbodies @@ -442,36 +454,39 @@ blockToOpenDocument o bs , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) return $ captionDoc $$ tableDoc - figure attr caption source title | null caption = + figure attr@(ident, _, _) caption source title | null caption = withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]] | otherwise = do imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]] captionDoc <- inlinesToOpenDocument o caption >>= if isEnabled Ext_native_numbering o - then numberedFigureCaption + then numberedFigureCaption ident else unNumberedCaption "FigureCaption" return $ imageDoc $$ captionDoc -numberedTableCaption :: PandocMonad m => Doc Text -> OD m (Doc Text) -numberedTableCaption caption = do +numberedTableCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text) +numberedTableCaption ident caption = do id' <- gets stTableCaptionId modify (\st -> st{ stTableCaptionId = id' + 1 }) capterm <- translateTerm Term.Table - return $ numberedCaption "TableCaption" capterm "Table" id' caption + return $ numberedCaption "TableCaption" capterm "Table" id' ident caption -numberedFigureCaption :: PandocMonad m => Doc Text -> OD m (Doc Text) -numberedFigureCaption caption = do +numberedFigureCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text) +numberedFigureCaption ident caption = do id' <- gets stImageCaptionId modify (\st -> st{ stImageCaptionId = id' + 1 }) capterm <- translateTerm Term.Figure - return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption + return $ numberedCaption "FigureCaption" capterm "Illustration" id' ident caption -numberedCaption :: Text -> Text -> Text -> Int -> Doc Text -> Doc Text -numberedCaption style term name num caption = +numberedCaption :: Text -> Text -> Text -> Int -> Text -> Doc Text -> Doc Text +numberedCaption style term name num ident caption = let t = text $ T.unpack term r = num - 1 - s = inTags False "text:sequence" [ ("text:ref-name", "ref" <> name <> tshow r), + ident' = case ident of + "" -> "ref" <> name <> tshow r + _ -> ident + s = inTags False "text:sequence" [ ("text:ref-name", ident'), ("text:name", name), ("text:formula", "ooow:" <> name <> "+1"), ("style:num-format", "1") ] $ text $ show num @@ -607,7 +622,9 @@ inlineToOpenDocument o ils else do report $ InlineNotRendered ils return empty - Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l + Link _ l (s,t) -> do + identTypes <- gets stIdentTypes + mkLink o identTypes s t <$> inlinesToOpenDocument o l Image attr _ (s,t) -> mkImg attr s t Note l -> mkNote l where @@ -619,10 +636,6 @@ inlineToOpenDocument o ils unhighlighted s = inlinedCode $ preformatted s preformatted s = handleSpaces $ escapeStringForXML s inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s - mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") - , ("xlink:href" , s ) - , ("office:name", t ) - ] . inSpanTags "Definition" mkImg (_, _, kvs) s _ = do id' <- gets stImageId modify (\st -> st{ stImageId = id' + 1 }) @@ -659,6 +672,45 @@ inlineToOpenDocument o ils addNote nn return nn +mkLink :: WriterOptions -> [(Text,ReferenceType)] -> Text -> Text -> Doc Text -> Doc Text +mkLink o identTypes s t d = + let maybeIdentAndType = case T.uncons s of + Just ('#', ident) -> find ((ident ==) . fst) identTypes + _ -> Nothing + d' = inSpanTags "Definition" d + ref refType format ident = inTags False refType + [ ("text:reference-format", format ), + ("text:ref-name", ident) ] + inlineSpace = selfClosingTag "text:s" [] + bookmarkRef = ref "text:bookmark-ref" + bookmarkRefNumber ident = bookmarkRef "number" ident mempty + bookmarkRefName ident = bookmarkRef "text" ident d + bookmarkRefNameNumber ident = bookmarkRefNumber ident <> inlineSpace <> bookmarkRefName ident + bookmarkRef' + | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = bookmarkRefNameNumber + | isEnabled Ext_xrefs_name o = bookmarkRefName + | otherwise = bookmarkRefNumber + sequenceRef = ref "text:sequence-ref" + sequenceRefNumber ident = sequenceRef "value" ident mempty + sequenceRefName ident = sequenceRef "caption" ident d + sequenceRefNameNumber ident = sequenceRefNumber ident <> inlineSpace <> sequenceRefName ident + sequenceRef' + | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = sequenceRefNameNumber + | isEnabled Ext_xrefs_name o = sequenceRefName + | otherwise = sequenceRefNumber + link = inTags False "text:a" [ ("xlink:type" , "simple") + , ("xlink:href" , s ) + , ("office:name", t ) + ] d' + linkOrReference = case maybeIdentAndType of + Just (ident, HeaderRef) -> bookmarkRef' ident + Just (ident, TableRef) -> sequenceRef' ident + Just (ident, ImageRef) -> sequenceRef' ident + _ -> link + in if isEnabled Ext_xrefs_name o || isEnabled Ext_xrefs_number o + then linkOrReference + else link + bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text])) bulletListStyle l = do let doStyles i = inTags True "text:list-level-style-bullet" @@ -819,34 +871,33 @@ data TextStyle = Italic textStyleAttr :: Map.Map Text Text -> TextStyle -> Map.Map Text Text -textStyleAttr m s - | Italic <- s = Map.insert "fo:font-style" "italic" . - Map.insert "style:font-style-asian" "italic" . - Map.insert "style:font-style-complex" "italic" $ m - | Bold <- s = Map.insert "fo:font-weight" "bold" . - Map.insert "style:font-weight-asian" "bold" . - Map.insert "style:font-weight-complex" "bold" $ m - | Under <- s = Map.insert "style:text-underline-style" "solid" . - Map.insert "style:text-underline-width" "auto" . - Map.insert "style:text-underline-color" "font-color" $ m - | Strike <- s = Map.insert "style:text-line-through-style" "solid" m - | Sub <- s = Map.insert "style:text-position" "sub 58%" m - | Sup <- s = Map.insert "style:text-position" "super 58%" m - | SmallC <- s = Map.insert "fo:font-variant" "small-caps" m - | Pre <- s = Map.insert "style:font-name" "Courier New" . - Map.insert "style:font-name-asian" "Courier New" . - Map.insert "style:font-name-complex" "Courier New" $ m - | Language lang <- s - = Map.insert "fo:language" (langLanguage lang) . - Map.insert "fo:country" (langRegion lang) $ m - | otherwise = m +textStyleAttr m = \case + Italic -> Map.insert "fo:font-style" "italic" . + Map.insert "style:font-style-asian" "italic" . + Map.insert "style:font-style-complex" "italic" $ m + Bold -> Map.insert "fo:font-weight" "bold" . + Map.insert "style:font-weight-asian" "bold" . + Map.insert "style:font-weight-complex" "bold" $ m + Under -> Map.insert "style:text-underline-style" "solid" . + Map.insert "style:text-underline-width" "auto" . + Map.insert "style:text-underline-color" "font-color" $ m + Strike -> Map.insert "style:text-line-through-style" "solid" m + Sub -> Map.insert "style:text-position" "sub 58%" m + Sup -> Map.insert "style:text-position" "super 58%" m + SmallC -> Map.insert "fo:font-variant" "small-caps" m + Pre -> Map.insert "style:font-name" "Courier New" . + Map.insert "style:font-name-asian" "Courier New" . + Map.insert "style:font-name-complex" "Courier New" $ m + Language lang -> + Map.insert "fo:language" (langLanguage lang) . + maybe id (Map.insert "fo:country") (langRegion lang) $ m withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a withLangFromAttr (_,_,kvs) action = case lookup "lang" kvs of Nothing -> action Just l -> - case parseBCP47 l of + case parseLang l of Right lang -> withTextStyle (Language lang) action Left _ -> do report $ InvalidLang l diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 2af93017d..d404f1c8d 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -3,8 +3,8 @@ {- | Module : Text.Pandoc.Writers.Org Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> - 2010-2020 John MacFarlane <jgm@berkeley.edu> - 2016-2020 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + 2010-2021 John MacFarlane <jgm@berkeley.edu> + 2016-2021 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -17,8 +17,9 @@ Org-Mode: <http://orgmode.org> -} module Text.Pandoc.Writers.Org (writeOrg) where import Control.Monad.State.Strict -import Data.Char (isAlphaNum) +import Data.Char (isAlphaNum, isDigit) import Data.List (intersect, intersperse, partition, transpose) +import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -83,12 +84,15 @@ noteToOrg num note = do -- | Escape special characters for Org. escapeString :: Text -> Text -escapeString = escapeStringUsing - [ ('\x2014',"---") - , ('\x2013',"--") - , ('\x2019',"'") - , ('\x2026',"...") - ] +escapeString t + | T.all (\c -> c < '\x2013' || c > '\x2026') t = t + | otherwise = T.concatMap escChar t + where + escChar '\x2013' = "--" + escChar '\x2014' = "---" + escChar '\x2019' = "'" + escChar '\x2026' = "..." + escChar c = T.singleton c isRawFormat :: Format -> Bool isRawFormat f = @@ -163,7 +167,7 @@ blockToOrg (Table _ blkCapt specs thead tbody tfoot) = do else "#+caption: " <> caption'' headers' <- mapM blockListToOrg headers rawRows <- mapM (mapM blockListToOrg) rows - let numChars = maximum . map offset + let numChars = maybe 0 maximum . nonEmpty . map offset -- FIXME: width is not being used. let widthsInChars = map numChars $ transpose (headers' : rawRows) @@ -198,7 +202,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do x -> x let markers = take (length items) $ orderedListMarkers (start, Decimal, delim') - let maxMarkerLength = maximum $ map T.length markers + let maxMarkerLength = maybe 0 maximum . nonEmpty $ map T.length markers let markers' = map (\m -> let s = maxMarkerLength - T.length m in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToOrg markers' items @@ -213,25 +217,35 @@ blockToOrg (DefinitionList items) = do -- | Convert bullet list item (list of blocks) to Org. bulletListItemToOrg :: PandocMonad m => [Block] -> Org m (Doc Text) bulletListItemToOrg items = do - contents <- blockListToOrg items + exts <- gets $ writerExtensions . stOptions + contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang 2 "- " contents $$ if endsWithPlain items then cr else blankline - -- | Convert ordered list item (a list of blocks) to Org. orderedListItemToOrg :: PandocMonad m => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Org m (Doc Text) orderedListItemToOrg marker items = do - contents <- blockListToOrg items + exts <- gets $ writerExtensions . stOptions + contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang (T.length marker + 1) (literal marker <> space) contents $$ if endsWithPlain items then cr else blankline +-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@ +-- or @U+2612 BALLOT BOX WITH X@ to org checkbox syntax (e.g. @[X]@). +taskListItemToOrg :: Extensions -> [Block] -> [Block] +taskListItemToOrg = handleTaskListItem toOrg + where + toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is + toOrg (Str "☒" : Space : is) = Str "[X]" : Space : is + toOrg is = is + -- | Convert definition list item (label, list of blocks) to Org. definitionListItemToOrg :: PandocMonad m => ([Inline], [[Block]]) -> Org m (Doc Text) @@ -337,16 +351,20 @@ inlineListToOrg :: PandocMonad m => [Inline] -> Org m (Doc Text) inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) - where fixMarkers [] = [] -- prevent note refs and list markers from wrapping, see #4171 + where -- Prevent note refs and list markers from wrapping, see #4171 + -- and #7132. + fixMarkers [] = [] fixMarkers (Space : x : rest) | shouldFix x = Str " " : x : fixMarkers rest fixMarkers (SoftBreak : x : rest) | shouldFix x = Str " " : x : fixMarkers rest fixMarkers (x : rest) = x : fixMarkers rest - shouldFix Note{} = True -- Prevent footnotes + shouldFix Note{} = True -- Prevent footnotes shouldFix (Str "-") = True -- Prevent bullet list items - -- TODO: prevent ordered list items + shouldFix (Str x) -- Prevent ordered list items + | Just (cs, c) <- T.unsnoc x = T.all isDigit cs && + (c == '.' || c == ')') shouldFix _ = False -- | Convert Pandoc inline element to Org. @@ -386,9 +404,11 @@ inlineToOrg (Str str) = return . literal $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then "$" <> literal str <> "$" - else "$$" <> literal str <> "$$" + then "\\(" <> literal str <> "\\)" + else "\\[" <> literal str <> "\\]" inlineToOrg il@(RawInline f str) + | elem f ["tex", "latex"] && T.isPrefixOf "\\begin" str = + return $ cr <> literal str <> cr | isRawFormat f = return $ literal str | otherwise = do report $ InlineNotRendered il diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs index ca3b74a1d..e0573beca 100644 --- a/src/Text/Pandoc/Writers/Powerpoint.hs +++ b/src/Text/Pandoc/Writers/Powerpoint.hs @@ -39,5 +39,5 @@ writePowerpoint opts (Pandoc meta blks) = do let blks' = walk fixDisplayMath blks let (pres, logMsgs) = documentToPresentation opts (Pandoc meta blks') mapM_ report logMsgs - archv <- presentationToArchive opts pres + archv <- presentationToArchive opts meta pres return $ fromArchive archv diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 603a84acc..157810216 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -20,15 +20,16 @@ import Control.Monad.Except (throwError, catchError) import Control.Monad.Reader import Control.Monad.State import Codec.Archive.Zip -import Data.Char (toUpper) import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) import Data.Default +import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Read import Data.Time (formatTime, defaultTimeLocale) import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) -import Text.XML.Light +import Text.Pandoc.XML.Light as XML import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -37,17 +38,21 @@ import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Options import Text.Pandoc.MIME import qualified Data.ByteString.Lazy as BL +import Text.Pandoc.Writers.Shared (metaToContext) import Text.Pandoc.Writers.OOXML import qualified Data.Map as M import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isJust) import Text.Pandoc.ImageSize import Control.Applicative ((<|>)) import System.FilePath.Glob -import Text.DocTemplates (FromContext(lookupContext)) +import Text.DocTemplates (FromContext(lookupContext), Context) +import Text.DocLayout (literal) import Text.TeXMath import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation +import Text.Pandoc.Shared (tshow, stringify) import Skylighting (fromColor) +import Data.List.NonEmpty (nonEmpty) -- |The 'EMU' type is used to specify sizes in English Metric Units. type EMU = Integer @@ -77,19 +82,24 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer) getPresentationSize refArchive distArchive = do entry <- findEntryByPath "ppt/presentation.xml" refArchive `mplus` findEntryByPath "ppt/presentation.xml" distArchive - presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry + presElement <- either (const Nothing) return $ + parseXMLElement $ UTF8.toTextLazy $ fromEntry entry let ns = elemToNameSpaces presElement sldSize <- findChild (elemName ns "p" "sldSz") presElement cxS <- findAttr (QName "cx" Nothing Nothing) sldSize cyS <- findAttr (QName "cy" Nothing Nothing) sldSize - (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String) - (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String) + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return (cx `div` 12700, cy `div` 12700) +readTextAsInteger :: Text -> Maybe Integer +readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal + data WriterEnv = WriterEnv { envRefArchive :: Archive , envDistArchive :: Archive , envUTCTime :: UTCTime , envOpts :: WriterOptions + , envContext :: Context Text , envPresentationSize :: (Integer, Integer) , envSlideHasHeader :: Bool , envInList :: Bool @@ -115,6 +125,7 @@ instance Default WriterEnv where , envDistArchive = emptyArchive , envUTCTime = posixSecondsToUTCTime 0 , envOpts = def + , envContext = mempty , envPresentationSize = (720, 540) , envSlideHasHeader = False , envInList = False @@ -159,20 +170,16 @@ runP env st p = evalStateT (runReaderT p env) st -------------------------------------------------------------------- -findAttrText :: QName -> Element -> Maybe T.Text -findAttrText n = fmap T.pack . findAttr n - monospaceFont :: Monad m => P m T.Text monospaceFont = do - vars <- writerVariables <$> asks envOpts + vars <- asks envContext case lookupContext "monofont" vars of Just s -> return s Nothing -> return "Courier" --- Kept as string for XML.Light -fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)] +fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)] fontSizeAttributes RunProps { rPropForceSize = Just sz } = - return [("sz", show $ sz * 100)] + return [("sz", tshow $ sz * 100)] fontSizeAttributes _ = return [] copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive @@ -301,8 +308,9 @@ makeSpeakerNotesMap (Presentation _ slides) = then Nothing else Just n -presentationToArchive :: PandocMonad m => WriterOptions -> Presentation -> m Archive -presentationToArchive opts pres = do +presentationToArchive :: PandocMonad m + => WriterOptions -> Meta -> Presentation -> m Archive +presentationToArchive opts meta pres = do distArchive <- toArchive . BL.fromStrict <$> P.readDefaultDataFile "reference.pptx" refArchive <- case writerReferenceDoc opts of @@ -310,7 +318,7 @@ presentationToArchive opts pres = do Nothing -> toArchive . BL.fromStrict <$> P.readDataFile "reference.pptx" - utctime <- P.getCurrentTime + utctime <- P.getTimestamp presSize <- case getPresentationSize refArchive distArchive of Just sz -> return sz @@ -318,10 +326,18 @@ presentationToArchive opts pres = do PandocSomeError "Could not determine presentation size" + -- note, we need writerTemplate to be Just _ or metaToContext does + -- nothing + context <- metaToContext opts{ writerTemplate = + writerTemplate opts <|> Just mempty } + (return . literal . stringify) + (return . literal . stringify) meta + let env = def { envRefArchive = refArchive , envDistArchive = distArchive , envUTCTime = utctime , envOpts = opts + , envContext = context , envPresentationSize = presSize , envSlideIdMap = makeSlideIdMap pres , envSpeakerNotesIdMap = makeSpeakerNotesMap pres @@ -363,7 +379,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool shapeHasId ns ident element | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr - , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr = + , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = nm == ident | otherwise = False @@ -394,10 +410,10 @@ getShapeDimensions ns element ext <- findChild (elemName ns "a" "ext") xfrm cxS <- findAttr (QName "cx" Nothing Nothing) ext cyS <- findAttr (QName "cy" Nothing Nothing) ext - (x, _) <- listToMaybe $ reads xS - (y, _) <- listToMaybe $ reads yS - (cx, _) <- listToMaybe $ reads cxS - (cy, _) <- listToMaybe $ reads cyS + x <- readTextAsInteger xS + y <- readTextAsInteger yS + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return ((x `div` 12700, y `div` 12700), (cx `div` 12700, cy `div` 12700)) | otherwise = Nothing @@ -428,7 +444,7 @@ getContentShapeSize ns layout master Nothing -> do let mbSz = findChild (elemName ns "p" "nvSpPr") sp >>= findChild (elemName ns "p" "cNvPr") >>= - findAttrText (QName "id" Nothing Nothing) >>= + findAttr (QName "id" Nothing Nothing) >>= flip getMasterShapeDimensionsById master case mbSz of Just sz' -> return sz' @@ -437,10 +453,10 @@ getContentShapeSize ns layout master getContentShapeSize _ _ _ = throwError $ PandocSomeError "Attempted to find content shape size in non-layout" -buildSpTree :: NameSpaces -> Element -> [Element] -> Element +buildSpTree :: NameSpaces -> Element -> [Content] -> Element buildSpTree ns spTreeElem newShapes = emptySpTreeElem { elContent = newContent } - where newContent = elContent emptySpTreeElem <> map Elem newShapes + where newContent = elContent emptySpTreeElem <> newShapes emptySpTreeElem = spTreeElem { elContent = filter fn (elContent spTreeElem) } fn :: Content -> Bool fn (Elem e) = isElem ns "p" "nvGrpSpPr" e || @@ -448,8 +464,8 @@ buildSpTree ns spTreeElem newShapes = fn _ = True replaceNamedChildren :: NameSpaces - -> String - -> String + -> Text + -> Text -> [Element] -> Element -> Element @@ -472,15 +488,16 @@ registerLink link = do linkReg <- gets stLinkIds mediaReg <- gets stMediaIds hasSpeakerNotes <- curSlideHasSpeakerNotes - let maxLinkId = case M.lookup curSlideId linkReg of - Just mp -> case M.keys mp of - [] -> if hasSpeakerNotes then 2 else 1 - ks -> maximum ks - Nothing -> if hasSpeakerNotes then 2 else 1 - maxMediaId = case M.lookup curSlideId mediaReg of - Just [] -> if hasSpeakerNotes then 2 else 1 - Just mInfos -> maximum $ map mInfoLocalId mInfos - Nothing -> if hasSpeakerNotes then 2 else 1 + let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of + Just xs -> maximum xs + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 + maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of + Just mInfos -> maximum $ fmap mInfoLocalId mInfos + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 maxId = max maxLinkId maxMediaId slideLinks = case M.lookup curSlideId linkReg of Just mp -> M.insert (maxId + 1) link mp @@ -495,20 +512,19 @@ registerMedia fp caption = do mediaReg <- gets stMediaIds globalIds <- gets stMediaGlobalIds hasSpeakerNotes <- curSlideHasSpeakerNotes - let maxLinkId = case M.lookup curSlideId linkReg of - Just mp -> case M.keys mp of - [] -> if hasSpeakerNotes then 2 else 1 - ks -> maximum ks - Nothing -> if hasSpeakerNotes then 2 else 1 - maxMediaId = case M.lookup curSlideId mediaReg of - Just [] -> if hasSpeakerNotes then 2 else 1 - Just mInfos -> maximum $ map mInfoLocalId mInfos - Nothing -> if hasSpeakerNotes then 2 else 1 + let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of + Just ks -> maximum ks + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 + maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of + Just mInfos -> maximum $ fmap mInfoLocalId mInfos + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 maxLocalId = max maxLinkId maxMediaId - maxGlobalId = case M.elems globalIds of - [] -> 0 - ids -> maximum ids + maxGlobalId = maybe 0 maximum $ nonEmpty $ M.elems globalIds (imgBytes, mbMt) <- P.fetchItem $ T.pack fp let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ "." <> x)) @@ -521,6 +537,7 @@ registerMedia fp caption = do Just Eps -> Just ".eps" Just Svg -> Just ".svg" Just Emf -> Just ".emf" + Just Tiff -> Just ".tiff" Nothing -> Nothing let newGlobalId = fromMaybe (maxGlobalId + 1) (M.lookup fp globalIds) @@ -652,10 +669,10 @@ createCaption contentShapeDimensions paraElements = do ] , mknode "p:spPr" [] [ mknode "a:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), - ("y", show $ 12700 * (y + cy - captionHeight))] () - , mknode "a:ext" [("cx", show $ 12700 * cx), - ("cy", show $ 12700 * captionHeight)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * (y + cy - captionHeight))] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * captionHeight)] () ] , mknode "a:prstGeom" [("prst", "rect")] [ mknode "a:avLst" [] () @@ -704,11 +721,13 @@ makePicElements layout picProps mInfo alt = do ,("noChangeAspect","1")] () -- cNvPr will contain the link information so we do that separately, -- and register the link if necessary. - let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")] + let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo), + ("id","0"), + ("name","Picture 1")] cNvPr <- case picPropLink picProps of Just link -> do idNum <- registerLink link return $ mknode "p:cNvPr" cNvPrAttr $ - mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] () + mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] () Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () let nvPicPr = mknode "p:nvPicPr" [] [ cNvPr @@ -716,13 +735,13 @@ makePicElements layout picProps mInfo alt = do , mknode "p:nvPr" [] ()] let blipFill = mknode "p:blipFill" [] [ mknode "a:blip" [("r:embed", "rId" <> - show (mInfoLocalId mInfo))] () + tshow (mInfoLocalId mInfo))] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] let xfrm = mknode "a:xfrm" [] - [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] () - , mknode "a:ext" [("cx",show dimX') - ,("cy",show dimY')] () ] + [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] () + , mknode "a:ext" [("cx", tshow dimX') + ,("cy", tshow dimY')] () ] let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () let ln = mknode "a:ln" [("w","9525")] @@ -744,8 +763,8 @@ makePicElements layout picProps mInfo alt = do else return [picShape] -paraElemToElements :: PandocMonad m => ParaElem -> P m [Element] -paraElemToElements Break = return [mknode "a:br" [] ()] +paraElemToElements :: PandocMonad m => ParaElem -> P m [Content] +paraElemToElements Break = return [Elem $ mknode "a:br" [] ()] paraElemToElements (Run rpr s) = do sizeAttrs <- fontSizeAttributes rpr let attrs = sizeAttrs <> @@ -761,7 +780,7 @@ paraElemToElements (Run rpr s) = do Just DoubleStrike -> [("strike", "dblStrike")] Nothing -> []) <> (case rBaseline rpr of - Just n -> [("baseline", show n)] + Just n -> [("baseline", tshow n)] Nothing -> []) <> (case rCap rpr of Just NoCapitals -> [("cap", "none")] @@ -778,42 +797,44 @@ paraElemToElements (Run rpr s) = do return $ case link of InternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) , ("action", "ppaction://hlinksldjump") ] in [mknode "a:hlinkClick" linkAttrs ()] -- external ExternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) ] in [mknode "a:hlinkClick" linkAttrs ()] Nothing -> return [] let colorContents = case rSolidFill rpr of Just color -> case fromColor color of - '#':hx -> [mknode "a:solidFill" [] - [mknode "a:srgbClr" [("val", map toUpper hx)] ()] - ] + '#':hx -> + [mknode "a:solidFill" [] + [mknode "a:srgbClr" + [("val", T.toUpper $ T.pack hx)] ()]] _ -> [] Nothing -> [] codeFont <- monospaceFont let codeContents = - [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr] + [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr] let propContents = linkProps <> colorContents <> codeContents - return [mknode "a:r" [] [ mknode "a:rPr" attrs propContents - , mknode "a:t" [] $ T.unpack s - ]] + return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents + , mknode "a:t" [] s + ]] paraElemToElements (MathElem mathType texStr) = do isInSpkrNotes <- asks envInSpeakerNotes if isInSpkrNotes then paraElemToElements $ Run def $ unTeXString texStr else do res <- convertMath writeOMML mathType (unTeXString texStr) - case res of - Right r -> return [mknode "a14:m" [] $ addMathInfo r] + case fromXLElement <$> res of + Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r] Left (Str s) -> paraElemToElements (Run def s) Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback" -paraElemToElements (RawOOXMLParaElem str) = return [ x | Elem x <- parseXML str ] +paraElemToElements (RawOOXMLParaElem str) = return + [Text (CData CDataRaw str Nothing)] -- This is a bit of a kludge -- really requires adding an option to @@ -821,9 +842,10 @@ paraElemToElements (RawOOXMLParaElem str) = return [ x | Elem x <- parseXML str -- step at a time. addMathInfo :: Element -> Element addMathInfo element = - let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns") - , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" - } + let mathspace = + Attr { attrKey = QName "m" Nothing (Just "xmlns") + , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" + } in add_attr mathspace element -- We look through the element to see if it contains an a14:m @@ -846,13 +868,13 @@ surroundWithMathAlternate element = paragraphToElement :: PandocMonad m => Paragraph -> P m Element paragraphToElement par = do let - attrs = [("lvl", show $ pPropLevel $ paraProps par)] <> + attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <> (case pPropMarginLeft (paraProps par) of - Just px -> [("marL", show $ pixelsToEmu px)] + Just px -> [("marL", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropIndent (paraProps par) of - Just px -> [("indent", show $ pixelsToEmu px)] + Just px -> [("indent", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropAlign (paraProps par) of @@ -864,7 +886,7 @@ paragraphToElement par = do props = [] <> (case pPropSpaceBefore $ paraProps par of Just px -> [mknode "a:spcBef" [] [ - mknode "a:spcPts" [("val", show $ 100 * px)] () + mknode "a:spcPts" [("val", tshow $ 100 * px)] () ] ] Nothing -> [] @@ -875,8 +897,9 @@ paragraphToElement par = do [mknode "a:buAutoNum" (autoNumAttrs attrs') ()] Nothing -> [mknode "a:buNone" [] ()] ) - paras <- concat <$> mapM paraElemToElements (paraElems par) - return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] <> paras + paras <- mapM paraElemToElements (paraElems par) + return $ mknode "a:p" [] $ + [Elem $ mknode "a:pPr" attrs props] <> concat paras shapeToElement :: PandocMonad m => Element -> Shape -> P m Element shapeToElement layout (TextBox paras) @@ -896,21 +919,22 @@ shapeToElement layout (TextBox paras) -- GraphicFrame and Pic should never reach this. shapeToElement _ _ = return $ mknode "p:sp" [] () -shapeToElements :: PandocMonad m => Element -> Shape -> P m [Element] +shapeToElements :: PandocMonad m => Element -> Shape -> P m [Content] shapeToElements layout (Pic picProps fp alt) = do mInfo <- registerMedia fp alt case mInfoExt mInfo of - Just _ -> + Just _ -> map Elem <$> makePicElements layout picProps mInfo alt Nothing -> shapeToElements layout $ TextBox [Paragraph def alt] -shapeToElements layout (GraphicFrame tbls cptn) = +shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$> graphicFrameToElements layout tbls cptn -shapeToElements _ (RawOOXMLShape str) = return [ x | Elem x <- parseXML str ] +shapeToElements _ (RawOOXMLShape str) = return + [Text (CData CDataRaw str Nothing)] shapeToElements layout shp = do element <- shapeToElement layout shp - return [element] + return [Elem element] -shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Element] +shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Content] shapesToElements layout shps = concat <$> mapM (shapeToElements layout) shps @@ -937,8 +961,10 @@ graphicFrameToElements layout tbls caption = do [mknode "p:ph" [("idx", "1")] ()] ] , mknode "p:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () - , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * y)] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * cy)] () ] ] <> elements @@ -952,7 +978,7 @@ getDefaultTableStyle = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" - return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst + return $ findAttr (QName "def" Nothing Nothing) tblStyleLst graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do @@ -990,7 +1016,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells let mkgridcol w = mknode "a:gridCol" - [("w", show ((12700 * w) :: Integer))] () + [("w", tshow ((12700 * w) :: Integer))] () let hasHeader = not (all null hdrCells) mbDefTblStyle <- getDefaultTableStyle @@ -999,7 +1025,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do , ("bandRow", if tblPrBandRow tblPr then "1" else "0") ] (case mbDefTblStyle of Nothing -> [] - Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty]) + Just sty -> [mknode "a:tableStyleId" [] sty]) return $ mknode "a:graphic" [] [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] @@ -1032,7 +1058,7 @@ findPHType ns spElem phType -- if it's a named PHType, we want to check that the attribute -- value matches. Just phElem | (PHType tp) <- phType -> - case findAttrText (QName "type" Nothing Nothing) phElem of + case findAttr (QName "type" Nothing Nothing) phElem of Just tp' -> tp == tp' Nothing -> False -- if it's an ObjType, we want to check that there is NO @@ -1083,7 +1109,7 @@ contentToElement layout hdrShape shapes , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do element <- nonBodyTextToElement layout [PHType "title"] hdrShape - let hdrShapeElements = [element | not (null hdrShape)] + let hdrShapeElements = [Elem element | not (null hdrShape)] contentElements <- local (\env -> env {envContentType = NormalContent}) (shapesToElements layout shapes) @@ -1096,7 +1122,7 @@ twoColumnToElement layout hdrShape shapesL shapesR , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do element <- nonBodyTextToElement layout [PHType "title"] hdrShape - let hdrShapeElements = [element | not (null hdrShape)] + let hdrShapeElements = [Elem element | not (null hdrShape)] contentElementsL <- local (\env -> env {envContentType =TwoColumnLeftContent}) (shapesToElements layout shapesL) @@ -1105,7 +1131,8 @@ twoColumnToElement layout hdrShape shapesL shapesR (shapesToElements layout shapesR) -- let contentElementsL' = map (setIdx ns "1") contentElementsL -- contentElementsR' = map (setIdx ns "2") contentElementsR - return $ buildSpTree ns spTree (hdrShapeElements <> contentElementsL <> contentElementsR) + return $ buildSpTree ns spTree $ + hdrShapeElements <> contentElementsL <> contentElementsR twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] () @@ -1115,7 +1142,7 @@ titleToElement layout titleElems , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do element <- nonBodyTextToElement layout [PHType "title", PHType "ctrTitle"] titleElems - let titleShapeElements = [element | not (null titleElems)] + let titleShapeElements = [Elem element | not (null titleElems)] return $ buildSpTree ns spTree titleShapeElements titleToElement _ _ = return $ mknode "p:sp" [] () @@ -1135,7 +1162,8 @@ metadataToElement layout titleElems subtitleElems authorsElems dateElems dateShapeElements <- if null dateElems then return [] else sequence [nonBodyTextToElement layout [PHType "dt"] dateElems] - return $ buildSpTree ns spTree (titleShapeElements <> subtitleShapeElements <> dateShapeElements) + return . buildSpTree ns spTree . map Elem $ + (titleShapeElements <> subtitleShapeElements <> dateShapeElements) metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] () slideToElement :: PandocMonad m => Slide -> P m Element @@ -1197,7 +1225,7 @@ getSlideNumberFieldId notesMaster , Just txBody <- findChild (elemName ns "p" "txBody") sp , Just p <- findChild (elemName ns "a" "p") txBody , Just fld <- findChild (elemName ns "a" "fld") p - , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld = + , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = return fldId | otherwise = throwError $ PandocSomeError @@ -1276,11 +1304,11 @@ speakerNotesSlideNumber pgNum fieldId = [ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] () , mknode "a:p" [] - [ mknode "a:fld" [ ("id", T.unpack fieldId) + [ mknode "a:fld" [ ("id", fieldId) , ("type", "slidenum") ] [ mknode "a:rPr" [("lang", "en-US")] () - , mknode "a:t" [] (show pgNum) + , mknode "a:t" [] (tshow pgNum) ] , mknode "a:endParaRPr" [("lang", "en-US")] () ] @@ -1332,7 +1360,7 @@ getSlideIdNum sldId = do Just n -> return n Nothing -> throwError $ PandocShouldNeverHappenError $ - "Slide Id " <> T.pack (show sldId) <> " not found." + "Slide Id " <> tshow sldId <> " not found." slideNum :: PandocMonad m => Slide -> P m Int slideNum slide = getSlideIdNum $ slideId slide @@ -1349,7 +1377,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text slideToRelId slide = do n <- slideNum slide offset <- asks envSlideIdOffset - return $ "rId" <> T.pack (show $ n + offset) + return $ "rId" <> tshow (n + offset) data Relationship = Relationship { relId :: Int @@ -1361,13 +1389,11 @@ elementToRel :: Element -> Maybe Relationship elementToRel element | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing = do rId <- findAttr (QName "Id" Nothing Nothing) element - numStr <- stripPrefix "rId" rId - num <- case reads numStr :: [(Int, String)] of - (n, _) : _ -> Just n - [] -> Nothing - type' <- findAttrText (QName "Type" Nothing Nothing) element + numStr <- T.stripPrefix "rId" rId + num <- fromIntegral <$> readTextAsInteger numStr + type' <- findAttr (QName "Type" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship num type' target + return $ Relationship num type' (T.unpack target) | otherwise = Nothing slideToPresRel :: PandocMonad m => Slide -> P m Relationship @@ -1416,11 +1442,8 @@ presentationToRels pres@(Presentation _ slides) = do -- all relWithoutSlide rels (unless they're 1) -- 3. If we have a notesmaster slide, we make space for that as well. - let minRelNotOne = case filter (1<) $ map relId relsWeKeep of - [] -> 0 -- doesn't matter in this case, since - -- there will be nothing to map the - -- function over - l -> minimum l + let minRelNotOne = maybe 0 minimum $ nonEmpty + $ filter (1 <) $ map relId relsWeKeep modifyRelNum :: Int -> Int modifyRelNum 1 = 1 @@ -1456,10 +1479,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels relToElement :: Relationship -> Element -relToElement rel = mknode "Relationship" [ ("Id", "rId" <> - show (relId rel)) - , ("Type", T.unpack $ relType rel) - , ("Target", relTarget rel) ] () +relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel)) + , ("Type", relType rel) + , ("Target", T.pack $ relTarget rel) ] () relsToElement :: [Relationship] -> Element relsToElement rels = mknode "Relationships" @@ -1494,7 +1516,8 @@ slideToSpeakerNotesEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml") + ("ppt/notesSlides/notesSlide" <> show notesIdNum <> + ".xml") element _ -> return Nothing @@ -1507,7 +1530,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] [ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "../slides/slide" <> show idNum <> ".xml") + , ("Target", "../slides/slide" <> tshow idNum <> ".xml") ] () , mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") @@ -1540,15 +1563,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element linkRelElement (rIdNum, InternalTarget targetId) = do targetIdNum <- getSlideIdNum targetId return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "slide" <> show targetIdNum <> ".xml") + , ("Target", "slide" <> tshow targetIdNum <> ".xml") ] () linkRelElement (rIdNum, ExternalTarget (url, _)) = return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") - , ("Target", T.unpack url) + , ("Target", url) , ("TargetMode", "External") ] () @@ -1560,10 +1583,10 @@ mediaRelElement mInfo = let ext = fromMaybe "" (mInfoExt mInfo) in mknode "Relationship" [ ("Id", "rId" <> - show (mInfoLocalId mInfo)) + tshow (mInfoLocalId mInfo)) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") , ("Target", "../media/image" <> - show (mInfoGlobalId mInfo) <> T.unpack ext) + tshow (mInfoGlobalId mInfo) <> ext) ] () speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) @@ -1573,7 +1596,7 @@ speakerNotesSlideRelElement slide = do return $ case M.lookup idNum mp of Nothing -> Nothing Just n -> - let target = "../notesSlides/notesSlide" <> show n <> ".xml" + let target = "../notesSlides/notesSlide" <> tshow n <> ".xml" in Just $ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") @@ -1612,9 +1635,9 @@ slideToSlideRelElement slide = do slideToSldIdElement :: PandocMonad m => Slide -> P m Element slideToSldIdElement slide = do n <- slideNum slide - let id' = show $ n + 255 + let id' = tshow $ n + 255 rId <- slideToRelId slide - return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] () + return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element presentationToSldIdLst (Presentation _ slides) = do @@ -1639,7 +1662,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do notesMasterElem = mknode "p:notesMasterIdLst" [] [ mknode "p:NotesMasterId" - [("r:id", "rId" <> show notesMasterRId)] + [("r:id", "rId" <> tshow notesMasterRId)] () ] @@ -1695,17 +1718,17 @@ docPropsElement docProps = do ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ - mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps) + mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps) : - mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps) + mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps) : - mknode "cp:keywords" [] (T.unpack keywords) - : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)]) - <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)]) - <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)]) + mknode "cp:keywords" [] keywords + : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)]) + <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)]) + <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)]) <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x - , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) docPropsToEntry :: PandocMonad m => DocProps -> P m Entry docPropsToEntry docProps = docPropsElement docProps >>= @@ -1716,8 +1739,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element docCustomPropsElement docProps = do let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) - ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v) + ,("pid", tshow pid) + ,("name", k)] $ mknode "vt:lpwstr" [] v return $ mknode "Properties" [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") @@ -1736,7 +1759,7 @@ viewPropsElement = do distArchive <- asks envDistArchive viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml" -- remove "lastView" if it exists: - let notLastView :: Text.XML.Light.Attr -> Bool + let notLastView :: XML.Attr -> Bool notLastView attr = qName (attrKey attr) /= "lastView" return $ @@ -1748,15 +1771,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml" defaultContentTypeToElem :: DefaultContentType -> Element defaultContentTypeToElem dct = mknode "Default" - [("Extension", T.unpack $ defContentTypesExt dct), - ("ContentType", T.unpack $ defContentTypesType dct)] + [("Extension", defContentTypesExt dct), + ("ContentType", defContentTypesType dct)] () overrideContentTypeToElem :: OverrideContentType -> Element overrideContentTypeToElem oct = mknode "Override" - [("PartName", overrideContentTypesPart oct), - ("ContentType", T.unpack $ overrideContentTypesType oct)] + [("PartName", T.pack $ overrideContentTypesPart oct), + ("ContentType", overrideContentTypesType oct)] () contentTypesToElement :: ContentTypes -> Element @@ -1814,7 +1837,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] getSpeakerNotesFilePaths = do mp <- asks envSpeakerNotesIdMap let notesIdNums = M.elems mp - return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums + return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") + notesIdNums presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes presentationToContentTypes p@(Presentation _ slides) = do @@ -1878,11 +1902,11 @@ getContentType fp | otherwise = Nothing -- Kept as String for XML.Light -autoNumAttrs :: ListAttributes -> [(String, String)] +autoNumAttrs :: ListAttributes -> [(Text, Text)] autoNumAttrs (startNum, numStyle, numDelim) = numAttr <> typeAttr where - numAttr = [("startAt", show startNum) | startNum /= 1] + numAttr = [("startAt", tshow startNum) | startNum /= 1] typeAttr = [("type", typeString <> delimString)] typeString = case numStyle of Decimal -> "arabic" diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index affec38aa..9246a93e9 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation import Control.Monad.Reader import Control.Monad.State import Data.List (intercalate) +import Data.List.NonEmpty (nonEmpty) import Data.Default import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -363,9 +364,7 @@ inlineToParElems (Note blks) = do then return [] else do notes <- gets stNoteIds - let maxNoteId = case M.keys notes of - [] -> 0 - lst -> maximum lst + let maxNoteId = maybe 0 maximum $ nonEmpty $ M.keys notes curNoteId = maxNoteId + 1 modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes } local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $ diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 43bf382b7..983ef412a 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -16,7 +16,8 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Control.Monad.State.Strict import Data.Char (isSpace) -import Data.List (transpose, intersperse) +import Data.List (transpose, intersperse, foldl') +import qualified Data.List.NonEmpty as NE import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text (Text) @@ -143,9 +144,12 @@ pictToRST (label, (attr, src, _, mbtarget)) = do let (_, cls, _) = attr classes = case cls of [] -> empty - ["align-right"] -> ":align: right" - ["align-left"] -> ":align: left" - ["align-center"] -> ":align: center" + ["align-top"] -> ":align: top" + ["align-middle"] -> ":align: middle" + ["align-bottom"] -> ":align: bottom" + ["align-center"] -> empty + ["align-right"] -> empty + ["align-left"] -> empty _ -> ":class: " <> literal (T.unwords cls) return $ nowrap $ ".. |" <> label' <> "| image:: " <> literal src $$ hang 3 empty (classes $$ dims) @@ -215,19 +219,28 @@ blockToRST (Div (ident,classes,_kvs) bs) = do nest 3 contents $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines --- title beginning with fig: indicates that the image is a figure -blockToRST (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do - capt <- inlineListToRST txt +blockToRST (Para [Image attr txt (src, rawtit)]) = do + description <- inlineListToRST txt dims <- imageDimsToRST attr - let fig = "figure:: " <> literal src - alt = ":alt: " <> if T.null tit then capt else literal tit + -- title beginning with fig: indicates that the image is a figure + let (isfig, tit) = case T.stripPrefix "fig:" rawtit of + Nothing -> (False, rawtit) + Just tit' -> (True, tit') + let fig | isfig = "figure:: " <> literal src + | otherwise = "image:: " <> literal src + alt | isfig = ":alt: " <> if T.null tit then description else literal tit + | null txt = empty + | otherwise = ":alt: " <> description + capt | isfig = description + | otherwise = empty (_,cls,_) = attr classes = case cls of [] -> empty ["align-right"] -> ":align: right" ["align-left"] -> ":align: left" ["align-center"] -> ":align: center" - _ -> ":figclass: " <> literal (T.unwords cls) + _ | isfig -> ":figclass: " <> literal (T.unwords cls) + | otherwise -> ":class: " <> literal (T.unwords cls) return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = @@ -323,7 +336,7 @@ blockToRST (OrderedList (start, style', delim) items) = do then replicate (length items) "#." else take (length items) $ orderedListMarkers (start, style', delim) - let maxMarkerLength = maximum $ map T.length markers + let maxMarkerLength = maybe 0 maximum $ NE.nonEmpty $ map T.length markers let markers' = map (\m -> let s = maxMarkerLength - T.length m in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToRST markers' items @@ -497,7 +510,7 @@ flatten outer | null contents = [outer] | otherwise = combineAll contents where contents = dropInlineParent outer - combineAll = foldl combine [] + combineAll = foldl' combine [] combine :: [Inline] -> Inline -> [Inline] combine f i = @@ -507,8 +520,8 @@ flatten outer (Quoted _ _, _) -> keep f i (_, Quoted _ _) -> keep f i -- spans are not rendered using RST inlines, so we can keep them - (Span ("",[],[]) _, _) -> keep f i - (_, Span ("",[],[]) _) -> keep f i + (Span (_,_,[]) _, _) -> keep f i + (_, Span (_,_,[]) _) -> keep f i -- inlineToRST handles this case properly so it's safe to keep ( Link{}, Image{}) -> keep f i -- parent inlines would prevent links from being correctly @@ -525,11 +538,15 @@ flatten outer collapse f i = appendToLast f $ dropInlineParent i appendToLast :: [Inline] -> [Inline] -> [Inline] - appendToLast [] toAppend = [setInlineChildren outer toAppend] - appendToLast flattened toAppend - | isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend] - | otherwise = flattened <> [setInlineChildren outer toAppend] - where lastFlat = last flattened + appendToLast flattened toAppend = + case NE.nonEmpty flattened of + Nothing -> [setInlineChildren outer toAppend] + Just xs -> + if isOuter lastFlat + then NE.init xs <> [appendTo lastFlat toAppend] + else flattened <> [setInlineChildren outer toAppend] + where + lastFlat = NE.last xs appendTo o i = mapNested (<> i) o isOuter i = emptyParent i == emptyParent outer emptyParent i = setInlineChildren i [] @@ -749,8 +766,7 @@ simpleTable opts blocksToDoc headers rows = do then return [] else fixEmpties <$> mapM (blocksToDoc opts) headers rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows - let numChars [] = 0 - numChars xs = maximum . map offset $ xs + let numChars = maybe 0 maximum . NE.nonEmpty . map offset let colWidths = map numChars $ transpose (headerDocs : rowDocs) let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths) diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index e3966ed07..3527949b4 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF import Control.Monad.Except (catchError, throwError) import Control.Monad import qualified Data.ByteString as B -import Data.Char (chr, isDigit, ord) +import Data.Char (chr, isDigit, ord, isAlphaNum) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c -> -- | Escape special characters. escapeSpecial :: Text -> Text -escapeSpecial = escapeStringUsing $ - [ ('\t',"\\tab ") - , ('\8216',"\\u8216'") - , ('\8217',"\\u8217'") - , ('\8220',"\\u8220\"") - , ('\8221',"\\u8221\"") - , ('\8211',"\\u8211-") - , ('\8212',"\\u8212-") - ] <> backslashEscapes "{\\}" +escapeSpecial t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where + escChar '\t' = "\\tab " + escChar '\8216' = "\\u8216'" + escChar '\8217' = "\\u8217'" + escChar '\8220' = "\\u8220\"" + escChar '\8221' = "\\u8221\"" + escChar '\8211' = "\\u8211-" + escChar '\8212' = "\\u8212-" + escChar '{' = "\\{" + escChar '}' = "\\}" + escChar '\\' = "\\\\" + escChar c = T.singleton c -- | Escape strings as needed for rich text format. stringToRTF :: Text -> Text diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs index 00b027cc9..6af56242f 100644 --- a/src/Text/Pandoc/Writers/Roff.hs +++ b/src/Text/Pandoc/Writers/Roff.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Roff - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index b399afbf3..0b7c6bee0 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013-2020 John MacFarlane + Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -20,6 +20,7 @@ module Text.Pandoc.Writers.Shared ( , setField , resetField , defField + , getLang , tagWithAttrs , isDisplayMath , fixDisplayMath @@ -44,6 +45,7 @@ import Control.Monad (zipWithM) import Data.Aeson (ToJSON (..), encode) import Data.Char (chr, ord, isSpace) import Data.List (groupBy, intersperse, transpose, foldl') +import Data.List.NonEmpty (NonEmpty(..), nonEmpty) import Data.Text.Conversions (FromText(..)) import qualified Data.Map as M import qualified Data.Text as T @@ -109,8 +111,7 @@ metaValueToVal blockWriter inlineWriter (MetaMap metamap) = MapVal . Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap metaValueToVal blockWriter inlineWriter (MetaList xs) = ListVal <$> mapM (metaValueToVal blockWriter inlineWriter) xs -metaValueToVal _ _ (MetaBool True) = return $ SimpleVal "true" -metaValueToVal _ _ (MetaBool False) = return NullVal +metaValueToVal _ _ (MetaBool b) = return $ BoolVal b metaValueToVal _ inlineWriter (MetaString s) = SimpleVal <$> inlineWriter (Builder.toList (Builder.text s)) metaValueToVal blockWriter _ (MetaBlocks bs) = SimpleVal <$> blockWriter bs @@ -147,6 +148,19 @@ defField field val (Context m) = where f _newval oldval = oldval +-- | Get the contents of the `lang` metadata field or variable. +getLang :: WriterOptions -> Meta -> Maybe T.Text +getLang opts meta = + case lookupContext "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaBlocks [Para [Str s]]) -> Just s + Just (MetaBlocks [Plain [Str s]]) -> Just s + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing + -- | Produce an HTML tag with the given pandoc attributes. tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a tagWithAttrs tag (ident,classes,kvs) = hsep @@ -225,7 +239,7 @@ gridTable :: (Monad m, HasChars a) -> m (Doc a) gridTable opts blocksToDoc headless aligns widths headers rows = do -- the number of columns will be used in case of even widths - let numcols = maximum (length aligns : length widths : + let numcols = maximum (length aligns :| length widths : map length (headers:rows)) let officialWidthsInChars widths' = map ( (\x -> if x < 1 then 1 else x) . @@ -254,8 +268,7 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do let handleFullWidths widths' = do rawHeaders' <- mapM (blocksToDoc opts) headers rawRows' <- mapM (mapM (blocksToDoc opts)) rows - let numChars [] = 0 - numChars xs = maximum . map offset $ xs + let numChars = maybe 0 maximum . nonEmpty . map offset let minWidthsInChars = map numChars $ transpose (rawHeaders' : rawRows') let widthsInChars' = zipWith max @@ -381,6 +394,7 @@ toSuperscript '2' = Just '\x00B2' toSuperscript '3' = Just '\x00B3' toSuperscript '+' = Just '\x207A' toSuperscript '-' = Just '\x207B' +toSuperscript '\x2212' = Just '\x207B' -- unicode minus toSuperscript '=' = Just '\x207C' toSuperscript '(' = Just '\x207D' toSuperscript ')' = Just '\x207E' diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index a9ee5eece..18015259d 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -2,7 +2,7 @@ {-# LANGUAGE PatternGuards #-} {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -146,16 +146,17 @@ blockToTEI opts (LineBlock lns) = blockToTEI opts $ linesToPara lns blockToTEI opts (BlockQuote blocks) = inTagsIndented "quote" <$> blocksToTEI opts blocks -blockToTEI _ (CodeBlock (_,classes,_) str) = +blockToTEI opts (CodeBlock (_,classes,_) str) = return $ literal ("<ab type='codeblock " <> lang <> "'>") <> cr <> flush (literal (escapeStringForXML str) <> cr <> text "</ab>") where lang = if null langs then "" else escapeStringForXML (head langs) - isLang l = T.toLower l `elem` map T.toLower languages + syntaxMap = writerSyntaxMap opts + isLang l = T.toLower l `elem` map T.toLower (languages syntaxMap) langsFrom s = if isLang s then [s] - else languagesByExtension . T.toLower $ s + else (languagesByExtension syntaxMap) . T.toLower $ s langs = concatMap langsFrom classes blockToTEI opts (BulletList lst) = do let attribs = [("type", "unordered")] diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index c6debd9ce..6a33b4283 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane 2012 Peter Wang License : GNU GPL, version 2 or above @@ -14,8 +14,9 @@ Conversion of 'Pandoc' format into Texinfo. module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict -import Data.Char (chr, ord) -import Data.List (maximumBy, transpose) +import Data.Char (chr, ord, isAlphaNum) +import Data.List (maximumBy, transpose, foldl') +import Data.List.NonEmpty (nonEmpty) import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) @@ -84,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do -- | Escape things as needed for Texinfo. stringToTexinfo :: Text -> Text -stringToTexinfo = escapeStringUsing texinfoEscapes - where texinfoEscapes = [ ('{', "@{") - , ('}', "@}") - , ('@', "@@") - , ('\160', "@ ") - , ('\x2014', "---") - , ('\x2013', "--") - , ('\x2026', "@dots{}") - , ('\x2019', "'") - ] +stringToTexinfo t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where escChar '{' = "@{" + escChar '}' = "@}" + escChar '@' = "@@" + escChar '\160' = "@ " + escChar '\x2014' = "---" + escChar '\x2013' = "--" + escChar '\x2026' = "@dots{}" + escChar '\x2019' = "'" + escChar c = T.singleton c escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text) escapeCommas parser = do @@ -238,9 +241,13 @@ blockToTexinfo (Table _ blkCapt specs thead tbody tfoot) = do colDescriptors <- if all (== 0) widths then do -- use longest entry instead of column widths - cols <- mapM (mapM (liftM (T.unpack . render Nothing . hcat) . mapM blockToTexinfo)) $ + cols <- mapM (mapM (fmap (T.unpack . render Nothing . hcat) . + mapM blockToTexinfo)) $ transpose $ heads : rows - return $ concatMap ((\x -> "{"++x++"} ") . maximumBy (comparing length)) cols + return $ concatMap + ((\x -> "{"++x++"} ") . + maybe "" (maximumBy (comparing length)) . nonEmpty) + cols else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths let tableBody = text ("@multitable " ++ colDescriptors) $$ headers $$ @@ -271,7 +278,7 @@ tableAnyRowToTexinfo :: PandocMonad m -> [[Block]] -> TI m (Doc Text) tableAnyRowToTexinfo itemtype aligns cols = - (literal itemtype $$) . foldl (\row item -> row $$ + (literal itemtype $$) . foldl' (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty <$> zipWithM alignedBlock aligns cols alignedBlock :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 61ddb7497..03d030477 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010-2020 John MacFarlane + Copyright : Copyright (C) 2010-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 902b093d3..df914f590 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ZimWiki - Copyright : © 2008-2020 John MacFarlane, + Copyright : © 2008-2021 John MacFarlane, 2017-2019 Alex Ivkin License : GNU GPL, version 2 or above @@ -20,6 +20,7 @@ import Control.Monad (zipWithM) import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (transpose) +import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as Map import Text.DocLayout (render, literal) import Data.Maybe (fromMaybe) @@ -115,7 +116,7 @@ blockToZimWiki opts b@(RawBlock f str) blockToZimWiki _ HorizontalRule = return "\n----\n" blockToZimWiki opts (Header level _ inlines) = do - contents <- inlineListToZimWiki opts $ removeFormatting inlines -- emphasis, links etc. not allowed in headers + contents <- inlineListToZimWiki opts inlines let eqs = T.replicate ( 7 - level ) "=" return $ eqs <> " " <> contents <> " " <> eqs <> "\n" @@ -143,7 +144,8 @@ blockToZimWiki opts (Table _ blkCapt specs thead tbody tfoot) = do then zipWithM (tableItemToZimWiki opts) aligns (head rows) else mapM (inlineListToZimWiki opts . removeFormatting)headers -- emphasis, links etc. are not allowed in table headers rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows - let widths = map (maximum . map T.length) $ transpose (headers':rows') + let widths = map (maybe 0 maximum . nonEmpty . map T.length) $ + transpose (headers':rows') let padTo (width, al) s = case width - T.length s of x | x > 0 -> |