diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/App.hs | 102 | ||||
-rw-r--r-- | src/Text/Pandoc/Class.hs | 43 | ||||
-rw-r--r-- | src/Text/Pandoc/Extensions.hs | 123 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 32 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 51 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Custom.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/FB2.hs | 11 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Powerpoint.hs | 3 |
11 files changed, 263 insertions, 147 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index e597c56d6..7c463d743 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -35,11 +35,12 @@ Does a pandoc conversion based on command-line options. module Text.Pandoc.App ( convertWithOpts , Opt(..) + , LineEnding(..) + , Filter(..) , defaultOpts , parseOptions , options , applyFilters - , applyLuaFilters ) where import qualified Control.Exception as E import Control.Monad @@ -58,6 +59,9 @@ import Data.Monoid import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TE +import qualified Data.Text.Encoding.Error as TE import Data.Yaml (decode) import qualified Data.Yaml as Yaml import GHC.Generics @@ -181,11 +185,13 @@ convertWithOpts opts = do Nothing -> return Nothing Just fp -> Just <$> UTF8.readFile fp + let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc" + isPandocCiteproc _ = False -- --bibliography implies -F pandoc-citeproc for backwards compatibility: let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && optCiteMethod opts `notElem` [Natbib, Biblatex] && - "pandoc-citeproc" `notElem` map takeBaseName filters - let filters' = if needsCiteproc then "pandoc-citeproc" : filters + all (not . isPandocCiteproc) filters + let filters' = if needsCiteproc then JSONFilter "pandoc-citeproc" : filters else filters let sources = case optInputFiles opts of @@ -498,10 +504,9 @@ convertWithOpts opts = do then fillMediaBag else return) >=> return . addMetadata metadata - >=> applyLuaFilters datadir (optLuaFilters opts) format - >=> maybe return extractMedia (optExtractMedia opts) >=> applyTransforms transforms - >=> applyFilters readerOpts datadir filters' [format] + >=> applyFilters readerOpts filters' [format] + >=> maybe return extractMedia (optExtractMedia opts) ) case writer of @@ -513,7 +518,9 @@ convertWithOpts opts = do case res of Right pdf -> writeFnBinary outputFile pdf Left err' -> liftIO $ - E.throwIO $ PandocPDFError (UTF8.toStringLazy err') + E.throwIO $ PandocPDFError $ + TL.unpack (TE.decodeUtf8With TE.lenientDecode err') + Nothing -> do let htmlFormat = format `elem` ["html","html4","html5","s5","slidy", @@ -578,6 +585,10 @@ externalFilter ropts f args' d = liftIO $ do where filterException :: E.SomeException -> IO a filterException e = E.throwIO $ PandocFilterError f (show e) +data Filter = LuaFilter FilePath + | JSONFilter FilePath + deriving (Show) + -- | Data structure for command line options. data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab @@ -621,8 +632,7 @@ data Opt = Opt , optDpi :: Int -- ^ Dpi , optWrapText :: WrapOption -- ^ Options for wrapping text , optColumns :: Int -- ^ Line length in characters - , optFilters :: [FilePath] -- ^ Filters to apply - , optLuaFilters :: [FilePath] -- ^ Lua filters to apply + , optFilters :: [Filter] -- ^ Filters to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs @@ -695,7 +705,6 @@ defaultOpts = Opt , optWrapText = WrapAuto , optColumns = 72 , optFilters = [] - , optLuaFilters = [] , optEmailObfuscation = NoObfuscation , optIdentifierPrefix = "" , optStripEmptyParagraphs = False @@ -827,41 +836,46 @@ applyTransforms transforms d = return $ foldr ($) d transforms -- First we check to see if a filter is found. If not, and if it's -- not an absolute path, we check to see whether it's in `userdir/filters`. -- If not, we leave it unchanged. -expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath -expandFilterPath mbDatadir fp = liftIO $ do - fpExists <- doesFileExist fp +expandFilterPath :: PandocMonad m => FilePath -> m FilePath +expandFilterPath fp = do + mbDatadir <- getUserDataDir + fpExists <- fileExists fp if fpExists then return fp else case mbDatadir of Just datadir | isRelative fp -> do let filterPath = datadir </> "filters" </> fp - filterPathExists <- doesFileExist filterPath + filterPathExists <- fileExists filterPath if filterPathExists then return filterPath else return fp _ -> return fp -applyLuaFilters :: Maybe FilePath -> [FilePath] -> String -> Pandoc - -> PandocIO Pandoc -applyLuaFilters mbDatadir filters format d = do - expandedFilters <- mapM (expandFilterPath mbDatadir) filters - let go f d' = do - res <- runLuaFilter f format d' - case res of - Right x -> return x - Left (LuaException s) -> E.throw (PandocFilterError f s) - foldrM ($) d $ map go expandedFilters - -applyFilters :: MonadIO m - => ReaderOptions - -> Maybe FilePath - -> [FilePath] +applyFilters :: ReaderOptions + -> [Filter] -> [String] -> Pandoc - -> m Pandoc -applyFilters ropts mbDatadir filters args d = do - expandedFilters <- mapM (expandFilterPath mbDatadir) filters - foldrM ($) d $ map (flip (externalFilter ropts) args) expandedFilters + -> PandocIO Pandoc +applyFilters ropts filters args d = do + foldrM ($) d $ map (applyFilter ropts args) filters + +applyFilter :: ReaderOptions + -> [String] + -> Filter + -> Pandoc + -> PandocIO Pandoc +applyFilter _ropts args (LuaFilter f) d = do + f' <- expandFilterPath f + let format = case args of + (x:_) -> x + _ -> error "Format not supplied for lua filter" + res <- runLuaFilter f' format d + case res of + Right x -> return x + Left (LuaException s) -> E.throw (PandocFilterError f s) +applyFilter ropts args (JSONFilter f) d = do + f' <- expandFilterPath f + liftIO $ externalFilter ropts f' args d readSource :: FilePath -> PandocIO Text readSource "-" = liftIO (UTF8.toText <$> BS.getContents) @@ -963,13 +977,15 @@ options = , Option "F" ["filter"] (ReqArg - (\arg opt -> return opt { optFilters = arg : optFilters opt }) + (\arg opt -> return opt { optFilters = + JSONFilter arg : optFilters opt }) "PROGRAM") "" -- "External JSON filter" , Option "" ["lua-filter"] (ReqArg - (\arg opt -> return opt { optLuaFilters = arg : optLuaFilters opt }) + (\arg opt -> return opt { optFilters = + LuaFilter arg : optFilters opt }) "SCRIPTPATH") "" -- "Lua filter" @@ -1584,15 +1600,16 @@ options = "" , Option "" ["list-extensions"] - (NoArg - (\_ -> do - let showExt x = drop 4 (show x) ++ - if extensionEnabled x pandocExtensions - then " +" - else " -" + (OptArg + (\arg _ -> do + let exts = getDefaultExtensions (fromMaybe "markdown" arg) + let showExt x = (if extensionEnabled x exts + then '+' + else '-') : drop 4 (show x) mapM_ (UTF8.hPutStrLn stdout . showExt) ([minBound..maxBound] :: [Extension]) - exitSuccess )) + exitSuccess ) + "FORMAT") "" , Option "" ["list-highlight-languages"] @@ -1714,4 +1731,5 @@ deprecatedOption o msg = -- see https://github.com/jgm/pandoc/pull/4083 -- using generic deriving caused long compilation times $(deriveJSON defaultOptions ''LineEnding) +$(deriveJSON defaultOptions ''Filter) $(deriveJSON defaultOptions ''Opt) diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index c63781adf..f8d6b6737 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -78,9 +78,10 @@ module Text.Pandoc.Class ( PandocMonad(..) , getResourcePath , PandocIO(..) , PandocPure(..) - , FileTree(..) + , FileTree , FileInfo(..) , addToFileTree + , insertInFileTree , runIO , runIOorExplode , runPure @@ -144,6 +145,8 @@ import System.Directory (createDirectoryIfMissing, getDirectoryContents, import System.FilePath ((</>), (<.>), takeDirectory, takeExtension, dropExtension, isRelative, normalise) import qualified System.FilePath.Glob as IO (glob) +import qualified System.FilePath.Posix as Posix +import System.FilePath (splitDirectories) import qualified System.Directory as IO (getModificationTime) import Control.Monad as M (fail) import Control.Monad.State.Strict @@ -160,8 +163,6 @@ import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, import qualified Debug.Trace #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) -import qualified System.FilePath.Posix as Posix -import System.FilePath (splitDirectories) #else import qualified Paths_pandoc as Paths #endif @@ -620,6 +621,7 @@ getDefaultReferenceDocx = do "word/document.xml", "word/fontTable.xml", "word/footnotes.xml", + "word/comments.xml", "word/numbering.xml", "word/settings.xml", "word/webSettings.xml", @@ -686,8 +688,6 @@ getDefaultReferencePptx = do , "ppt/presProps.xml" , "ppt/presentation.xml" , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" @@ -696,6 +696,8 @@ getDefaultReferencePptx = do , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" , "ppt/slideLayouts/slideLayout1.xml" , "ppt/slideLayouts/slideLayout10.xml" , "ppt/slideLayouts/slideLayout11.xml" @@ -711,6 +713,8 @@ getDefaultReferencePptx = do , "ppt/slideMasters/slideMaster1.xml" , "ppt/slides/_rels/slide1.xml.rels" , "ppt/slides/slide1.xml" + , "ppt/slides/_rels/slide2.xml.rels" + , "ppt/slides/slide2.xml" , "ppt/tableStyles.xml" , "ppt/theme/theme1.xml" , "ppt/viewProps.xml" @@ -760,11 +764,6 @@ readDefaultDataFile fname = case lookup (makeCanonical fname) dataFiles of Nothing -> throwError $ PandocCouldNotFindDataFileError fname Just contents -> return contents - where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - transformPathParts = reverse . foldl go [] - go as "." = as - go (_:as) ".." = as - go as x = x : as #else getDataFileName fname' >>= checkExistence >>= readFileStrict where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname @@ -777,6 +776,13 @@ checkExistence fn = do else throwError $ PandocCouldNotFindDataFileError fn #endif +makeCanonical :: FilePath -> FilePath +makeCanonical = Posix.joinPath . transformPathParts . splitDirectories + where transformPathParts = reverse . foldl go [] + go as "." = as + go (_:as) ".." = as + go as x = x : as + withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a withPaths [] _ fp = throwError $ PandocResourceNotFound fp withPaths (p:ps) action fp = @@ -912,12 +918,13 @@ newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo} deriving (Monoid) getFileInfo :: FilePath -> FileTree -> Maybe FileInfo -getFileInfo fp tree = M.lookup fp $ unFileTree tree +getFileInfo fp tree = + M.lookup (makeCanonical fp) (unFileTree tree) -- | Add the specified file to the FileTree. If file -- is a directory, add its contents recursively. addToFileTree :: FileTree -> FilePath -> IO FileTree -addToFileTree (FileTree treemap) fp = do +addToFileTree tree fp = do isdir <- doesDirectoryExist fp if isdir then do -- recursively add contents of directories @@ -925,13 +932,17 @@ addToFileTree (FileTree treemap) fp = do isSpecial "." = True isSpecial _ = False fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp - foldM addToFileTree (FileTree treemap) fs + foldM addToFileTree tree fs else do contents <- B.readFile fp mtime <- IO.getModificationTime fp - return $ FileTree $ - M.insert fp FileInfo{ infoFileMTime = mtime - , infoFileContents = contents } treemap + return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime + , infoFileContents = contents } tree + +-- | Insert an ersatz file into the 'FileTree'. +insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree +insertInFileTree fp info (FileTree treemap) = + FileTree $ M.insert (makeCanonical fp) info treemap newtype PandocPure a = PandocPure { unPandocPure :: ExceptT PandocError diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index bea293891..31fddb148 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -80,79 +80,79 @@ disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) -- | Individually selectable syntax extensions. data Extension = - Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes - | Ext_inline_notes -- ^ Pandoc-style inline notes - | Ext_pandoc_title_block -- ^ Pandoc title block - | Ext_yaml_metadata_block -- ^ YAML metadata block - | Ext_mmd_title_block -- ^ Multimarkdown metadata block - | Ext_table_captions -- ^ Pandoc-style table captions - | Ext_implicit_figures -- ^ A paragraph with just an image is a figure - | Ext_simple_tables -- ^ Pandoc-style simple tables - | Ext_multiline_tables -- ^ Pandoc-style multiline tables - | Ext_grid_tables -- ^ Grid tables (pandoc, reST) - | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) - | Ext_citations -- ^ Pandoc/citeproc citations - | Ext_raw_tex -- ^ Allow raw TeX (other than math) - | Ext_raw_html -- ^ Allow raw HTML - | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ - | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] - | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] - | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) - | Ext_fenced_code_blocks -- ^ Parse fenced code blocks - | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks - | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks - | Ext_inline_code_attributes -- ^ Allow attributes on inline code - | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines - | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks - | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags - | Ext_fenced_divs -- ^ Allow fenced div syntax ::: - | Ext_native_spans -- ^ Use Span inlines for contents of <span> - | Ext_bracketed_spans -- ^ Bracketed spans with attributes - | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown - -- iff container has attribute 'markdown' - | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak - | Ext_link_attributes -- ^ link and image attributes - | Ext_mmd_link_attributes -- ^ MMD style reference link attributes - | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links - | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters - | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank - | Ext_four_space_rule -- ^ Require 4-space indent for list contents - | Ext_startnum -- ^ Make start number of ordered list significant - | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php - | Ext_compact_definition_lists -- ^ Definition lists without - -- space between items, and disallow laziness - | Ext_example_lists -- ^ Markdown-style numbered examples + Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable + | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup | Ext_angle_brackets_escapable -- ^ Make < and > escapable - | Ext_intraword_underscores -- ^ Treat underscore inside word as literal + | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers + | Ext_auto_identifiers -- ^ Automatic identifiers for headers + | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links + | Ext_backtick_code_blocks -- ^ GitHub style ``` code blocks | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote | Ext_blank_before_header -- ^ Require blank line before a header - | Ext_space_in_atx_header -- ^ Require space between # and header text - | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax - | Ext_superscript -- ^ Superscript using ^this^ syntax - | Ext_subscript -- ^ Subscript using ~this~ syntax - | Ext_hard_line_breaks -- ^ All newlines become hard line breaks - | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored + | Ext_bracketed_spans -- ^ Bracketed spans with attributes + | Ext_citations -- ^ Pandoc/citeproc citations + | Ext_compact_definition_lists -- ^ Definition lists without space between items, + -- and disallow laziness + | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between - -- East Asian wide characters - | Ext_literate_haskell -- ^ Enable literate Haskell conventions - | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + -- East Asian wide characters | Ext_emoji -- ^ Support emoji like :smile: - | Ext_auto_identifiers -- ^ Automatic identifiers for headers - | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, - -- using GitHub's method for generating identifiers - | Ext_ascii_identifiers -- ^ ascii-only identifiers for headers + | Ext_empty_paragraphs -- ^ Allow empty paragraphs + | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML + | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak + | Ext_example_lists -- ^ Markdown-style numbered examples + | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks + | Ext_fenced_code_blocks -- ^ Parse fenced code blocks + | Ext_fenced_divs -- ^ Allow fenced div syntax ::: + | Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes + | Ext_four_space_rule -- ^ Require 4-space indent for list contents + | Ext_gfm_auto_identifiers -- ^ Automatic identifiers for headers, using + -- GitHub's method for generating identifiers + | Ext_grid_tables -- ^ Grid tables (pandoc, reST) + | Ext_hard_line_breaks -- ^ All newlines become hard line breaks | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} - | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] + | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored + | Ext_implicit_figures -- ^ A paragraph with just an image is a figure | Ext_implicit_header_references -- ^ Implicit reference links for headers + | Ext_inline_code_attributes -- ^ Allow attributes on inline code + | Ext_inline_notes -- ^ Pandoc-style inline notes + | Ext_intraword_underscores -- ^ Treat underscore inside word as literal + | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) | Ext_line_blocks -- ^ RST style line blocks - | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML + | Ext_link_attributes -- ^ link and image attributes + | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank + | Ext_literate_haskell -- ^ Enable literate Haskell conventions + | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown iff + -- container has attribute 'markdown' + | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks + | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] + | Ext_mmd_link_attributes -- ^ MMD style reference link attributes + | Ext_mmd_title_block -- ^ Multimarkdown metadata block + | Ext_multiline_tables -- ^ Pandoc-style multiline tables + | Ext_native_divs -- ^ Use Div blocks for contents of <div> tags + | Ext_native_spans -- ^ Use Span inlines for contents of <span> + | Ext_old_dashes -- ^ -- = em, - before number = en + | Ext_pandoc_title_block -- ^ Pandoc title block + | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) + | Ext_raw_attribute -- ^ Allow explicit raw blocks/inlines + | Ext_raw_html -- ^ Allow raw HTML + | Ext_raw_tex -- ^ Allow raw TeX (other than math) | Ext_shortcut_reference_links -- ^ Shortcut reference links + | Ext_simple_tables -- ^ Pandoc-style simple tables | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes - | Ext_old_dashes -- ^ -- = em, - before number = en + | Ext_space_in_atx_header -- ^ Require space between # and header text | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link - | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup - | Ext_empty_paragraphs -- ^ Allow empty paragraphs + | Ext_startnum -- ^ Make start number of ordered list significant + | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax + | Ext_subscript -- ^ Subscript using ~this~ syntax + | Ext_superscript -- ^ Superscript using ^this^ syntax + | Ext_table_captions -- ^ Pandoc-style table captions + | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ + | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] + | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] + | Ext_yaml_metadata_block -- ^ YAML metadata block deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) -- | Extensions to be used with pandoc-flavored markdown. @@ -321,6 +321,7 @@ getDefaultExtensions "org" = extensionsFromList getDefaultExtensions "html" = extensionsFromList [Ext_auto_identifiers, Ext_native_divs, + Ext_line_blocks, Ext_native_spans] getDefaultExtensions "html4" = getDefaultExtensions "html" getDefaultExtensions "html5" = getDefaultExtensions "html" diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 99e6f99e6..48a512be2 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -73,6 +73,7 @@ import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) import Text.XML.Light +import qualified Text.XML.Light.Cursor as XMLC data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -117,6 +118,32 @@ mapD f xs = in concatMapM handler xs +unwrapSDT :: NameSpaces -> Content -> Content +unwrapSDT ns (Elem element) + | isElem ns "w" "sdt" element + , Just sdtContent <- findChildByName ns "w" "sdtContent" element + , child : _ <- elChildren sdtContent + = Elem child +unwrapSDT _ content = content + +walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor +walkDocument' ns cur = + let modifiedCur = XMLC.modifyContent (unwrapSDT ns) cur + in + case XMLC.nextDF modifiedCur of + Just cur' -> walkDocument' ns cur' + Nothing -> XMLC.root modifiedCur + +walkDocument :: NameSpaces -> Element -> Maybe Element +walkDocument ns element = + let cur = XMLC.fromContent (Elem element) + cur' = walkDocument' ns cur + in + case XMLC.toTree cur' of + Elem element' -> Just element' + _ -> Nothing + + data Docx = Docx Document deriving Show @@ -298,7 +325,10 @@ archiveToDocument zf = do docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem - body <- elemToBody namespaces bodyElem + let bodyElem' = case walkDocument namespaces bodyElem of + Just e -> e + Nothing -> bodyElem + body <- elemToBody namespaces bodyElem' return $ Document namespaces body elemToBody :: NameSpaces -> Element -> D Body diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f5f296712..65171d37a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -51,7 +51,7 @@ import Data.Char (isAlphaNum, isDigit, isLetter) import Data.Default (Default (..), def) import Data.Foldable (for_) import Data.List (intercalate, isPrefixOf) -import Data.List.Split (wordsBy) +import Data.List.Split (wordsBy, splitWhen) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid (First (..), (<>)) @@ -70,12 +70,12 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options ( Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, - Ext_native_spans, Ext_raw_html), + Ext_native_spans, Ext_raw_html, Ext_line_blocks), ReaderOptions (readerExtensions, readerStripComments), extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) -import Text.Pandoc.Shared (addMetaField, crFilter, escapeURI, extractSpaces, - safeRead, underlineSpan) +import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, + extractSpaces, safeRead, underlineSpan) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) @@ -191,6 +191,7 @@ block = do , pHtml , pHead , pBody + , pLineBlock , pDiv , pPlain , pFigure @@ -377,6 +378,16 @@ pRawTag = do then return mempty else return $ renderTags' [tag] +pLineBlock :: PandocMonad m => TagParser m Blocks +pLineBlock = try $ do + guardEnabled Ext_line_blocks + _ <- pSatisfy $ tagOpen (=="div") (== [("class","line-block")]) + ils <- trimInlines . mconcat <$> manyTill inline (pSatisfy (tagClose (=="div"))) + let lns = map B.fromList $ + splitWhen (== LineBreak) $ filter (/= SoftBreak) $ + B.toList ils + return $ B.lineBlock lns + pDiv :: PandocMonad m => TagParser m Blocks pDiv = try $ do guardEnabled Ext_native_divs @@ -588,8 +599,9 @@ pFigure = try $ do skipMany pBlank let pImg = (\x -> (Just x, Nothing)) <$> (pOptInTag "p" pImage <* skipMany pBlank) - pCapt = (\x -> (Nothing, Just x)) <$> - (pInTags "figcaption" inline <* skipMany pBlank) + pCapt = (\x -> (Nothing, Just x)) <$> do + bs <- pInTags "figcaption" block + return $ blocksToInlines' $ B.toList bs pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure") res <- many (pImg <|> pCapt <|> pSkip) let mbimg = msum $ map fst res diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6c5567ffd..e0972bb6c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1726,7 +1726,7 @@ inline = (mempty <$ comment) <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) <|> (str . (:[]) <$> primEscape) <|> regularSymbol - <|> (do res <- symbolIn "#^'`\"[]" + <|> (do res <- symbolIn "#^'`\"[]&" pos <- getPosition let s = T.unpack (untoken res) report $ ParsingUnescaped s pos diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index cc6abbfa5..a930652af 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -41,7 +41,6 @@ import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, originalLang, translateLang) import Text.Pandoc.Builder (Blocks, Inlines) -import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options @@ -54,6 +53,9 @@ import Data.List (foldl', isPrefixOf) import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) +import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.Walk as Walk + -- -- parsing blocks -- @@ -509,19 +511,18 @@ include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget - blockType <- optionMaybe $ skipSpaces *> many1 alphaNum - blocksParser <- case blockType of - Just "example" -> - return $ pure . B.codeBlock <$> parseRaw - Just "export" -> do - format <- skipSpaces *> many (noneOf "\n\r\t ") - return $ pure . B.rawBlock format <$> parseRaw - Just "src" -> do - language <- skipSpaces *> many (noneOf "\n\r\t ") - let attr = (mempty, [language], mempty) - return $ pure . B.codeBlockWith attr <$> parseRaw - _ -> return $ pure . B.fromList <$> blockList - anyLine + includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + params <- keyValues + blocksParser <- case includeArgs of + ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw + ["export"] -> return . returnF $ B.fromList [] + ("export" : format : []) -> return $ pure . B.rawBlock format <$> parseRaw + ("src" : rest) -> do + let attr = case rest of + [lang] -> (mempty, [lang], mempty) + _ -> nullAttr + return $ pure . B.codeBlockWith attr <$> parseRaw + _ -> return $ return . B.fromList . blockFilter params <$> blockList insertIncludedFileF blocksParser ["."] filename where includeTarget :: PandocMonad m => OrgParser m FilePath @@ -532,6 +533,28 @@ include = try $ do parseRaw :: PandocMonad m => OrgParser m String parseRaw = many anyChar + blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter params blks = + let minlvl = lookup "minlevel" params + in case (minlvl >>= safeRead :: Maybe Int) of + Nothing -> blks + Just lvl -> let levels = Walk.query headerLevel blks + -- CAVE: partial function in else + curMin = if null levels then 0 else minimum levels + in Walk.walk (shiftHeader (curMin - lvl)) blks + + headerLevel :: Block -> [Int] + headerLevel (Header lvl _attr _content) = [lvl] + headerLevel _ = [] + + shiftHeader :: Int -> Block -> Block + shiftHeader shift blk = + if shift <= 0 + then blk + else case blk of + (Header lvl attr content) -> Header (lvl - shift) attr content + _ -> blk + rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 6a6fabf1d..a33196cbe 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -87,6 +87,15 @@ instance ToLuaStack (Stringify Citation) where addValue "citationNoteNum" $ citationNoteNum cit addValue "citationHash" $ citationHash cit +-- | Key-value pair, pushed as a table with @a@ as the only key and @v@ as the +-- associated value. +newtype KeyValue a b = KeyValue (a, b) + +instance (ToLuaStack a, ToLuaStack b) => ToLuaStack (KeyValue a b) where + push (KeyValue (k, v)) = do + newtable + addValue k v + data PandocLuaException = PandocLuaException String deriving (Show, Typeable) @@ -165,7 +174,8 @@ blockToCustom (OrderedList (num,sty,delim) items) = callFunc "OrderedList" (map Stringify items) num (show sty) (show delim) blockToCustom (DefinitionList items) = - callFunc "DefinitionList" (map (Stringify *** map Stringify) items) + callFunc "DefinitionList" + (map (KeyValue . (Stringify *** map Stringify)) items) blockToCustom (Div attr items) = callFunc "Div" (Stringify items) (attrToMap attr) diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 633f42442..0a4130ca4 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -121,9 +121,18 @@ description meta' = do Just (MetaString s) -> [el "lang" $ iso639 s] _ -> [] where iso639 = takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + let coverimage url = do + let img = Image nullAttr mempty (url, "") + im <- insertImage InlineImage img + return [el "coverpage" im] + coverpage <- case lookupMeta "cover-image" meta' of + Just (MetaInlines [Str s]) -> coverimage s + Just (MetaString s) -> coverimage s + _ -> return [] return $ el "description" [ el "title-info" (genre : (bt ++ as ++ dd ++ lang)) - , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version + , el "document-info" ([ el "program-used" "pandoc" ] -- FIXME: +version + ++ coverpage) ] booktitle :: PandocMonad m => Meta -> FBM m [Content] diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d6ccc1512..87ce65586 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -699,10 +699,9 @@ blockToLaTeX (Table caption aligns widths heads rows) = do then return empty else ($$ text "\\endfirsthead") <$> toHeaders heads head' <- if all null heads - then return empty + then return "\\toprule" -- avoid duplicate notes in head and firsthead: - else ($$ text "\\endhead") <$> - toHeaders (if isEmpty firsthead + else toHeaders (if isEmpty firsthead then heads else walk removeNote heads) let capt = if isEmpty captionText @@ -717,8 +716,8 @@ blockToLaTeX (Table caption aligns widths heads rows) = do -- the @{} removes extra space at beginning and end $$ capt $$ firsthead - $$ (if all null heads then "\\toprule" else empty) $$ head' + $$ "\\endhead" $$ vcat rows' $$ "\\bottomrule" $$ "\\end{longtable}" diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs index d5627f51c..ab3b2eabf 100644 --- a/src/Text/Pandoc/Writers/Powerpoint.hs +++ b/src/Text/Pandoc/Writers/Powerpoint.hs @@ -86,6 +86,9 @@ writePowerpoint opts (Pandoc meta blks) = do , envDistArchive = distArchive , envUTCTime = utctime , envOpts = opts + , envSlideLevel = case writerSlideLevel opts of + Just n -> n + Nothing -> 2 } runP env def $ do pres <- blocksToPresentation blks' archv <- presentationToArchive pres |