diff options
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 167 |
1 files changed, 42 insertions, 125 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index f14a57c1f..d86f9a390 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -59,13 +59,8 @@ module Text.Pandoc.Shared ( uniqueIdent, isHeaderBlock, headerShift, - -- * Writer options - HTMLMathMethod (..), - CiteMethod (..), - ObfuscationMethod (..), - HTMLSlideVariant (..), - WriterOptions (..), - defaultWriterOptions, + -- * TagSoup HTML handling + renderTags', -- * File handling inDirectory, findDataFile, @@ -73,6 +68,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + -- * Safe read + safeRead ) where import Text.Pandoc.Definition @@ -90,11 +87,12 @@ import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import Control.Monad (msum) import Paths_pandoc (getDataFileName) -import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) import Data.Time import System.IO (stderr) +import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), + renderOptions) -- -- List processing @@ -149,7 +147,7 @@ backslashEscapes = map (\ch -> (ch, ['\\',ch])) -- characters and strings. escapeStringUsing :: [(Char, String)] -> String -> String escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = +escapeStringUsing escapeTable (x:xs) = case (lookup x escapeTable) of Just str -> str ++ rest Nothing -> x:rest @@ -176,7 +174,7 @@ stripFirstAndLast :: String -> String stripFirstAndLast str = drop 1 $ take ((length str) - 1) str --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). +-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). camelCaseToHyphenated :: String -> String camelCaseToHyphenated [] = "" camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = @@ -247,13 +245,13 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") -- | Generate infinite lazy list of markers for an ordered list, -- depending on list attributes. orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) = +orderedListMarkers (start, numstyle, numdelim) = let singleton c = [c] nums = case numstyle of DefaultStyle -> map show [start..] Example -> map show [start..] Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ + UpperAlpha -> drop (start - 1) $ cycle $ map singleton ['A'..'Z'] LowerAlpha -> drop (start - 1) $ cycle $ map singleton ['a'..'z'] @@ -271,13 +269,12 @@ orderedListMarkers (start, numstyle, numdelim) = -- remove empty Str elements. normalizeSpaces :: [Inline] -> [Inline] normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty - where cleanup [] = [] - cleanup (Space:rest) = let rest' = dropWhile isSpaceOrEmpty rest - in case rest' of - [] -> [] - _ -> Space : cleanup rest' + where cleanup [] = [] + cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of + [] -> [] + (x:xs) -> Space : x : cleanup xs cleanup ((Str ""):rest) = cleanup rest - cleanup (x:rest) = x : cleanup rest + cleanup (x:rest) = x : cleanup rest isSpaceOrEmpty :: Inline -> Bool isSpaceOrEmpty Space = True @@ -386,7 +383,7 @@ isPara (Para _) = True isPara _ = False -- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block +data Element = Blk Block | Sec Int [Int] String [Inline] [Element] -- lvl num ident label contents deriving (Eq, Read, Show, Typeable, Data) @@ -414,7 +411,7 @@ hierarchicalizeWithIds ((Header level title'):xs) = do let ident = uniqueIdent title' usedIdents let lastnum' = take level lastnum let newnum = if length lastnum' >= level - then init lastnum' ++ [last lastnum' + 1] + then init lastnum' ++ [last lastnum' + 1] else lastnum ++ replicate (level - length lastnum - 1) 0 ++ [1] S.put (newnum, (ident : usedIdents)) let (sectionContents, rest) = break (headerLtEq level) xs @@ -456,112 +453,20 @@ headerShift n = bottomUp shift shift x = x -- --- Writer options +-- TagSoup HTML handling -- -data HTMLMathMethod = PlainMath - | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js - | JsMath (Maybe String) -- url of jsMath load script - | GladTeX - | WebTeX String -- url of TeX->image script. - | MathML (Maybe String) -- url of MathMLinHTML.js - | MathJax String -- url of MathJax.js - deriving (Show, Read, Eq) - -data CiteMethod = Citeproc -- use citeproc to render them - | Natbib -- output natbib cite commands - | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq) - --- | Methods for obfuscating email addresses in HTML. -data ObfuscationMethod = NoObfuscation - | ReferenceObfuscation - | JavascriptObfuscation - deriving (Show, Read, Eq) - --- | Varieties of HTML slide shows. -data HTMLSlideVariant = S5Slides - | SlidySlides - | SlideousSlides - | DZSlides - | NoSlides - deriving (Show, Read, Eq) - --- | Options for writers -data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ Include header and footer - , writerTemplate :: String -- ^ Template to use in standalone mode - , writerVariables :: [(String, String)] -- ^ Variables to set in template - , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB - , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous? - , writerIncremental :: Bool -- ^ True if lists should be incremental - , writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex - , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerWrapText :: Bool -- ^ Wrap text to line length - , writerColumns :: Int -- ^ Characters in a line (for text wrapping) - , writerLiterateHaskell :: Bool -- ^ Write as literate haskell - , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML - , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file - , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory - , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations - , writerHtml5 :: Bool -- ^ Produce HTML5 - , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show - , writerSlideLevel :: Maybe Int -- ^ Force header level of slides - , writerChapters :: Bool -- ^ Use "chapter" for top-level sects - , writerListings :: Bool -- ^ Use listings package for code - , writerHighlight :: Bool -- ^ Highlight source code - , writerHighlightStyle :: Style -- ^ Style to use for highlighting - , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex - } deriving Show - -{-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-} --- | Default writer options. -defaultWriterOptions :: WriterOptions -defaultWriterOptions = - WriterOptions { writerStandalone = False - , writerTemplate = "" - , writerVariables = [] - , writerEPUBMetadata = "" - , writerTabStop = 4 - , writerTableOfContents = False - , writerSlideVariant = NoSlides - , writerIncremental = False - , writerXeTeX = False - , writerHTMLMathMethod = PlainMath - , writerIgnoreNotes = False - , writerNumberSections = False - , writerSectionDivs = False - , writerStrictMarkdown = False - , writerReferenceLinks = False - , writerWrapText = True - , writerColumns = 72 - , writerLiterateHaskell = False - , writerEmailObfuscation = JavascriptObfuscation - , writerIdentifierPrefix = "" - , writerSourceDirectory = "." - , writerUserDataDir = Nothing - , writerCiteMethod = Citeproc - , writerBiblioFiles = [] - , writerHtml5 = False - , writerBeamer = False - , writerSlideLevel = Nothing - , writerChapters = False - , writerListings = False - , writerHighlight = False - , writerHighlightStyle = pygments - , writerSetextHeaders = True - , writerTeXLigatures = True - } +-- | Render HTML tags. +renderTags' :: [Tag String] -> String +renderTags' = renderTagsOptions + renderOptions{ optMinimize = \x -> + let y = map toLower x + in y == "hr" || y == "br" || + y == "img" || y == "meta" || + y == "link" + , optRawTag = \x -> + let y = map toLower x + in y == "script" || y == "style" } -- -- File handling @@ -606,3 +511,15 @@ warn :: String -> IO () warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg + +-- +-- Safe read +-- + +safeRead :: (Monad m, Read a) => String -> m a +safeRead s = case reads s of + (d,x):_ + | all isSpace x -> return d + _ -> fail $ "Could not read `" ++ s ++ "'" + + |