aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Shared.hs87
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs2
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs144
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs2
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs2
5 files changed, 114 insertions, 123 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 13eab9bdb..82ae08601 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP, DeriveDataTypeable #-}
{-
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
@@ -112,13 +112,16 @@ import Text.ParserCombinators.Parsec
import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
import qualified Text.PrettyPrint.HughesPJ as PP
import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
+import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha,
+ isPunctuation )
import Data.List ( find, isPrefixOf, intercalate )
-import Control.Monad ( join )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
import System.Directory
import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
import System.IO.UTF8
+import Data.Generics
+import qualified Control.Monad.State as S
+import Control.Monad (join)
--
-- List processing
@@ -878,22 +881,74 @@ endsWithPlain blocks =
-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
- | Sec [Inline] [Element] deriving (Eq, Read, Show)
-
--- | Returns @True@ on Header block with at least the specified level
-headerAtLeast :: Int -> Block -> Bool
-headerAtLeast level (Header x _) = x <= level
-headerAtLeast _ _ = False
+ | Sec Int String [Inline] [Element]
+ -- lvl ident label contents
+ deriving (Eq, Read, Show, Typeable, Data)
+
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
+
+inlineListToIdentifier' :: [Inline] -> [Char]
+inlineListToIdentifier' [] = ""
+inlineListToIdentifier' (x:xs) =
+ xAsText ++ inlineListToIdentifier' xs
+ where xAsText = case x of
+ Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+ intercalate "-" $ words $ map toLower s
+ Emph lst -> inlineListToIdentifier' lst
+ Strikeout lst -> inlineListToIdentifier' lst
+ Superscript lst -> inlineListToIdentifier' lst
+ SmallCaps lst -> inlineListToIdentifier' lst
+ Subscript lst -> inlineListToIdentifier' lst
+ Strong lst -> inlineListToIdentifier' lst
+ Quoted _ lst -> inlineListToIdentifier' lst
+ Cite _ lst -> inlineListToIdentifier' lst
+ Code s -> s
+ Space -> "-"
+ EmDash -> "-"
+ EnDash -> "-"
+ Apostrophe -> ""
+ Ellipses -> ""
+ LineBreak -> "-"
+ Math _ _ -> ""
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> inlineListToIdentifier' lst
+ Image lst _ -> inlineListToIdentifier' lst
+ Note _ -> ""
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
-hierarchicalize [] = []
-hierarchicalize (block:rest) =
- case block of
- (Header level title) ->
- let (thisSection, rest') = break (headerAtLeast level) rest
- in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
- x -> (Blk x):(hierarchicalize rest)
+hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
+
+hierarchicalizeWithIds :: [Block] -> S.State [String] [Element]
+hierarchicalizeWithIds [] = return []
+hierarchicalizeWithIds ((Header level title'):xs) = do
+ usedIdents <- S.get
+ let ident = uniqueIdent title' usedIdents
+ S.modify (ident :)
+ let (sectionContents, rest) = break (headerLtEq level) xs
+ sectionContents' <- hierarchicalizeWithIds sectionContents
+ rest' <- hierarchicalizeWithIds rest
+ return $ Sec level ident title' sectionContents' : rest'
+hierarchicalizeWithIds (x:rest) = do
+ rest' <- hierarchicalizeWithIds rest
+ return $ (Blk x) : rest'
+
+headerLtEq :: Int -> Block -> Bool
+headerLtEq level (Header l _) = l <= level
+headerLtEq _ _ = False
+
+uniqueIdent :: [Inline] -> [String] -> String
+uniqueIdent title' usedIdents =
+ let baseIdent = inlineListToIdentifier title'
+ numIdent n = baseIdent ++ "-" ++ show n
+ in if baseIdent `elem` usedIdents
+ then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
+ Just x -> numIdent x
+ Nothing -> baseIdent -- if we have more than 60,000, allow repeats
+ else baseIdent
-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 3e535a87e..eed428d23 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -80,7 +80,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
-- | Convert an Element to Docbook.
elementToDocbook :: WriterOptions -> Element -> Doc
elementToDocbook opts (Blk block) = blockToDocbook opts block
-elementToDocbook opts (Sec title elements) =
+elementToDocbook opts (Sec _ _ title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index fb7320e92..4b6ea5982 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -36,22 +36,21 @@ import Text.Pandoc.Shared
import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss )
import Numeric ( showHex )
-import Data.Char ( ord, toLower, isAlpha )
+import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, intercalate )
+import Data.Maybe ( catMaybes )
import qualified Data.Set as S
import Control.Monad.State
import Text.XHtml.Transitional hiding ( stringToHtml )
data WriterState = WriterState
{ stNotes :: [Html] -- ^ List of notes
- , stIds :: [String] -- ^ List of header identifiers
, stMath :: Bool -- ^ Math is used in document
, stCSS :: S.Set String -- ^ CSS to include in header
} deriving Show
defaultWriterState :: WriterState
-defaultWriterState = WriterState {stNotes= [], stIds = [],
- stMath = False, stCSS = S.empty}
+defaultWriterState = WriterState {stNotes= [], stMath = False, stCSS = S.empty}
-- Helpers to render HTML with the appropriate function.
@@ -107,15 +106,13 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
not (writerS5 opts)
then h1 ! [theclass "title"] $ topTitle
else noHtml
- headerBlocks = filter isHeaderBlock blocks
- ids = uniqueIdentifiers $
- map (\(Header _ lst) -> lst) headerBlocks
+ sects = hierarchicalize blocks
toc = if writerTableOfContents opts
- then tableOfContents opts headerBlocks ids
+ then evalState (tableOfContents opts sects) defaultWriterState
else noHtml
- (blocks', newstate) =
- runState (blockListToHtml opts blocks)
- (defaultWriterState {stIds = ids})
+ (blocks', newstate) = runState
+ (mapM (elementToHtml opts) sects >>= return . toHtmlFromList)
+ defaultWriterState
cssLines = stCSS newstate
css = if S.null cssLines
then noHtml
@@ -146,35 +143,36 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
then head' +++ body thebody
else thebody
--- | Construct table of contents from list of header blocks and identifiers.
--- Assumes there are as many identifiers as header blocks.
-tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
-tableOfContents _ [] _ = noHtml
-tableOfContents opts headers ids =
+-- | Construct table of contents from list of elements.
+tableOfContents :: WriterOptions -> [Element] -> State WriterState Html
+tableOfContents _ [] = return noHtml
+tableOfContents opts sects = do
let opts' = opts { writerIgnoreNotes = True }
- contentsTree = hierarchicalize headers
- contents = evalState (mapM (elementToListItem opts') contentsTree)
- (defaultWriterState {stIds = ids})
- in thediv ! [identifier "toc"] $ unordList contents
+ contents <- mapM (elementToListItem opts') sects
+ return $ thediv ! [identifier "TOC"] $ unordList $ catMaybes contents
-- | Converts an Element to a list item for a table of contents,
-- retrieving the appropriate identifier from state.
-elementToListItem :: WriterOptions -> Element -> State WriterState Html
-elementToListItem _ (Blk _) = return noHtml
-elementToListItem opts (Sec headerText subsecs) = do
- st <- get
- let ids = stIds st
- let (id', rest) = if null ids
- then ("", [])
- else (head ids, tail ids)
- put $ st {stIds = rest}
+elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html)
+elementToListItem _ (Blk _) = return Nothing
+elementToListItem opts (Sec _ id' headerText subsecs) = do
txt <- inlineListToHtml opts headerText
- subHeads <- mapM (elementToListItem opts) subsecs
+ subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes
let subList = if null subHeads
then noHtml
- else unordList subHeads
- return $ (anchor ! [href ("#" ++ id'), identifier ("TOC-" ++ id')] $ txt) +++
- subList
+ else unordList subHeads
+ return $ Just $ (anchor ! [href ("#" ++ id')] $ txt) +++ subList
+
+-- | Convert an Element to Html.
+elementToHtml :: WriterOptions -> Element -> State WriterState Html
+elementToHtml opts (Blk block) = blockToHtml opts block
+elementToHtml opts (Sec level id' title' elements) = do
+ innerContents <- mapM (elementToHtml opts) elements
+ header' <- blockToHtml opts (Header level title')
+ return $ if writerS5 opts || (writerStrictMarkdown opts && not (writerTableOfContents opts))
+ -- S5 gets confused by the extra divs around sections
+ then toHtmlFromList (header' : innerContents)
+ else thediv ! [identifier id'] << (header' : innerContents)
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
@@ -236,15 +234,6 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
--- | True if character is a punctuation character (unicode).
-isPunctuation :: Char -> Bool
-isPunctuation c =
- let c' = ord c
- in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
- c' >= 0xE000 && c' <= 0xE0FF
- then True
- else False
-
-- | Add CSS for document header.
addToCSS :: String -> State WriterState ()
addToCSS item = do
@@ -252,50 +241,6 @@ addToCSS item = do
let current = stCSS st
put $ st {stCSS = S.insert item current}
--- | Convert Pandoc inline list to plain text identifier.
-inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
-
-inlineListToIdentifier' :: [Inline] -> [Char]
-inlineListToIdentifier' [] = ""
-inlineListToIdentifier' (x:xs) =
- xAsText ++ inlineListToIdentifier' xs
- where xAsText = case x of
- Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
- intercalate "-" $ words $ map toLower s
- Emph lst -> inlineListToIdentifier' lst
- Strikeout lst -> inlineListToIdentifier' lst
- Superscript lst -> inlineListToIdentifier' lst
- SmallCaps lst -> inlineListToIdentifier' lst
- Subscript lst -> inlineListToIdentifier' lst
- Strong lst -> inlineListToIdentifier' lst
- Quoted _ lst -> inlineListToIdentifier' lst
- Cite _ lst -> inlineListToIdentifier' lst
- Code s -> s
- Space -> "-"
- EmDash -> "-"
- EnDash -> "-"
- Apostrophe -> ""
- Ellipses -> ""
- LineBreak -> "-"
- Math _ _ -> ""
- TeX _ -> ""
- HtmlInline _ -> ""
- Link lst _ -> inlineListToIdentifier' lst
- Image lst _ -> inlineListToIdentifier' lst
- Note _ -> ""
-
--- | Return unique identifiers for list of inline lists.
-uniqueIdentifiers :: [[Inline]] -> [String]
-uniqueIdentifiers ls =
- let addIdentifier (nonuniqueIds, uniqueIds) l =
- let new = inlineListToIdentifier l
- matches = length $ filter (== new) nonuniqueIds
- new' = (if null new then "section" else new) ++
- if matches > 0 then ("-" ++ show matches) else ""
- in (new:nonuniqueIds, new':uniqueIds)
- in reverse $ snd $ foldl addIdentifier ([],[]) ls
-
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
blockToHtml _ Null = return $ noHtml
@@ -335,26 +280,17 @@ blockToHtml opts (BlockQuote blocks) =
else blockListToHtml opts blocks >>= (return . blockquote)
blockToHtml opts (Header level lst) = do
contents <- inlineListToHtml opts lst
- st <- get
- let ids = stIds st
- let (id', rest) = if null ids
- then ("", [])
- else (head ids, tail ids)
- put $ st {stIds = rest}
- let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts)
- then []
- else [identifier id']
let contents' = if writerTableOfContents opts
- then anchor ! [href ("#TOC-" ++ id')] $ contents
+ then anchor ! [href "#TOC"] $ contents
else contents
return $ case level of
- 1 -> h1 contents' ! attribs
- 2 -> h2 contents' ! attribs
- 3 -> h3 contents' ! attribs
- 4 -> h4 contents' ! attribs
- 5 -> h5 contents' ! attribs
- 6 -> h6 contents' ! attribs
- _ -> paragraph contents' ! attribs
+ 1 -> h1 contents'
+ 2 -> h2 contents'
+ 3 -> h3 contents'
+ 4 -> h4 contents'
+ 5 -> h5 contents'
+ 6 -> h6 contents'
+ _ -> paragraph contents'
blockToHtml opts (BulletList lst) = do
contents <- mapM (blockListToHtml opts) lst
let attribs = if writerIncremental opts
@@ -492,7 +428,7 @@ inlineToHtml opts inline =
return $ primHtml $ "<EQ>" ++ str ++ "</EQ>"
PlainMath ->
inlineListToHtml opts (readTeXMath str) >>=
- return . (thespan ! [theclass "math"]))
+ return . (thespan ! [theclass "math"]) )
(TeX str) -> case writerHTMLMathMethod opts of
LaTeXMathML _ -> do modify (\st -> st {stMath = True})
return $ primHtml str
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index bebb88a76..f376ac0c6 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -138,7 +138,7 @@ tableOfContents opts headers =
-- | Converts an Element to a list item for a table of contents,
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
-elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
+elementToListItem (Sec _ _ headerText subsecs) = [Plain headerText] ++
if null subsecs
then []
else [BulletList $ map elementToListItem subsecs]
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index fc6cd1bf0..62d8c4a0c 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -59,7 +59,7 @@ tableOfContents headers =
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
-elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
+elementToListItem (Sec _ _ sectext subsecs) = [Plain sectext] ++
if null subsecs
then []
else [BulletList (map elementToListItem subsecs)]