aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc.hs3
-rw-r--r--src/Text/Pandoc/Highlighting.hs3
-rw-r--r--src/Text/Pandoc/Options.hs1
-rw-r--r--src/Text/Pandoc/Parsing.hs12
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs7
-rw-r--r--src/Text/Pandoc/Readers/Docx/Reducible.hs2
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs64
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs4
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs4
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs5
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs3
-rw-r--r--src/Text/Pandoc/Readers/Org.hs63
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs21
-rw-r--r--src/Text/Pandoc/Shared.hs6
-rw-r--r--src/Text/Pandoc/Writers/Custom.hs4
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs7
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs5
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs73
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs30
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs9
-rw-r--r--src/Text/Pandoc/Writers/Org.hs9
-rw-r--r--src/Text/Pandoc/Writers/TEI.hs320
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs7
23 files changed, 535 insertions, 127 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 3f46648a2..4b2397eb9 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -115,6 +115,7 @@ module Text.Pandoc
, writeHaddock
, writeCommonMark
, writeCustom
+ , writeTEI
-- * Rendering templates and default templates
, module Text.Pandoc.Templates
-- * Miscellaneous
@@ -169,6 +170,7 @@ import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Writers.Haddock
import Text.Pandoc.Writers.CommonMark
import Text.Pandoc.Writers.Custom
+import Text.Pandoc.Writers.TEI
import Text.Pandoc.Templates
import Text.Pandoc.Options
import Text.Pandoc.Shared (safeRead, warn, mapLeft, pandocVersion)
@@ -304,6 +306,7 @@ writers = [
,("asciidoc" , PureStringWriter writeAsciiDoc)
,("haddock" , PureStringWriter writeHaddock)
,("commonmark" , PureStringWriter writeCommonMark)
+ ,("tei" , PureStringWriter writeTEI)
]
getDefaultExtensions :: String -> Set Extension
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index d7a14c129..ecfef1832 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -70,7 +70,8 @@ highlight formatter (_, classes, keyvals) rawCode =
startNumber = firstNum,
numberLines = any (`elem`
["number","numberLines", "number-lines"]) classes }
- lcclasses = map (map toLower) classes
+ lcclasses = map (map toLower)
+ (classes ++ concatMap languagesByExtension classes)
in case find (`elem` lcLanguages) lcclasses of
Nothing
| numberLines fmtOpts -> Just
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 7dd47cd59..333f499fb 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -202,7 +202,6 @@ githubMarkdownExtensions :: Set Extension
githubMarkdownExtensions = Set.fromList
[ Ext_pipe_tables
, Ext_raw_html
- , Ext_tex_math_single_backslash
, Ext_fenced_code_blocks
, Ext_auto_identifiers
, Ext_ascii_identifiers
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 16fe75ed5..325231846 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -915,7 +915,7 @@ data ParserState = ParserState
stateMeta' :: F Meta, -- ^ Document metadata
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
stateHeaders :: M.Map Inlines String, -- ^ List of headers and ids (used for implicit ref links)
- stateIdentifiers :: [String], -- ^ List of header identifiers used
+ stateIdentifiers :: Set.Set String, -- ^ Header identifiers used
stateNextExample :: Int, -- ^ Number of next example
stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
stateHasChapters :: Bool, -- ^ True if \chapter encountered
@@ -973,8 +973,8 @@ instance HasHeaderMap ParserState where
updateHeaderMap f st = st{ stateHeaders = f $ stateHeaders st }
class HasIdentifierList st where
- extractIdentifierList :: st -> [String]
- updateIdentifierList :: ([String] -> [String]) -> st -> st
+ extractIdentifierList :: st -> Set.Set String
+ updateIdentifierList :: (Set.Set String -> Set.Set String) -> st -> st
instance HasIdentifierList ParserState where
extractIdentifierList = stateIdentifiers
@@ -1013,7 +1013,7 @@ defaultParserState =
stateMeta' = return nullMeta,
stateHeaderTable = [],
stateHeaders = M.empty,
- stateIdentifiers = [],
+ stateIdentifiers = Set.empty,
stateNextExample = 1,
stateExamples = M.empty,
stateHasChapters = False,
@@ -1092,8 +1092,8 @@ registerHeader (ident,classes,kvs) header' = do
let id'' = if Ext_ascii_identifiers `Set.member` exts
then catMaybes $ map toAsciiChar id'
else id'
- updateState $ updateIdentifierList $
- if id' == id'' then (id' :) else ([id', id''] ++)
+ updateState $ updateIdentifierList $ Set.insert id'
+ updateState $ updateIdentifierList $ Set.insert id''
updateState $ updateHeaderMap $ insert' header' id'
return (id'',classes,kvs)
else do
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 44f67ce75..1b3269136 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -89,6 +89,7 @@ import Text.TeXMath (writeTeX)
import Data.Default (Default)
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
+import qualified Data.Set as Set
import Control.Monad.Reader
import Control.Monad.State
import Data.Sequence (ViewL(..), viewl)
@@ -350,7 +351,7 @@ parPartToInlines (BookMark _ anchor) =
-- avoid an extra pass.
let newAnchor =
if not inHdrBool && anchor `elem` (M.elems anchorMap)
- then uniqueIdent [Str anchor] (M.elems anchorMap)
+ then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
else anchor
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
@@ -393,7 +394,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils)
| (c:cs) <- filter isAnchorSpan ils
, (Span (ident, ["anchor"], _) _) <- c = do
hdrIDMap <- gets docxAnchorMap
- let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+ let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
modify $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap}
return $ Header n (newIdent, classes, kvs) (ils \\ (c:cs))
-- Otherwise we just give it a name, and register that name (associate
@@ -401,7 +402,7 @@ makeHeaderAnchor' (Header n (_, classes, kvs) ils)
makeHeaderAnchor' (Header n (_, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
- let newIdent = uniqueIdent ils (M.elems hdrIDMap)
+ let newIdent = uniqueIdent ils (Set.fromList $ M.elems hdrIDMap)
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
return $ Header n (newIdent, classes, kvs) ils
makeHeaderAnchor' blk = return blk
diff --git a/src/Text/Pandoc/Readers/Docx/Reducible.hs b/src/Text/Pandoc/Readers/Docx/Reducible.hs
index c93b40119..e6de2d474 100644
--- a/src/Text/Pandoc/Readers/Docx/Reducible.hs
+++ b/src/Text/Pandoc/Readers/Docx/Reducible.hs
@@ -53,6 +53,7 @@ instance Modifiable Inlines where
(Strikeout _) -> Modifier strikeout
(Superscript _) -> Modifier superscript
(Subscript _) -> Modifier subscript
+ (Link attr _ tgt) -> Modifier $ linkWith attr (fst tgt) (snd tgt)
(Span attr _) -> AttrModifier spanWith attr
_ -> NullModifier
_ -> NullModifier
@@ -65,6 +66,7 @@ instance Modifiable Inlines where
(Strikeout lst) -> fromList lst
(Superscript lst) -> fromList lst
(Subscript lst) -> fromList lst
+ (Link _ lst _) -> fromList lst
(Span _ lst) -> fromList lst
_ -> ils
_ -> ils
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index a34e2fb5c..03b790d0b 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -43,7 +43,7 @@ import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
-import Text.Pandoc.Shared ( extractSpaces, renderTags'
+import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField
, escapeURI, safeRead, mapLeft )
import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
, Extension (Ext_epub_html_exts,
@@ -68,7 +68,7 @@ import Text.Pandoc.Error
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Compat.Monoid ((<>))
import Text.Parsec.Error
-
+import qualified Data.Set as Set
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ReaderOptions -- ^ Reader options
@@ -77,7 +77,7 @@ readHtml :: ReaderOptions -- ^ Reader options
readHtml opts inp =
mapLeft (ParseFailure . getError) . flip runReader def $
runParserT parseDoc
- (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty)
+ (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
"source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
@@ -104,7 +104,7 @@ data HTMLState =
{ parserState :: ParserState,
noteTable :: [(String, Blocks)],
baseHref :: Maybe String,
- identifiers :: [String],
+ identifiers :: Set.Set String,
headerMap :: M.Map Inlines String
}
@@ -137,7 +137,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAnyTag
then return mempty
else do
let content = fromAttrib "content" mt
- updateState $ B.setMeta name (B.text content)
+ updateState $ \s ->
+ let ps = parserState s in
+ s{ parserState = ps{
+ stateMeta = addMetaField name (B.text content)
+ (stateMeta ps) } }
return mempty
pBaseTag = do
bt <- pSatisfy (~== TagOpen "base" [])
@@ -441,6 +445,7 @@ pTable = try $ do
-- fail on empty table
guard $ not $ null head' && null rows
let isSinglePlain x = case B.toList x of
+ [] -> True
[Plain _] -> True
_ -> False
let isSimple = all isSinglePlain $ concat (head':rows)
@@ -925,14 +930,45 @@ htmlInBalanced :: (Monad m)
=> (Tag String -> Bool)
-> ParserT String st m String
htmlInBalanced f = try $ do
- (TagOpen t _, tag) <- htmlTag f
- guard $ not $ "/>" `isSuffixOf` tag -- not a self-closing tag
- let stopper = htmlTag (~== TagClose t)
- let anytag = snd <$> htmlTag (const True)
- contents <- many $ notFollowedBy' stopper >>
- (htmlInBalanced f <|> anytag <|> count 1 anyChar)
- endtag <- liftM snd stopper
- return $ tag ++ concat contents ++ endtag
+ lookAhead (char '<')
+ inp <- getInput
+ let ts = canonicalizeTags $
+ parseTagsOptions parseOptions{ optTagWarning = True,
+ optTagPosition = True } inp
+ case ts of
+ (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
+ guard $ f t
+ guard $ not $ hasTagWarning (t : take 1 rest)
+ case htmlInBalanced' tn (t:rest) of
+ [] -> mzero
+ xs -> case reverse xs of
+ (TagClose _ : TagPosition er ec : _) -> do
+ let ls = er - sr
+ let cs = ec - sc
+ lscontents <- concat <$> count ls anyLine
+ cscontents <- count cs anyChar
+ (_,closetag) <- htmlTag (~== TagClose tn)
+ return (lscontents ++ cscontents ++ closetag)
+ _ -> mzero
+ _ -> mzero
+
+htmlInBalanced' :: String
+ -> [Tag String]
+ -> [Tag String]
+htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
+ where go :: Int -> [Tag String] -> Maybe [Tag String]
+ go n (t@(TagOpen tn' _):rest) | tn' == tagname =
+ (t :) <$> go (n + 1) rest
+ go 1 (t@(TagClose tn'):_) | tn' == tagname =
+ return [t]
+ go n (t@(TagClose tn'):rest) | tn' == tagname =
+ (t :) <$> go (n - 1) rest
+ go n (t:ts') = (t :) <$> go n ts'
+ go n [] = mzero
+
+hasTagWarning :: [Tag String] -> Bool
+hasTagWarning (TagWarning _:_) = True
+hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: Monad m
@@ -941,8 +977,6 @@ htmlTag :: Monad m
htmlTag f = try $ do
lookAhead (char '<')
inp <- getInput
- let hasTagWarning (TagWarning _:_) = True
- hasTagWarning _ = False
let (next : rest) = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = True } inp
guard $ f next
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4b30725aa..9a1708331 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -422,7 +422,8 @@ inlineCommand = try $ do
else if parseRaw
then return $ rawInline "latex" rawcommand
else return mempty
- lookupListDefault mzero [name',name] inlineCommands
+ (lookupListDefault mzero [name',name] inlineCommands <*
+ optional (try (string "{}")))
<|> raw
unlessParseRaw :: LP ()
@@ -435,6 +436,7 @@ isBlockCommand s = s `M.member` blockCommands
inlineEnvironments :: M.Map String (LP Inlines)
inlineEnvironments = M.fromList
[ ("displaymath", mathEnv id Nothing "displaymath")
+ , ("math", math <$> verbEnv "math")
, ("equation", mathEnv id Nothing "equation")
, ("equation*", mathEnv id Nothing "equation*")
, ("gather", mathEnv id (Just "gathered") "gather")
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 77c3a1016..82d343243 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -36,7 +36,7 @@ import Data.List ( transpose, sortBy, findIndex, intersperse, intercalate )
import qualified Data.Map as M
import Data.Scientific (coefficient, base10Exponent)
import Data.Ord ( comparing )
-import Data.Char ( isSpace, isAlphaNum, toLower )
+import Data.Char ( isSpace, isAlphaNum, toLower, isPunctuation )
import Data.Maybe
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
@@ -1554,7 +1554,7 @@ math :: MarkdownParser (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros'))
<|> (return . B.math <$> (mathInline >>= applyMacros')) <+?>
((getOption readerSmart >>= guard) *> (return <$> apostrophe)
- <* notFollowedBy space)
+ <* notFollowedBy (space <|> satisfy isPunctuation))
-- Parses material enclosed in *s, **s, _s, or __s.
-- Designed to avoid backtracking.
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d29ec50e7..950497992 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -52,6 +52,7 @@ import Text.HTML.TagSoup
import Data.Sequence (viewl, ViewL(..), (<|))
import qualified Data.Foldable as F
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Char (isDigit, isSpace)
import Data.Maybe (fromMaybe)
import Text.Printf (printf)
@@ -69,7 +70,7 @@ readMediaWiki opts s =
, mwNextLinkNumber = 1
, mwCategoryLinks = []
, mwHeaderMap = M.empty
- , mwIdentifierList = []
+ , mwIdentifierList = Set.empty
}
(s ++ "\n")
@@ -78,7 +79,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwNextLinkNumber :: Int
, mwCategoryLinks :: [Inlines]
, mwHeaderMap :: M.Map Inlines String
- , mwIdentifierList :: [String]
+ , mwIdentifierList :: Set.Set String
}
type MWParser = Parser [Char] MWState
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 1f1c57646..8c475eefc 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -61,6 +61,7 @@ import Text.Pandoc.Readers.Odt.Generic.XMLConverter
import Text.Pandoc.Readers.Odt.Generic.Fallible
import Text.Pandoc.Readers.Odt.Generic.Utils
+import qualified Data.Set as Set
--------------------------------------------------------------------------------
-- State
@@ -221,7 +222,7 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do
getHeaderAnchor :: OdtReaderSafe Inlines Anchor
getHeaderAnchor = proc title -> do
state <- getExtraState -< ()
- let anchor = uniqueIdent (toList title) (usedAnchors state)
+ let anchor = uniqueIdent (toList title) (Set.fromList $ usedAnchors state)
modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 894bd1954..7dd611be3 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -53,6 +53,7 @@ import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isJust)
import Network.HTTP (urlEncode)
@@ -144,7 +145,7 @@ data OrgParserState = OrgParserState
, orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
, orgStateParserContext :: ParserContext
- , orgStateIdentifiers :: [String]
+ , orgStateIdentifiers :: Set.Set String
, orgStateHeaderMap :: M.Map Inlines String
}
@@ -186,7 +187,7 @@ defaultOrgParserState = OrgParserState
, orgStateMeta' = return nullMeta
, orgStateNotes' = []
, orgStateParserContext = NullState
- , orgStateIdentifiers = []
+ , orgStateIdentifiers = Set.empty
, orgStateHeaderMap = M.empty
}
@@ -1238,37 +1239,37 @@ applyCustomLinkFormat link = do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
return $ maybe link ($ drop 1 rest) formatter
--- TODO: might be a lot smarter/cleaner to use parsec and ADTs for this kind
--- of parsing.
+-- | Take a link and return a function which produces new inlines when given
+-- description inlines.
linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF s =
+linkToInlinesF linkStr =
+ case linkStr of
+ "" -> pure . B.link mempty "" -- wiki link (empty by convention)
+ ('#':_) -> pure . B.link linkStr "" -- document-local fraction
+ _ -> case cleanLinkString linkStr of
+ (Just cleanedLink) -> if isImageFilename cleanedLink
+ then const . pure $ B.image cleanedLink "" ""
+ else pure . B.link cleanedLink ""
+ Nothing -> internalLink linkStr -- other internal link
+
+-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
case s of
- "" -> pure . B.link "" ""
- ('#':_) -> pure . B.link s ""
- _ | isImageFilename s -> const . pure $ B.image s "" ""
- _ | isFileLink s -> pure . B.link (dropLinkType s) ""
- _ | isUri s -> pure . B.link s ""
- _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) ""
- _ | isRelativeFilePath s -> pure . B.link s ""
- _ -> internalLink s
-
-isFileLink :: String -> Bool
-isFileLink s = ("file:" `isPrefixOf` s) && not ("file://" `isPrefixOf` s)
-
-dropLinkType :: String -> String
-dropLinkType = tail . snd . break (== ':')
-
-isRelativeFilePath :: String -> Bool
-isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) &&
- (':' `notElem` s)
-
-isUri :: String -> Bool
-isUri s = let (scheme, path) = break (== ':') s
- in all (\c -> isAlphaNum c || c `elem` (".-" :: String)) scheme
- && not (null path)
-
-isAbsoluteFilePath :: String -> Bool
-isAbsoluteFilePath = ('/' ==) . head
+ '/':_ -> Just $ "file://" ++ s -- absolute path
+ '.':'/':_ -> Just s -- relative path
+ '.':'.':'/':_ -> Just s -- relative path
+ -- Relative path or URL (file schema)
+ 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+ _ | isUrl s -> Just s -- URL
+ _ -> Nothing
+ where
+ isUrl :: String -> Bool
+ isUrl cs =
+ let (scheme, path) = break (== ':') cs
+ in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+ && not (null path)
isImageFilename :: String -> Bool
isImageFilename filename =
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index dd1d289a3..6f64540f8 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -583,7 +583,18 @@ code2 = do
-- | Html / CSS attributes
attributes :: Parser [Char] ParserState Attr
-attributes = (foldl (flip ($)) ("",[],[])) `fmap` many attribute
+attributes = (foldl (flip ($)) ("",[],[])) <$>
+ try (do special <- option id specialAttribute
+ attrs <- many attribute
+ return (special : attrs))
+
+specialAttribute :: Parser [Char] ParserState (Attr -> Attr)
+specialAttribute = do
+ alignStr <- ("center" <$ char '=') <|>
+ ("justify" <$ try (string "<>")) <|>
+ ("right" <$ char '>') <|>
+ ("left" <$ char '<')
+ return $ addStyle ("text-align:" ++ alignStr)
attribute :: Parser [Char] ParserState (Attr -> Attr)
attribute = classIdAttr <|> styleAttr <|> langAttr
@@ -602,7 +613,13 @@ classIdAttr = try $ do -- (class class #id)
styleAttr :: Parser [Char] ParserState (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
- return $ \(id',classes,keyvals) -> (id',classes,("style",style):keyvals)
+ return $ addStyle style
+
+addStyle :: String -> Attr -> Attr
+addStyle style (id',classes,keyvals) =
+ (id',classes,keyvals')
+ where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
+ style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
langAttr :: Parser [Char] ParserState (Attr -> Attr)
langAttr = do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index aa07c81e1..b5efcf172 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -706,14 +706,14 @@ headerLtEq _ _ = False
-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
-uniqueIdent :: [Inline] -> [String] -> String
+uniqueIdent :: [Inline] -> Set.Set String -> String
uniqueIdent title' usedIdents
= let baseIdent = case inlineListToIdentifier title' of
"" -> "section"
x -> x
numIdent n = baseIdent ++ "-" ++ show n
- in if baseIdent `elem` usedIdents
- then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
+ in if baseIdent `Set.member` usedIdents
+ then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
Nothing -> baseIdent -- if we have more than 60,000, allow repeats
else baseIdent
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 9671fc05b..d69eaaa64 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -222,8 +222,8 @@ blockToCustom _ Null = return ""
blockToCustom lua (Plain inlines) = callfunc lua "Plain" inlines
-blockToCustom lua (Para [Image _ txt (src,tit)]) =
- callfunc lua "CaptionedImage" src tit txt
+blockToCustom lua (Para [Image attr txt (src,tit)]) =
+ callfunc lua "CaptionedImage" src tit txt (attrToMap attr)
blockToCustom lua (Para inlines) = callfunc lua "Para" inlines
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 827d32620..150e19043 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -34,6 +34,7 @@ import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.Map as M
+import qualified Data.Set as Set
import qualified Text.Pandoc.UTF8 as UTF8
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
@@ -95,7 +96,7 @@ data WriterState = WriterState{
stTextProperties :: [Element]
, stParaProperties :: [Element]
, stFootnotes :: [Element]
- , stSectionIds :: [String]
+ , stSectionIds :: Set.Set String
, stExternalLinks :: M.Map String String
, stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString)
, stListLevel :: Int
@@ -117,7 +118,7 @@ defaultWriterState = WriterState{
stTextProperties = []
, stParaProperties = []
, stFootnotes = defaultFootnotes
- , stSectionIds = []
+ , stSectionIds = Set.empty
, stExternalLinks = M.empty
, stImages = M.empty
, stListLevel = -1
@@ -742,7 +743,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
let bookmarkName = if null ident
then uniqueIdent lst usedIdents
else ident
- modify $ \s -> s{ stSectionIds = bookmarkName : stSectionIds s }
+ modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s }
id' <- getUniqueId
let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
,("w:name",bookmarkName)] ()
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 64f94f41f..804dbb926 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -31,6 +31,7 @@ Conversion of 'Pandoc' documents to EPUB.
module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
import Data.IORef ( IORef, newIORef, readIORef, modifyIORef )
import qualified Data.Map as M
+import qualified Data.Set as Set
import Data.Maybe ( fromMaybe, catMaybes )
import Data.List ( isPrefixOf, isInfixOf, intercalate )
import System.Environment ( getEnv )
@@ -916,13 +917,13 @@ showChapter = printf "ch%03d.xhtml"
-- Add identifiers to any headers without them.
addIdentifiers :: [Block] -> [Block]
-addIdentifiers bs = evalState (mapM go bs) []
+addIdentifiers bs = evalState (mapM go bs) Set.empty
where go (Header n (ident,classes,kvs) ils) = do
ids <- get
let ident' = if null ident
then uniqueIdent ils ids
else ident
- put $ ident' : ids
+ modify $ Set.insert ident'
return $ Header n (ident',classes,kvs) ils
go x = return x
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 73a8906c3..c5b6a6db2 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -140,40 +140,38 @@ pandocToHtml opts (Pandoc meta blocks) = do
st <- get
let notes = reverse (stNotes st)
let thebody = blocks' >> footnoteSection opts notes
- let math = if stMath st
- then case writerHTMLMathMethod opts of
- LaTeXMathML (Just url) ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ mempty
- MathML (Just url) ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ mempty
- MathJax url ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ case writerSlideVariant opts of
- SlideousSlides ->
- preEscapedString
- "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
- _ -> mempty
- JsMath (Just url) ->
- H.script ! A.src (toValue url)
- ! A.type_ "text/javascript"
- $ mempty
- KaTeX js css ->
- (H.script ! A.src (toValue js) $ mempty) <>
- (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <>
- (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX)
- _ -> case lookup "mathml-script" (writerVariables opts) of
- Just s | not (writerHtml5 opts) ->
- H.script ! A.type_ "text/javascript"
- $ preEscapedString
- ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n")
- | otherwise -> mempty
- Nothing -> mempty
- else mempty
+ let math = case writerHTMLMathMethod opts of
+ LaTeXMathML (Just url) ->
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
+ MathML (Just url) ->
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
+ MathJax url ->
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ case writerSlideVariant opts of
+ SlideousSlides ->
+ preEscapedString
+ "MathJax.Hub.Queue([\"Typeset\",MathJax.Hub]);"
+ _ -> mempty
+ JsMath (Just url) ->
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
+ KaTeX js css ->
+ (H.script ! A.src (toValue js) $ mempty) <>
+ (H.link ! A.rel "stylesheet" ! A.href (toValue css)) <>
+ (H.script ! A.type_ "text/javascript" $ toHtml renderKaTeX)
+ _ -> case lookup "mathml-script" (writerVariables opts) of
+ Just s | not (writerHtml5 opts) ->
+ H.script ! A.type_ "text/javascript"
+ $ preEscapedString
+ ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n")
+ | otherwise -> mempty
+ Nothing -> mempty
let context = (if stHighlighting st
then defField "highlighting-css"
(styleToCss $ writerHighlightStyle opts)
@@ -647,7 +645,7 @@ alignmentToString alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
AlignCenter -> "center"
- AlignDefault -> "left"
+ AlignDefault -> ""
tableItemToHtml :: WriterOptions
-> (Html -> Html)
@@ -660,7 +658,10 @@ tableItemToHtml opts tag' align' item = do
let attribs = if writerHtml5 opts
then A.style (toValue $ "text-align: " ++ alignStr ++ ";")
else A.align (toValue alignStr)
- return $ (tag' ! attribs $ contents) >> nl opts
+ let tag'' = if null alignStr
+ then tag'
+ else tag' ! attribs
+ return $ (tag'' $ contents) >> nl opts
toListItems :: WriterOptions -> [Html] -> [Html]
toListItems opts items = map (toListItem opts) items ++ [nl opts]
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 2dcbf62bf..4e4279ec5 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -113,12 +113,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
(fmap (render colwidth) . inlineListToLaTeX)
meta
let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
- let documentClass = case P.parse (do P.skipMany (P.satisfy (/='\\'))
- P.string "\\documentclass"
- P.skipMany (P.satisfy (/='{'))
- P.char '{'
- P.manyTill P.letter (P.char '}')) "template"
- template of
+ let documentClass = case P.parse pDocumentClass "template" template of
Right r -> r
Left _ -> ""
case lookup "documentclass" (writerVariables options) `mplus`
@@ -591,7 +586,7 @@ blockToLaTeX (Table caption aligns widths heads rows) = do
rows' <- mapM (tableRowToLaTeX False aligns widths) rows
let colDescriptors = text $ concat $ map toColDescriptor aligns
modify $ \s -> s{ stTable = True }
- return $ "\\begin{longtable}[c]" <>
+ return $ "\\begin{longtable}[]" <>
braces ("@{}" <> colDescriptors <> "@{}")
-- the @{} removes extra space at beginning and end
$$ capt
@@ -1260,3 +1255,24 @@ commonFromBcp47 x = fromIso $ head x
deNote :: Inline -> Inline
deNote (Note _) = RawInline (Format "latex") ""
deNote x = x
+
+pDocumentOptions :: P.Parsec String () [String]
+pDocumentOptions = do
+ P.char '['
+ opts <- P.sepBy
+ (P.many $ P.spaces *> P.noneOf (" ,]" :: String) <* P.spaces)
+ (P.char ',')
+ P.char ']'
+ return opts
+
+pDocumentClass :: P.Parsec String () String
+pDocumentClass =
+ do P.skipMany (P.satisfy (/='\\'))
+ P.string "\\documentclass"
+ classOptions <- pDocumentOptions <|> return []
+ if ("article" :: String) `elem` classOptions
+ then return "article"
+ else do P.skipMany (P.satisfy (/='{'))
+ P.char '{'
+ P.manyTill P.letter (P.char '}')
+
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 5a92f3cdf..ce993093c 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -53,6 +53,7 @@ import Data.Yaml (Value(Object,String,Array,Bool,Number))
import qualified Data.HashMap.Strict as H
import qualified Data.Vector as V
import qualified Data.Text as T
+import qualified Data.Set as Set
type Notes = [[Block]]
type Ref = ([Inline], Target, Attr)
@@ -61,11 +62,11 @@ data WriterState = WriterState { stNotes :: Notes
, stRefs :: Refs
, stRefShortcutable :: Bool
, stInList :: Bool
- , stIds :: [String]
+ , stIds :: Set.Set String
, stPlain :: Bool }
instance Default WriterState
where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True,
- stInList = False, stIds = [], stPlain = False }
+ stInList = False, stIds = Set.empty, stPlain = False }
-- | Convert Pandoc to Markdown.
writeMarkdown :: WriterOptions -> Pandoc -> String
@@ -116,7 +117,7 @@ plainTitleBlock tit auths dat =
dat <> cr
yamlMetadataBlock :: Value -> Doc
-yamlMetadataBlock v = "---" $$ (jsonToYaml v) $$ "..."
+yamlMetadataBlock v = "---" $$ (jsonToYaml v) $$ "---"
jsonToYaml :: Value -> Doc
jsonToYaml (Object hashmap) =
@@ -364,7 +365,7 @@ blockToMarkdown opts (Header level attr inlines) = do
-- so we know whether to print an explicit identifier
ids <- gets stIds
let autoId = uniqueIdent inlines ids
- modify $ \st -> st{ stIds = autoId : ids }
+ modify $ \st -> st{ stIds = Set.insert autoId ids }
let attr' = case attr of
("",[],[]) -> empty
(id',[],[]) | isEnabled Ext_auto_identifiers opts
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index d843d2efd..20086ed19 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -102,6 +102,10 @@ escapeString = escapeStringUsing $
, ('\x2026',"...")
] ++ backslashEscapes "^_"
+isRawFormat :: Format -> Bool
+isRawFormat f =
+ f == Format "latex" || f == Format "tex" || f == Format "org"
+
-- | Convert Pandoc block element to Org.
blockToOrg :: Block -- ^ Block element
-> State WriterState Doc
@@ -129,7 +133,7 @@ blockToOrg (Para inlines) = do
blockToOrg (RawBlock "html" str) =
return $ blankline $$ "#+BEGIN_HTML" $$
nest 2 (text str) $$ "#+END_HTML" $$ blankline
-blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] =
+blockToOrg (RawBlock f str) | isRawFormat f =
return $ text str
blockToOrg (RawBlock _ _) = return empty
blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline
@@ -271,7 +275,8 @@ inlineToOrg (Math t str) = do
return $ if t == InlineMath
then "$" <> text str <> "$"
else "$$" <> text str <> "$$"
-inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str
+inlineToOrg (RawInline f str) | isRawFormat f =
+ return $ text str
inlineToOrg (RawInline _ _) = return empty
inlineToOrg (LineBreak) = return (text "\\\\" <> cr)
inlineToOrg Space = return space
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
new file mode 100644
index 000000000..b9e683ab9
--- /dev/null
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -0,0 +1,320 @@
+{-# LANGUAGE OverloadedStrings, PatternGuards #-}
+{-
+Copyright (C) 2006-2015 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docbook
+ Copyright : Copyright (C) 2006-2015 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Docbook XML.
+-}
+module Text.Pandoc.Writers.TEI (writeTEI) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Options
+import Text.Pandoc.Templates (renderTemplate')
+import Data.List ( stripPrefix, isPrefixOf, isSuffixOf )
+import Data.Char ( toLower )
+import Text.Pandoc.Highlighting ( languages, languagesByExtension )
+import Text.Pandoc.Pretty
+import Text.Pandoc.ImageSize
+import qualified Text.Pandoc.Builder as B
+
+-- | Convert list of authors to a docbook <author> section
+authorToTEI :: WriterOptions -> [Inline] -> B.Inlines
+authorToTEI opts name' =
+ let name = render Nothing $ inlinesToTEI opts name'
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ in B.rawInline "tei" $ render colwidth $
+ inTagsSimple "author" (text $ escapeStringForXML name)
+
+-- | Convert Pandoc document to string in Docbook format.
+writeTEI :: WriterOptions -> Pandoc -> String
+writeTEI opts (Pandoc meta blocks) =
+ let elements = hierarchicalize blocks
+ colwidth = if writerWrapText opts == WrapAuto
+ then Just $ writerColumns opts
+ else Nothing
+ render' = render colwidth
+ opts' = if "/book>" `isSuffixOf`
+ (trimr $ writerTemplate opts)
+ then opts{ writerChapters = True }
+ else opts
+ startLvl = if writerChapters opts' then 0 else 1
+ auths' = map (authorToTEI opts) $ docAuthors meta
+ meta' = B.setMeta "author" auths' meta
+ Just metadata = metaToJSON opts
+ (Just . render colwidth . (vcat .
+ (map (elementToTEI opts' startLvl)) . hierarchicalize))
+ (Just . render colwidth . inlinesToTEI opts')
+ meta'
+ main = render' $ vcat (map (elementToTEI opts' startLvl) elements)
+ context = defField "body" main
+ $ defField "mathml" (case writerHTMLMathMethod opts of
+ MathML _ -> True
+ _ -> False)
+ $ metadata
+ in if writerStandalone opts
+ then renderTemplate' (writerTemplate opts) context
+ else main
+
+-- | Convert an Element to TEI.
+elementToTEI :: WriterOptions -> Int -> Element -> Doc
+elementToTEI opts _ (Blk block) = blockToTEI opts block
+elementToTEI opts lvl (Sec _ _num (id',_,_) title elements) =
+ -- TEI doesn't allow sections with no content, so insert some if needed
+ let elements' = if null elements
+ then [Blk (Para [])]
+ else elements
+ divType = case lvl of
+ n | n == 0 -> "chapter"
+ | n >= 1 && n <= 5 -> "level" ++ show n
+ | otherwise -> "section"
+ in inTags True "div" [("type", divType) | not (null id')] $
+-- ("id", writerIdentifierPrefix opts ++ id') | not (null id')] $
+ inTagsSimple "head" (inlinesToTEI opts title) $$
+ vcat (map (elementToTEI opts (lvl + 1)) elements')
+
+-- | Convert a list of Pandoc blocks to TEI.
+blocksToTEI :: WriterOptions -> [Block] -> Doc
+blocksToTEI opts = vcat . map (blockToTEI opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a TEI
+-- list with labels and items.
+deflistItemsToTEI :: WriterOptions -> [([Inline],[[Block]])] -> Doc
+deflistItemsToTEI opts items =
+ vcat $ map (\(term, defs) -> deflistItemToTEI opts term defs) items
+
+-- | Convert a term and a list of blocks into a TEI varlistentry.
+deflistItemToTEI :: WriterOptions -> [Inline] -> [[Block]] -> Doc
+deflistItemToTEI opts term defs =
+ let def' = concatMap (map plainToPara) defs
+ in inTagsIndented "label" (inlinesToTEI opts term) $$
+ inTagsIndented "item" (blocksToTEI opts def')
+
+-- | Convert a list of lists of blocks to a list of TEI list items.
+listItemsToTEI :: WriterOptions -> [[Block]] -> Doc
+listItemsToTEI opts items = vcat $ map (listItemToTEI opts) items
+
+-- | Convert a list of blocks into a TEI list item.
+listItemToTEI :: WriterOptions -> [Block] -> Doc
+listItemToTEI opts item =
+ inTagsIndented "item" $ blocksToTEI opts $ map plainToPara item
+
+imageToTEI :: WriterOptions -> Attr -> String -> Doc
+imageToTEI _ attr src = selfClosingTag "graphic" $
+ ("url", src) : idAndRole attr ++ dims
+ where
+ dims = go Width "width" ++ go Height "depth"
+ go dir dstr = case (dimension dir attr) of
+ Just a -> [(dstr, show a)]
+ Nothing -> []
+
+-- | Convert a Pandoc block element to TEI.
+blockToTEI :: WriterOptions -> Block -> Doc
+blockToTEI _ Null = empty
+-- Add ids to paragraphs in divs with ids - this is needed for
+-- pandoc-citeproc to get link anchors in bibliographies:
+blockToTEI opts (Div (ident,_,_) [Para lst]) =
+ let attribs = [("id", ident) | not (null ident)] in
+ inTags False "p" attribs $ inlinesToTEI opts lst
+blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs
+blockToTEI _ (Header _ _ _) = empty -- should not occur after hierarchicalize
+-- For TEI simple, text must be within containing block element, so
+-- we use plainToPara to ensure that Plain text ends up contained by
+-- something.
+blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst
+-- title beginning with fig: indicates that the image is a figure
+--blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) =
+-- let alt = inlinesToTEI opts txt
+-- capt = if null txt
+-- then empty
+-- else inTagsSimple "title" alt
+-- in inTagsIndented "figure" $
+-- capt $$
+-- (inTagsIndented "mediaobject" $
+-- (inTagsIndented "imageobject"
+-- (imageToTEI opts attr src)) $$
+-- inTagsSimple "textobject" (inTagsSimple "phrase" alt))
+blockToTEI opts (Para lst) =
+ inTags False "p" [] $ inlinesToTEI opts lst
+blockToTEI opts (BlockQuote blocks) =
+ inTagsIndented "quote" $ blocksToTEI opts blocks
+blockToTEI _ (CodeBlock (_,classes,_) str) =
+ text ("<ab type='codeblock " ++ lang ++ "'>") <> cr <>
+ flush (text (escapeStringForXML str) <> cr <> text "</ab>")
+ where lang = if null langs
+ then ""
+ else escapeStringForXML (head langs)
+ isLang l = map toLower l `elem` map (map toLower) languages
+ langsFrom s = if isLang s
+ then [s]
+ else languagesByExtension . map toLower $ s
+ langs = concatMap langsFrom classes
+blockToTEI opts (BulletList lst) =
+ let attribs = [("type", "unordered")]
+ in inTags True "list" attribs $ listItemsToTEI opts lst
+blockToTEI _ (OrderedList _ []) = empty
+blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) =
+ let attribs = case numstyle of
+ DefaultStyle -> []
+ Decimal -> [("type", "ordered:arabic")]
+ Example -> [("type", "ordered:arabic")]
+ UpperAlpha -> [("type", "ordered:upperalpha")]
+ LowerAlpha -> [("type", "ordered:loweralpha")]
+ UpperRoman -> [("type", "ordered:upperroman")]
+ LowerRoman -> [("type", "ordered:lowerroman")]
+ items = if start == 1
+ then listItemsToTEI opts (first:rest)
+ else (inTags True "item" [("n",show start)]
+ (blocksToTEI opts $ map plainToPara first)) $$
+ listItemsToTEI opts rest
+ in inTags True "list" attribs items
+blockToTEI opts (DefinitionList lst) =
+ let attribs = [("type", "definition")]
+ in inTags True "list" attribs $ deflistItemsToTEI opts lst
+blockToTEI _ (RawBlock f str)
+ | f == "tei" = text str -- raw TEI block (should such a thing exist).
+-- | f == "html" = text str -- allow html for backwards compatibility
+ | otherwise = empty
+blockToTEI _ HorizontalRule =
+ selfClosingTag "milestone" [("unit","undefined"), ("type","separator"),("rendition","line")]
+
+-- | TEI Tables
+-- TEI Simple's tables are composed of cells and rows; other
+-- table info in the AST is here lossily discard.
+blockToTEI opts (Table _ _ _ headers rows) =
+ let
+ headers' = tableHeadersToTEI opts headers
+-- headers' = if all null headers
+-- then return empty
+-- else tableRowToTEI opts headers
+ in
+ inTags True "table" [] $
+ vcat $ [headers'] <> map (tableRowToTEI opts) rows
+
+tableRowToTEI :: WriterOptions
+ -> [[Block]]
+ -> Doc
+tableRowToTEI opts cols =
+ inTagsIndented "row" $ vcat $ map (tableItemToTEI opts) cols
+
+tableHeadersToTEI :: WriterOptions
+ -> [[Block]]
+ -> Doc
+tableHeadersToTEI opts cols =
+ inTags True "row" [("role","label")] $ vcat $ map (tableItemToTEI opts) cols
+
+tableItemToTEI :: WriterOptions
+ -> [Block]
+ -> Doc
+tableItemToTEI opts item =
+ inTags False "cell" [] $ vcat $ map (blockToTEI opts) item
+
+-- | Convert a list of inline elements to TEI.
+inlinesToTEI :: WriterOptions -> [Inline] -> Doc
+inlinesToTEI opts lst = hcat $ map (inlineToTEI opts) lst
+
+-- | Convert an inline element to TEI.
+inlineToTEI :: WriterOptions -> Inline -> Doc
+inlineToTEI _ (Str str) = text $ escapeStringForXML str
+inlineToTEI opts (Emph lst) =
+ inTags False "hi" [("rendition","simple:italic")] $ inlinesToTEI opts lst
+inlineToTEI opts (Strong lst) =
+ inTags False "hi" [("rendition", "simple:bold")] $ inlinesToTEI opts lst
+inlineToTEI opts (Strikeout lst) =
+ inTags False "hi" [("rendition", "simple:strikethrough")] $
+ inlinesToTEI opts lst
+inlineToTEI opts (Superscript lst) =
+ inTags False "hi" [("rendition", "simple:superscript")] $ inlinesToTEI opts lst
+inlineToTEI opts (Subscript lst) =
+ inTags False "hi" [("rendition", "simple:subscript")] $ inlinesToTEI opts lst
+inlineToTEI opts (SmallCaps lst) =
+ inTags False "hi" [("rendition", "simple:smallcaps")] $
+ inlinesToTEI opts lst
+inlineToTEI opts (Quoted _ lst) =
+ inTagsSimple "quote" $ inlinesToTEI opts lst
+inlineToTEI opts (Cite _ lst) =
+ inlinesToTEI opts lst
+inlineToTEI opts (Span _ ils) =
+ inlinesToTEI opts ils
+inlineToTEI _ (Code _ str) =
+ inTags False "seg" [("type","code")] $ text (escapeStringForXML str)
+-- Distinguish display from inline math by wrapping the former in a "figure."
+inlineToTEI _ (Math t str) =
+ case t of
+ InlineMath -> inTags False "formula" [("notation","TeX")] $
+ text (str)
+ DisplayMath -> inTags True "figure" [("type","math")] $
+ inTags False "formula" [("notation","TeX")] $ text (str)
+
+inlineToTEI _ (RawInline f x) | f == "tei" = text x
+ | otherwise = empty
+inlineToTEI _ LineBreak = selfClosingTag "lb" []
+inlineToTEI _ Space = space
+-- because we use \n for LineBreak, we can't do soft breaks:
+inlineToTEI _ SoftBreak = space
+inlineToTEI opts (Link attr txt (src, _))
+ | Just email <- stripPrefix "mailto:" src =
+ let emailLink = text $
+ escapeStringForXML $ email
+ in case txt of
+ [Str s] | escapeURI s == email -> emailLink
+ _ -> inlinesToTEI opts txt <+>
+ char '(' <> emailLink <> char ')'
+ | otherwise =
+ (if isPrefixOf "#" src
+ then inTags False "ref" $ ("target", drop 1 src) : idAndRole attr
+ else inTags False "ref" $ ("target", src) : idAndRole attr ) $
+ inlinesToTEI opts txt
+inlineToTEI opts (Image attr description (src, tit)) =
+ let titleDoc = if null tit
+ then empty
+ else inTags False "figDesc" [] (text $ escapeStringForXML tit)
+ imageDesc = if null description
+ then empty
+ else inTags False "head" [] (inlinesToTEI opts description)
+ in inTagsIndented "figure" $ imageDesc $$
+ imageToTEI opts attr src $$ titleDoc
+inlineToTEI opts (Note contents) =
+ inTagsIndented "note" $ blocksToTEI opts contents
+
+idAndRole :: Attr -> [(String, String)]
+idAndRole (id',cls,_) = ident ++ role
+ where
+ ident = if null id'
+ then []
+ else [("id", id')]
+ role = if null cls
+ then []
+ else [("role", unwords cls)]
+
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 1aefaa678..8420704dc 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -43,13 +43,14 @@ import Text.Pandoc.Pretty
import Text.Pandoc.ImageSize
import Network.URI ( isURI, unEscapeString )
import System.FilePath
+import qualified Data.Set as Set
data WriterState =
WriterState { stStrikeout :: Bool -- document contains strikeout
, stSuperscript :: Bool -- document contains superscript
, stSubscript :: Bool -- document contains subscript
, stEscapeComma :: Bool -- in a context where we need @comma
- , stIdentifiers :: [String] -- header ids used already
+ , stIdentifiers :: Set.Set String -- header ids used already
, stOptions :: WriterOptions -- writer options
}
@@ -64,7 +65,7 @@ writeTexinfo options document =
evalState (pandocToTexinfo options $ wrapTop document) $
WriterState { stStrikeout = False, stSuperscript = False,
stEscapeComma = False, stSubscript = False,
- stIdentifiers = [], stOptions = options}
+ stIdentifiers = Set.empty, stOptions = options}
-- | Add a "Top" node around the document, needed by Texinfo.
wrapTop :: Pandoc -> Pandoc
@@ -215,7 +216,7 @@ blockToTexinfo (Header level _ lst) = do
txt <- inlineListToTexinfo lst
idsUsed <- gets stIdentifiers
let id' = uniqueIdent lst idsUsed
- modify $ \st -> st{ stIdentifiers = id' : idsUsed }
+ modify $ \st -> st{ stIdentifiers = Set.insert id' idsUsed }
return $ if (level > 0) && (level <= 4)
then blankline <> text "@node " <> node $$
text (seccmd level) <> txt $$