aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs5
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs9
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs82
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org.hs17
-rw-r--r--src/Text/Pandoc/Readers/RST.hs20
7 files changed, 87 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3cc2a4479..352b94496 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -194,7 +194,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] indexterm - A wrapper for terms to be indexed
[x] info - A wrapper for information about a component or other block. (DocBook v5)
[x] informalequation - A displayed mathematical equation without a title
-[ ] informalexample - A displayed example without a title
+[x] informalexample - A displayed example without a title
[ ] informalfigure - A untitled figure
[ ] informaltable - A table without a title
[ ] initializer - The initializer for a FieldSynopsis
@@ -611,6 +611,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags
"important","caution","note","tip","warning","qandadiv",
"question","answer","abstract","itemizedlist","orderedlist",
"variablelist","article","book","table","informaltable",
+ "informalexample",
"screen","programlisting","example","calloutlist"]
isBlockElement _ = False
@@ -766,6 +767,8 @@ parseBlock (Elem e) =
"book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
"table" -> parseTable
"informaltable" -> parseTable
+ "informalexample" -> divWith ("", ["informalexample"], []) <$>
+ getBlocks e
"literallayout" -> codeBlockWithLang
"screen" -> codeBlockWithLang
"programlisting" -> codeBlockWithLang
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 338540533..aefc32e0e 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -181,7 +181,6 @@ getManifest archive = do
fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences pathToFile =
(walk $ renameImages root)
- . (walk normalisePath)
. (walk $ fixBlockIRs filename)
. (walk $ fixInlineIRs filename)
where
@@ -196,12 +195,6 @@ fixInlineIRs s (Link t ('#':url, tit)) =
Link t (addHash s url, tit)
fixInlineIRs _ v = v
-normalisePath :: Inline -> Inline
-normalisePath (Link t (url, tit)) =
- let (path, uid) = span (/= '#') url in
- Link t (takeFileName path ++ uid, tit)
-normalisePath s = s
-
prependHash :: [String] -> Inline -> Inline
prependHash ps l@(Link is (url, tit))
| or [s `isPrefixOf` url | s <- ps] =
@@ -223,7 +216,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP
addHash :: String -> String -> String
addHash _ "" = ""
-addHash s ident = s ++ "#" ++ ident
+addHash s ident = takeFileName s ++ "#" ++ ident
removeEPUBAttrs :: [(String, String)] -> [(String, String)]
removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fcba16e04..b32264d61 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -50,6 +50,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
+import qualified Data.Map as M
import Data.Maybe ( fromMaybe, isJust)
import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf )
import Data.Char ( isDigit )
@@ -64,6 +65,7 @@ import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
import Network.URI (isURI)
import Text.Pandoc.Error
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Parsec.Error
@@ -74,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options
-> Either PandocError Pandoc
readHtml opts inp =
mapLeft (ParseFailure . getError) . flip runReader def $
- runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing)
- "source" tags
+ runParserT parseDoc
+ (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty)
+ "source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
@@ -100,7 +103,9 @@ data HTMLState =
HTMLState
{ parserState :: ParserState,
noteTable :: [(String, Blocks)],
- baseHref :: Maybe String
+ baseHref :: Maybe String,
+ identifiers :: [String],
+ headerMap :: M.Map Inlines String
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -252,6 +257,22 @@ pListItem nonItem = do
let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
(liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+parseListStyleType :: String -> ListNumberStyle
+parseListStyleType "lower-roman" = LowerRoman
+parseListStyleType "upper-roman" = UpperRoman
+parseListStyleType "lower-alpha" = LowerAlpha
+parseListStyleType "upper-alpha" = UpperAlpha
+parseListStyleType "decimal" = Decimal
+parseListStyleType _ = DefaultStyle
+
+parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr "i" = LowerRoman
+parseTypeAttr "I" = UpperRoman
+parseTypeAttr "a" = LowerAlpha
+parseTypeAttr "A" = UpperAlpha
+parseTypeAttr "1" = Decimal
+parseTypeAttr _ = DefaultStyle
+
pOrderedList :: TagParser Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -261,23 +282,19 @@ pOrderedList = try $ do
sta' = if all isDigit sta
then read sta
else 1
- sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ ->
- case lookup "type" attribs of
- Just "1" -> Decimal
- Just "I" -> UpperRoman
- Just "i" -> LowerRoman
- Just "A" -> UpperAlpha
- Just "a" -> LowerAlpha
- _ -> DefaultStyle
+
+ pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
+
+ typeAttr = fromMaybe "" $ lookup "type" attribs
+ classAttr = fromMaybe "" $ lookup "class" attribs
+ styleAttr = fromMaybe "" $ lookup "style" attribs
+ listStyle = fromMaybe "" $ pickListStyle styleAttr
+
+ sty' = foldOrElse DefaultStyle
+ [ parseTypeAttr typeAttr
+ , parseListStyleType classAttr
+ , parseListStyleType listStyle
+ ]
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -385,9 +402,10 @@ pHeader = try $ do
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
+ attr' <- registerHeader (ident, classes, keyvals) contents
return $ if bodyTitle
then mempty -- skip a representation of the title in the body
- else B.headerWith (ident, classes, keyvals) level contents
+ else B.headerWith attr' level contents
pHrule :: TagParser Blocks
pHrule = do
@@ -622,12 +640,11 @@ pSpan = try $ do
guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
- let attr' = mkAttr attr
- return $ case attr' of
- ("",[],[("style",s)])
- | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
- B.smallcaps contents
- _ -> B.spanWith (mkAttr attr) contents
+ let isSmallCaps = fontVariant == "small-caps"
+ where styleAttr = fromMaybe "" $ lookup "style" attr
+ fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+ let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
+ return $ tag contents
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do
@@ -971,6 +988,14 @@ isSpace _ = False
-- Instances
+instance HasIdentifierList HTMLState where
+ extractIdentifierList = identifiers
+ updateIdentifierList f s = s{ identifiers = f (identifiers s) }
+
+instance HasHeaderMap HTMLState where
+ extractHeaderMap = headerMap
+ updateHeaderMap f s = s{ headerMap = f (headerMap s) }
+
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance HasQuoteContext st (Reader HTMLLocal) where
@@ -980,9 +1005,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where
instance HasReaderOptions HTMLState where
extractReaderOptions = extractReaderOptions . parserState
-instance Default HTMLState where
- def = HTMLState def [] Nothing
-
instance HasMeta HTMLState where
setMeta s b st = st {parserState = setMeta s b $ parserState st}
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 0da912ea6..16d387dc4 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -524,6 +524,7 @@ inlineCommands = M.fromList $
, ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage)
, ("enquote", enquote)
, ("cite", citation "cite" AuthorInText False)
+ , ("Cite", citation "cite" AuthorInText False)
, ("citep", citation "citep" NormalCitation False)
, ("citep*", citation "citep*" NormalCitation False)
, ("citeal", citation "citeal" NormalCitation False)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 2a5adab22..b23b44544 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -252,8 +252,8 @@ parseAttr = try $ do
skipMany spaceChar
k <- many1 letter
char '='
- char '"'
- v <- many1Till (satisfy (/='\n')) (char '"')
+ v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
+ <|> many1 nonspaceChar
return (k,v)
tableStart :: MWParser ()
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 980f63504..55ac92bcb 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -70,6 +70,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+instance HasIdentifierList OrgParserState where
+ extractIdentifierList = orgStateIdentifiers
+ updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+ extractHeaderMap = orgStateHeaderMap
+ updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
parseOrg :: OrgParser Pandoc
parseOrg = do
blocks' <- parseBlocks
@@ -135,6 +143,8 @@ data OrgParserState = OrgParserState
, orgStateMeta :: Meta
, orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
+ , orgStateIdentifiers :: [String]
+ , orgStateHeaderMap :: M.Map Inlines String
}
instance Default OrgParserLocal where
@@ -174,6 +184,8 @@ defaultOrgParserState = OrgParserState
, orgStateMeta = nullMeta
, orgStateMeta' = return nullMeta
, orgStateNotes' = []
+ , orgStateIdentifiers = []
+ , orgStateHeaderMap = M.empty
}
recordAnchorId :: String -> OrgParser ()
@@ -668,7 +680,10 @@ header = try $ do
title <- manyTill inline (lookAhead headerEnd)
tags <- headerEnd
let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags
- return $ B.header level <$> inlns
+ st <- getState
+ let inlines = runF inlns st
+ attr <- registerHeader nullAttr inlines
+ return $ pure (B.headerWith attr level inlines)
where
tagToInlineF :: String -> F Inlines
tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 678eecc52..f9663b19a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -614,20 +614,22 @@ directive' = do
return mempty
-- TODO:
--- - Silently ignores illegal fields
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
- let (baseRole, baseFmt, baseAttr) =
- maybe (parentRole, Nothing, nullAttr) id $
- M.lookup parentRole customRoles
+ let getBaseRole (r, f, a) roles =
+ case M.lookup r roles of
+ Just (r', f', a') -> getBaseRole (r', f', a') roles
+ Nothing -> (r, f, a)
+ (baseRole, baseFmt, baseAttr) =
+ getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
annotate :: [String] -> [String]
annotate = maybe id (:) $
- if parentRole == "code"
+ if baseRole == "code"
then lookup "language" fields
else Nothing
attr = let (ident, classes, keyValues) = baseAttr
@@ -636,12 +638,12 @@ addNewRole roleString fields = do
-- warn about syntax we ignore
flip mapM_ fields $ \(key, _) -> case key of
- "language" -> when (parentRole /= "code") $ addWarning Nothing $
+ "language" -> when (baseRole /= "code") $ addWarning Nothing $
"ignoring :language: field because the parent of role :" ++
- role ++ ": is :" ++ parentRole ++ ": not :code:"
- "format" -> when (parentRole /= "raw") $ addWarning Nothing $
+ role ++ ": is :" ++ baseRole ++ ": not :code:"
+ "format" -> when (baseRole /= "raw") $ addWarning Nothing $
"ignoring :format: field because the parent of role :" ++
- role ++ ": is :" ++ parentRole ++ ": not :raw:"
+ role ++ ": is :" ++ baseRole ++ ": not :raw:"
_ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
": in definition of role :" ++ role ++ ": in"
when (parentRole == "raw" && countKeys "format" > 1) $