aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2012-09-30 09:53:50 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2012-09-30 09:53:50 -0700
commite5740a138e291995d97e91d038d174977b0456f0 (patch)
tree3fd0894011f67ba7c12e80a50a5b39839f0bb097 /src/Text/Pandoc
parentd3601726932753f76919f2d052dcd01ce0b54986 (diff)
downloadpandoc-e5740a138e291995d97e91d038d174977b0456f0.tar.gz
RST reader: Handle replace:: and unicode:: substitutions.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/RST.hs85
1 files changed, 77 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index eb8558857..d6aa5aba1 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -500,29 +500,66 @@ directive = try $ do
string ".."
directive'
+-- TODO: line-block, parsed-literal, table, csv-table, list-table
+-- replace, unicode
+-- date
+-- include
+-- raw (consolidate)
+-- class
+-- title
directive' :: RSTParser Blocks
directive' = do
skipMany1 spaceChar
label <- directiveLabel
skipMany spaceChar
top <- many $ satisfy (/='\n')
- <|> try (char '\n' <* notFollowedBy blankline <*
- notFollowedBy' (lookAhead (many spaceChar)
- >>= rawFieldListItem))
+ <|> try (char '\n' <*
+ notFollowedBy' (rawFieldListItem " ") <*
+ count 3 (char ' ') <*
+ notFollowedBy blankline)
newline
- indent <- lookAhead $ many spaceChar
- fields <- many $ rawFieldListItem indent
- blanklines
- body <- option "" indentedBlock
+ fields <- many $ rawFieldListItem " "
+ body <- option "" $ try $ blanklines >> indentedBlock
+ optional blanklines
let body' = body ++ "\n\n"
case label of
"container" -> parseFromString parseBlocks body'
+ "replace" -> B.para <$> -- consumed by substKey
+ parseFromString (trimInlines . mconcat <$> many inline)
+ (trim top)
+ "unicode" -> B.para <$> -- consumed by substKey
+ parseFromString (trimInlines . mconcat <$> many inline)
+ (trim $ unicodeTransform top)
"compound" -> parseFromString parseBlocks body'
"pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body'
"epigraph" -> B.blockQuote <$> parseFromString parseBlocks body'
"highlights" -> B.blockQuote <$> parseFromString parseBlocks body'
"rubric" -> B.para . B.strong <$> parseFromString
(trimInlines . mconcat <$> many inline) top
+ _ | label `elem` ["attention","caution","danger","error","hint",
+ "important","note","tip","warning"] ->
+ do let tit = B.para $ B.strong $ B.str label
+ bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body'
+ return $ B.blockQuote $ tit <> bod
+ "admonition" ->
+ do tit <- B.para . B.strong <$> parseFromString
+ (trimInlines . mconcat <$> many inline) top
+ bod <- parseFromString parseBlocks body'
+ return $ B.blockQuote $ tit <> bod
+ "sidebar" ->
+ do let subtit = maybe "" trim $ lookup "subtitle" fields
+ tit <- B.para . B.strong <$> parseFromString
+ (trimInlines . mconcat <$> many inline)
+ (trim top ++ if null subtit
+ then ""
+ else (": " ++ subtit))
+ bod <- parseFromString parseBlocks body'
+ return $ B.blockQuote $ tit <> bod
+ "topic" ->
+ do tit <- B.para . B.strong <$> parseFromString
+ (trimInlines . mconcat <$> many inline) top
+ bod <- parseFromString parseBlocks body'
+ return $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
s { stateRstDefaultRole =
case trim top of
@@ -546,6 +583,38 @@ directive' = do
Nothing -> B.image src "" alt
_ -> return mempty
+-- Can contain haracter codes as decimal numbers or
+-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
+-- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
+-- or text, which is used as-is. Comments start with ..
+unicodeTransform :: String -> String
+unicodeTransform t =
+ case t of
+ ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment
+ ('0':'x':xs) -> go "0x" xs
+ ('x':xs) -> go "x" xs
+ ('\\':'x':xs) -> go "\\x" xs
+ ('U':'+':xs) -> go "U+" xs
+ ('u':xs) -> go "u" xs
+ ('\\':'u':xs) -> go "\\u" xs
+ ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs)
+ -- drop semicolon
+ (\(c,s) -> c : unicodeTransform (drop 1 s))
+ $ extractUnicodeChar xs
+ (x:xs) -> x : unicodeTransform xs
+ [] -> []
+ where go pref zs = maybe (pref ++ unicodeTransform zs)
+ (\(c,s) -> c : unicodeTransform s)
+ $ extractUnicodeChar zs
+
+extractUnicodeChar :: String -> Maybe (Char, String)
+extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
+ where (ds,rest) = span isHexDigit s
+ mbc = safeRead ('\'':'\\':'x':ds ++ "'")
+
+isHexDigit :: Char -> Bool
+isHexDigit c = c `elem` "0123456789ABCDEFabcdef"
+
extractCaption :: RSTParser (Inlines, Blocks)
extractCaption = do
capt <- trimInlines . mconcat <$> many inline
@@ -662,7 +731,7 @@ substKey = try $ do
skipMany1 spaceChar
(alt,ref) <- withRaw $ trimInlines . mconcat
<$> enclosed (char '|') (char '|') inline
- res <- B.toList <$> (directive' <|> para)
+ res <- B.toList <$> directive'
il <- case res of
-- use alt unless :alt: attribute on image:
[Para [Image [Str "image"] (src,tit)]] ->