10 files changed, 169 insertions, 177 deletions
diff --git a/src/Text/Pandoc/Entities.hs b/src/Text/Pandoc/Entities.hs
index ea5676b79..3e68db35c 100644
--- a/src/Text/Pandoc/Entities.hs
+++ b/src/Text/Pandoc/Entities.hs
@@ -29,62 +29,109 @@ Functions for encoding unicode characters as entity references,
 and vice versa.
 -}
 module Text.Pandoc.Entities (
-                     entityToChar,
                      charToEntity,
-                     decodeEntities,
+                     charToNumericalEntity,
+                     specialCharToEntity,
                      encodeEntities,
+                     decodeEntities,
+                     stringToSGML,
                      characterEntity
                     ) where
 import Data.Char ( chr, ord )
-import Text.Regex ( mkRegex, matchRegexAll, Regex )
-import Maybe ( fromMaybe )
+import Text.ParserCombinators.Parsec
+import Data.Maybe ( fromMaybe )
 
--- | Regular expression for numerical coded entity.
-numericalEntity :: Text.Regex.Regex
-numericalEntity = mkRegex "&#([0-9]+|[xX][0-9A-Fa-f]+);"
+-- | Returns a string containing an entity reference for the character.
+charToEntity :: Char -> String
+charToEntity char = 
+  let matches = filter (\(entity, character) -> (character == char)) 
+                       entityTable in
+  if (length matches) == 0
+     then charToNumericalEntity char
+     else fst (head matches)
 
--- | Regular expression for character entity.
-characterEntity :: Text.Regex.Regex
-characterEntity = mkRegex "&#[0-9]+;|&#[xX][0-9A-Fa-f]+;|&[A-Za-z0-9]+;"
+-- | Returns a string containing a numerical entity reference for the char.
+charToNumericalEntity :: Char -> String
+charToNumericalEntity ch = "&#" ++ show (ord ch) ++ ";"
 
--- | Return a string with all entity references decoded to unicode characters
--- where possible.  
-decodeEntities :: String -> String
-decodeEntities str = 
-  case (matchRegexAll characterEntity str) of
-    Nothing                       -> str
-    Just (before, match, rest, _) -> before ++ replacement ++ 
-                                     (decodeEntities rest)
-      where replacement = case (entityToChar match) of
-                            Just ch -> [ch]
-                            Nothing -> match
+-- | Escape special character to SGML entity.
+specialCharToEntity :: Bool    -- ^ Use numerical entities only.
+                    -> Char    -- ^ Character to convert.
+                    -> [Char]
+specialCharToEntity numericalEntities c = 
+  if (c `elem` "&<>\"") || (ord c > 127)
+     then if numericalEntities
+             then charToNumericalEntity c
+             else charToEntity c
+     else [c]
 
--- | Returns a string with characters replaced with entity references where
--- possible.
-encodeEntities :: String -> String
-encodeEntities [] = []
-encodeEntities (c:cs) = if ord c < 128
-                          then c:(encodeEntities cs)
-                          else (charToEntity c) ++ (encodeEntities cs) 
+-- | Parse SGML character entity.
+characterEntity :: GenParser Char st Char
+characterEntity = namedEntity <|> hexEntity <|> decimalEntity <?> "SGML entity"
 
--- | If the string is a valid entity reference, returns @Just@ the character,
--- otherwise @Nothing@.
-entityToChar :: String -> Maybe Char 
-entityToChar entity = 
-    case (lookup entity entityTable) of
-      Just ch -> Just ch
-      Nothing -> case (matchRegexAll numericalEntity entity) of
-                   Just (_, _, _, [sub]) -> Just (chr (read ('0':sub)))
-                   Nothing               -> Nothing
+-- | Parse SGML character entity.
+namedEntity :: GenParser Char st Char
+namedEntity = try $ do
+  st <- char '&'
+  body <- many1 alphaNum
+  end <- char ';'
+  let entity = "&" ++ body ++ ";"
+  return $ case (lookup entity entityTable) of
+             Just ch -> ch
+             Nothing -> '?'
+   
+-- | Parse SGML hexadecimal entity.
+hexEntity :: GenParser Char st Char
+hexEntity = try $ do
+  st <- string "&#"
+  hex <- oneOf "Xx"
+  body <- many1 (oneOf "0123456789ABCDEFabcdef")
+  end <- char ';'
+  return $ chr $ read ('0':'x':body)
 
--- | Returns a string containing an entity reference for the character.
-charToEntity :: Char -> String
-charToEntity char = 
-    let matches = filter (\(entity, character) -> (character == char)) 
-                         entityTable in
-    if (length matches) == 0
-       then "&#" ++ show (ord char) ++ ";"
-       else fst (head matches)
+-- | Parse SGML decimal entity.
+decimalEntity :: GenParser Char st Char
+decimalEntity = try $ do
+  st <- string "&#"
+  body <- many1 digit
+  end <- char ';'
+  return $ chr $ read body
+
+-- | Escape string as needed for SGML.  Entity references are not preserved.
+encodeEntities :: Bool   -- ^ Use only numerical entities.
+               -> String -- ^ String to convert.
+               -> String
+encodeEntities numericalEntities = 
+  concatMap (specialCharToEntity numericalEntities)
+
+-- | Escape string as needed for SGML, using only numerical entities.
+--   Entity references are not preserved.
+encodeEntitiesNumerical :: String -> String
+encodeEntitiesNumerical = 
+  concatMap (\c -> "&#" ++ show (ord c) ++ ";")
+
+-- | Convert entities in a string to characters.
+decodeEntities :: String -> String
+decodeEntities str = 
+  case parse (many (characterEntity <|> anyChar)) str str of
+	Left err        -> error $ "\nError: " ++ show err
+	Right result    -> result
+
+-- | Escape string for SGML, preserving entity references.
+stringToSGML :: Bool     -- ^ Use only numerical entities.
+             -> String   -- ^ String to convert.
+             -> String
+stringToSGML numericalEntities str = 
+  let nonentity = do
+                    notFollowedBy characterEntity
+                    c <- anyChar
+                    return $ specialCharToEntity numericalEntities c
+      entity    = do
+                    ent <- manyTill anyChar (char ';')
+                    return (ent ++ ";") in 
+  case parse (many (nonentity <|> entity)) str str of
+    Left err       -> error $ "\nError: " ++ show err
+    Right result   -> concat result
 
 entityTable :: [(String, Char)]
 entityTable =  [
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 135a90ea8..fc06b657e 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -44,7 +44,7 @@ import Text.ParserCombinators.Parsec
 import Text.ParserCombinators.Pandoc
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared 
-import Text.Pandoc.Entities ( decodeEntities, entityToChar )
+import Text.Pandoc.Entities ( characterEntity, decodeEntities )
 import Maybe ( fromMaybe )
 import Data.List ( intersect, takeWhile, dropWhile )
 import Data.Char ( toUpper, toLower, isAlphaNum )
@@ -391,14 +391,9 @@ text =  choice [ entity, strong, emph, code, str, linebreak, whitespace ] <?> "t
 special = choice [ link, image, rawHtmlInline ] <?> 
                  "link, inline html, or image"
 
-entity = try (do
-  char '&'
-  body <- choice [(many1 letter), (try (do
-                                          char '#'
-                                          num <- many1 digit
-                                          return ("#" ++ num)))]
-  char ';'
-  return (Str [fromMaybe '?' (entityToChar ("&" ++ body ++ ";"))]))
+entity = do
+  ent <- characterEntity
+  return $ Str [ent]
 
 code = try (do 
   htmlTag "code"
@@ -439,7 +434,7 @@ linebreak = do
 
 str = do 
   result <- many1 (noneOf "<& \t\n")
-  return (Str (decodeEntities result))
+  return (Str result)
 
 --
 -- links and images
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 3aa0a6f12..9b3f047e9 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -42,7 +42,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock,
                                   anyHtmlTag, anyHtmlEndTag,
                                   htmlEndTag, extractTagType,
                                   htmlBlockElement )
-import Text.Pandoc.Entities ( decodeEntities )
+import Text.Pandoc.Entities ( characterEntity )
 import Text.ParserCombinators.Parsec
 
 -- | Read markdown from an input string and return a Pandoc document.
@@ -88,12 +88,13 @@ blockQuoteChar = '>'
 hyphenChar = '-'
 ellipsesChar = '.'
 listColSepChar = '|'
+entityStart = '&'
 
 -- treat these as potentially non-text when parsing inline:
 specialChars = [escapeChar, labelStart, labelEnd, emphStart, emphEnd,
                 emphStartAlt, emphEndAlt, codeStart, codeEnd, autoLinkEnd,
                 autoLinkStart, mathStart, mathEnd, imageStart, noteStart,
-                hyphenChar, ellipsesChar] ++ quoteChars
+                hyphenChar, ellipsesChar, entityStart] ++ quoteChars
 
 --
 -- auxiliary functions
@@ -674,7 +675,7 @@ text = choice [ escapedChar, math, strong, emph, smartPunctuation,
                 code, ltSign, symbol,
                 str, linebreak, tabchar, whitespace, endline ] <?> "text"
 
-inline = choice [ rawLaTeXInline', escapedChar, special, text ] <?> "inline"
+inline = choice [ rawLaTeXInline', escapedChar, entity, special, text ] <?> "inline"
 
 special = choice [ noteRef, inlineNote, link, referenceLink, rawHtmlInline', 
                    autoLink, image ] <?> "link, inline html, note, or image"
@@ -827,9 +828,13 @@ linebreak = try (do
 
 nonEndline = noneOf endLineChars
 
+entity = do
+  ent <- characterEntity
+  return $ Str [ent]
+
 str = do 
   result <- many1 ((noneOf (specialChars ++ spaceChars ++ endLineChars))) 
-  return (Str (decodeEntities result))
+  return (Str result)
 
 -- an endline character that can be treated as a space, not a structural break
 endline = try (do
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index b82357d7a..f63ca4ce4 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -65,8 +65,6 @@ module Text.Pandoc.Shared (
                      replaceReferenceLinks,
                      replaceRefLinksBlockList,
                      -- * SGML
-                     escapeSGML,
-                     stringToSGML,
                      inTags,
                      selfClosingTag,
                      inTagsSimple,
@@ -74,7 +72,7 @@ module Text.Pandoc.Shared (
                     ) where
 import Text.Pandoc.Definition
 import Text.ParserCombinators.Parsec as Parsec
-import Text.Pandoc.Entities ( decodeEntities, charToEntity )
+import Text.Pandoc.Entities ( decodeEntities, encodeEntities, stringToSGML )
 import Text.PrettyPrint.HughesPJ as PP ( text, char, (<>), 
                                          ($$), nest, Doc, isEmpty )
 import Data.Char ( toLower, ord )
@@ -523,61 +521,11 @@ replaceRefLinksInline keytable (Quoted t lst) =
     Quoted t (map (replaceRefLinksInline keytable) lst)
 replaceRefLinksInline keytable other = other
 
--- | Parse SGML character entity.
-sgmlCharacterEntity :: GenParser Char st [Char]
-sgmlCharacterEntity = sgmlNamedEntity <|> sgmlDecimalEntity <|> 
-                      sgmlHexEntity <?> "SGML entity"
-
--- | Parse SGML character entity.
-sgmlNamedEntity :: GenParser Char st [Char]
-sgmlNamedEntity = try $ do
-  st <- Parsec.char '&'
-  body <- many1 alphaNum
-  end <- Parsec.char ';'
-  return $ (st:body) ++ [end] 
-   
--- | Parse SGML decimal entity.
-sgmlDecimalEntity :: GenParser Char st [Char]
-sgmlDecimalEntity = try $ do
-  st <- string "&#"
-  body <- many1 (oneOf "0123456789")
-  end <- Parsec.char ';'
-  return $ st ++ body ++ [end]
-
--- | Parse SGML hexadecimal entity.
-sgmlHexEntity :: GenParser Char st [Char]
-sgmlHexEntity = try $ do
-  st <- string "&#"
-  hex <- oneOf "Xx"
-  body <- many1 (oneOf "0123456789ABCDEFabcdef")
-  end <- Parsec.char ';'
-  return $ st ++ (hex:body) ++ [end]
-
--- | Escape special character to SGML entity.
-specialCharToEntity :: Char -> [Char]
-specialCharToEntity c = if (c `elem` "&<>\"") || (ord c > 127)
-                          then charToEntity c
-                          else [c]
-
--- | Escape string, preserving character entities.
-stringToSGML :: String -> String
-stringToSGML str =
-  let segment = sgmlCharacterEntity <|>
-                (do{c <- anyChar;
-                    return $ specialCharToEntity c})
-      sgmlString = (do{segs <- many segment; return $ concat segs}) in
-  case parse sgmlString str str of
-      Left err     -> error $ "\nError:\n" ++ show err
-      Right result -> result
-
--- | Escape string as needed for SGML.  Entity references are not preserved.
-escapeSGML :: String -> String
-escapeSGML = concatMap specialCharToEntity 
-
 -- | Return a text object with a string of formatted SGML attributes. 
 attributeList :: [(String, String)] -> Doc
 attributeList = text .  concatMap 
-  (\(a, b) -> " " ++ stringToSGML a ++ "=\"" ++ stringToSGML b ++ "\"") 
+  (\(a, b) -> " " ++ stringToSGML True a ++ "=\"" ++ 
+  stringToSGML True b ++ "\"") 
 
 -- | Put the supplied contents between start and end tags of tagType,
 --   with specified attributes and (if specified) indentation.
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index ec3801a9a..0c83d0ea0 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -32,10 +32,9 @@ module Text.Pandoc.Writers.Docbook (
                                    ) where
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( encodeEntities )
-import Text.Regex ( mkRegex, matchRegex )
+import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
 import Data.Char ( toLower, ord )
-import Data.List ( isPrefixOf, partition )
+import Data.List ( isPrefixOf, partition, drop )
 import Text.PrettyPrint.HughesPJ hiding ( Str )
 
 -- | Data structure for defining hierarchical Pandoc documents
@@ -65,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $
     then -- last name first
       let (lastname, rest) = break (==',') name 
           firstname = removeLeadingSpace rest in
-      inTagsSimple "firstname" (text $ stringToSGML firstname) <> 
-      inTagsSimple "surname" (text $ stringToSGML lastname) 
+      inTagsSimple "firstname" (text $ stringToSGML True firstname) <> 
+      inTagsSimple "surname" (text $ stringToSGML True lastname) 
     else -- last name last
       let namewords = words name
           lengthname = length namewords 
@@ -74,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $
             0  -> ("","") 
             1  -> ("", name)
             n  -> (joinWithSep " " (take (n-1) namewords), last namewords) in
-       inTagsSimple "firstname" (text $ stringToSGML firstname) $$ 
-       inTagsSimple "surname" (text $ stringToSGML lastname) 
+       inTagsSimple "firstname" (text $ stringToSGML True firstname) $$ 
+       inTagsSimple "surname" (text $ stringToSGML True lastname) 
 
 -- | Convert Pandoc document to string in Docbook format.
 writeDocbook :: WriterOptions -> Pandoc -> String
@@ -87,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
                 then inTagsIndented "articleinfo" $
                      (inTagsSimple "title" (wrap opts title)) $$ 
                      (vcat (map authorToDocbook authors)) $$ 
-                     (inTagsSimple "date" (text $ stringToSGML date)) 
+                     (inTagsSimple "date" (text $ stringToSGML True date)) 
                 else empty
       blocks' = replaceReferenceLinks blocks
       (noteBlocks, blocks'') = partition isNoteBlock blocks' 
@@ -142,7 +141,7 @@ blockToDocbook opts (Para lst) =
 blockToDocbook opts (BlockQuote blocks) =
   inTagsIndented "blockquote" (blocksToDocbook opts blocks)
 blockToDocbook opts (CodeBlock str) = 
-  text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>"
+  text "<screen>\n" <> text (encodeEntities True str) <> text "\n</screen>"
 blockToDocbook opts (BulletList lst) = 
   inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst 
 blockToDocbook opts (OrderedList lst) = 
@@ -199,7 +198,7 @@ inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
 
 -- | Convert an inline element to Docbook.
 inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook opts (Str str) = text $ stringToSGML str 
+inlineToDocbook opts (Str str) = text $ stringToSGML True str 
 inlineToDocbook opts (Emph lst) = 
   inTagsSimple "emphasis" (inlinesToDocbook opts lst)
 inlineToDocbook opts (Strong lst) = 
@@ -212,24 +211,23 @@ inlineToDocbook opts Ellipses = text "&#8230;"
 inlineToDocbook opts EmDash = text "&#8212;" 
 inlineToDocbook opts EnDash = text "&#8211;" 
 inlineToDocbook opts (Code str) = 
-  inTagsSimple "literal" $ text (escapeSGML str)
+  inTagsSimple "literal" $ text (encodeEntities True str)
 inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
 inlineToDocbook opts (HtmlInline str) = empty
 inlineToDocbook opts LineBreak = 
   text $ "<literallayout></literallayout>" 
 inlineToDocbook opts Space = char ' '
 inlineToDocbook opts (Link txt (Src src tit)) =
-  case (matchRegex (mkRegex "mailto:(.*)") src) of
-    Just [addr] -> inTagsSimple "email" $ text (escapeSGML addr)
-    Nothing     -> inTags False "ulink" [("url", src)] $
-                   inlinesToDocbook opts txt
+  if isPrefixOf "mailto:" src
+    then inTagsSimple "email" $ text (encodeEntities True $ drop 7 src)
+    else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
 inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur
 inlineToDocbook opts (Image alt (Src src tit)) = 
   let titleDoc = if null tit
                    then empty
                    else inTagsIndented "objectinfo" $
                         inTagsIndented "title" 
-                        (text $ stringToSGML tit) in
+                        (text $ stringToSGML True tit) in
   inTagsIndented "inlinemediaobject" $ 
   inTagsIndented "imageobject" $
   titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] 
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 5465e125d..7c89d6352 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -32,7 +32,7 @@ module Text.Pandoc.Writers.HTML (
                                 ) where
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( encodeEntities )
+import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
 import Text.Regex ( mkRegex, matchRegex )
 import Numeric ( showHex )
 import Data.Char ( ord, toLower )
@@ -127,11 +127,11 @@ htmlHeader opts (Meta title authors date) =
                       then empty 
                       else selfClosingTag "meta" [("name", "author"), 
                            ("content", 
-                            joinWithSep ", " (map stringToSGML authors))]  
+                            joinWithSep ", " (map (stringToSGML False) authors))]  
       datetext = if (date == "")
                     then empty 
                     else selfClosingTag "meta" [("name", "date"),
-                         ("content", stringToSGML date)] in
+                         ("content", stringToSGML False date)] in
   text (writerHeader opts) $$ authortext $$ datetext $$ titletext $$ 
   text "</head>\n<body>"
 
@@ -168,7 +168,7 @@ blockToHtml opts (Note ref lst) =
                      (text "&#8617;")
 blockToHtml opts (Key _ _) = empty
 blockToHtml opts (CodeBlock str) = 
-  text "<pre><code>" <> text (escapeSGML str) <> text "\n</code></pre>"
+  text "<pre><code>" <> text (encodeEntities False str) <> text "\n</code></pre>"
 blockToHtml opts (RawHtml str) = text str 
 blockToHtml opts (BulletList lst) = 
   let attribs = if (writerIncremental opts)
@@ -234,7 +234,7 @@ inlineToHtml opts (Emph lst) =
 inlineToHtml opts (Strong lst) = 
   inTagsSimple "strong" (inlineListToHtml opts lst)
 inlineToHtml opts (Code str) =  
-  inTagsSimple "code" $ text (escapeSGML str)
+  inTagsSimple "code" $ text (encodeEntities False str)
 inlineToHtml opts (Quoted SingleQuote lst) =
   text "&lsquo;" <> (inlineListToHtml opts lst) <> text "&rsquo;"
 inlineToHtml opts (Quoted DoubleQuote lst) =
@@ -243,16 +243,16 @@ inlineToHtml opts EmDash = text "&mdash;"
 inlineToHtml opts EnDash = text "&ndash;"
 inlineToHtml opts Ellipses = text "&hellip;"
 inlineToHtml opts Apostrophe = text "&rsquo;"
-inlineToHtml opts (Str str) = text $ stringToSGML str
-inlineToHtml opts (TeX str) = text $ escapeSGML str
+inlineToHtml opts (Str str) = text $ stringToSGML False str
+inlineToHtml opts (TeX str) = text $ encodeEntities False str
 inlineToHtml opts (HtmlInline str) = text str
 inlineToHtml opts (LineBreak) = selfClosingTag "br" []
 inlineToHtml opts Space = space
 inlineToHtml opts (Link txt (Src src tit)) = 
-  let title = stringToSGML tit in
+  let title = stringToSGML False tit in
   if (isPrefixOf "mailto:" src)
      then obfuscateLink opts txt src 
-     else inTags False "a" ([("href", escapeSGML src)] ++ 
+     else inTags False "a" ([("href", encodeEntities False src)] ++ 
           if null tit then [] else [("title", title)]) 
           (inlineListToHtml opts txt)
 inlineToHtml opts (Link txt (Ref ref)) = 
@@ -260,7 +260,7 @@ inlineToHtml opts (Link txt (Ref ref)) =
   (inlineListToHtml opts ref) <> char ']'
   -- this is what markdown does, for better or worse
 inlineToHtml opts (Image alt (Src source tit)) = 
-  let title = stringToSGML tit
+  let title = stringToSGML False tit
       alternate = render $ inlineListToHtml opts alt in 
   selfClosingTag "img" $ [("src", source)] ++
   (if null alternate then [] else [("alt", alternate)]) ++
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 5eff079f8..46c47bf74 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -32,7 +32,6 @@ Markdown:  <http://daringfireball.net/projects/markdown/>
 module Text.Pandoc.Writers.Markdown (
                                      writeMarkdown
                                     ) where
-import Text.Regex ( matchRegex, mkRegex )
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared 
 import Data.List ( group, isPrefixOf, drop )
diff --git a/tests/testsuite.native b/tests/testsuite.native
index 44c133311..683b2c550 100644
--- a/tests/testsuite.native
+++ b/tests/testsuite.native
@@ -240,8 +240,8 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
   , [ Plain [Str "section:",Space,Str "\167"] ]
   , [ Plain [Str "set",Space,Str "membership:",Space,Str "\8712"] ]
   , [ Plain [Str "copyright:",Space,Str "\169"] ] ]
-, Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
-, Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
+, Para [Str "AT",Str "&",Str "T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
+, Para [Str "AT",Str "&",Str "T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
 , Para [Str "This",Space,Str "&",Space,Str "that",Str "."]
 , Para [Str "4",Space,Str "<",Space,Str "5",Str "."]
 , Para [Str "6",Space,Str ">",Space,Str "5",Str "."]
@@ -294,7 +294,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
 , Key [Str "bar"] (Src "/url/" "Title with &quot;quotes&quot; inside")
 , Header 2 [Str "With",Space,Str "ampersands"]
 , Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] (Ref [Str "1"]),Str "."]
-, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] (Ref [Str "2"]),Str "."]
+, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT",Str "&",Str "T"] (Ref [Str "2"]),Str "."]
 , Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] (Src "/script?foo=1&bar=2" ""),Str "."]
 , Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] (Src "/script?foo=1&bar=2" ""),Str "."]
 , Key [Str "1"] (Src "http://example.com/?foo=1&bar=2" "")
diff --git a/tests/writer.docbook b/tests/writer.docbook
index 1c5965cb3..600c52d6a 100644
--- a/tests/writer.docbook
+++ b/tests/writer.docbook
@@ -89,7 +89,7 @@
       </para>
       <screen>
 sub status {
-    print &quot;working&quot;;
+    print &#34;working&#34;;
 }
 </screen>
       <para>
@@ -122,7 +122,7 @@ sub status {
       </blockquote>
     </blockquote>
     <para>
-      This should not be a block quote: 2 &gt; 1.
+      This should not be a block quote: 2 &#62; 1.
     </para>
     <para>
       Box-style:
@@ -133,7 +133,7 @@ sub status {
       </para>
       <screen>
 sub status {
-    print &quot;working&quot;;
+    print &#34;working&#34;;
 }
 </screen>
     </blockquote>
@@ -177,7 +177,7 @@ sub status {
 ---- (should be four hyphens)
 
 sub status {
-    print &quot;working&quot;;
+    print &#34;working&#34;;
 }
 
 this code block is indented by one tab
@@ -188,7 +188,7 @@ this code block is indented by one tab
     <screen>
     this code block is indented by two tabs
 
-These should not be escaped:  \$ \\ \&gt; \[ \{
+These should not be escaped:  \$ \\ \&#62; \[ \{
 </screen>
   </section>
   <section>
@@ -577,9 +577,9 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
       word.
     </para>
     <para>
-      This is code: <literal>&gt;</literal>, <literal>$</literal>,
+      This is code: <literal>&#62;</literal>, <literal>$</literal>,
       <literal>\</literal>, <literal>\$</literal>,
-      <literal>&lt;html&gt;</literal>.
+      <literal>&#60;html&#62;</literal>.
     </para>
   </section>
   <section>
@@ -602,7 +602,7 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
     </para>
     <para>
       Here is some quoted <quote><literal>code</literal></quote> and a
-      <quote><ulink url="http://example.com/?foo=1&amp;bar=2">quoted link</ulink></quote>.
+      <quote><ulink url="http://example.com/?foo=1&#38;bar=2">quoted link</ulink></quote>.
     </para>
     <para>
       Some dashes: one&#8212;two&#8212;three&#8212;four&#8212;five.
@@ -691,9 +691,9 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
     </para>
     <para>
       <literal>\begin{tabular}{|l|l|}\hline
-Animal &amp; Number \\ \hline
-Dog    &amp; 2      \\
-Cat    &amp; 1      \\ \hline
+Animal &#38; Number \\ \hline
+Dog    &#38; 2      \\
+Cat    &#38; 1      \\ \hline
 \end{tabular}</literal>
     </para>
   </section>
@@ -705,44 +705,44 @@ Cat    &amp; 1      \\ \hline
     <itemizedlist>
       <listitem>
         <para>
-          I hat: &Icirc;
+          I hat: &#206;
         </para>
       </listitem>
       <listitem>
         <para>
-          o umlaut: &ouml;
+          o umlaut: &#246;
         </para>
       </listitem>
       <listitem>
         <para>
-          section: &sect;
+          section: &#167;
         </para>
       </listitem>
       <listitem>
         <para>
-          set membership: &isin;
+          set membership: &#8712;
         </para>
       </listitem>
       <listitem>
         <para>
-          copyright: &copy;
+          copyright: &#169;
         </para>
       </listitem>
     </itemizedlist>
     <para>
-      AT&amp;T has an ampersand in their name.
+      AT&#38;T has an ampersand in their name.
     </para>
     <para>
-      AT&amp;T is another way to write it.
+      AT&#38;T is another way to write it.
     </para>
     <para>
-      This &amp; that.
+      This &#38; that.
     </para>
     <para>
-      4 &lt; 5.
+      4 &#60; 5.
     </para>
     <para>
-      6 &gt; 5.
+      6 &#62; 5.
     </para>
     <para>
       Backslash: \
@@ -775,7 +775,7 @@ Cat    &amp; 1      \\ \hline
       Right paren: )
     </para>
     <para>
-      Greater-than: &gt;
+      Greater-than: &#62;
     </para>
     <para>
       Hash: #
@@ -868,25 +868,25 @@ Cat    &amp; 1      \\ \hline
       <title>With ampersands</title>
       <para>
         Here's a
-        <ulink url="http://example.com/?foo=1&amp;bar=2">link with an ampersand in the URL</ulink>.
+        <ulink url="http://example.com/?foo=1&#38;bar=2">link with an ampersand in the URL</ulink>.
       </para>
       <para>
         Here's a link with an amersand in the link text:
-        <ulink url="http://att.com/">AT&amp;T</ulink>.
+        <ulink url="http://att.com/">AT&#38;T</ulink>.
       </para>
       <para>
-        Here's an <ulink url="/script?foo=1&amp;bar=2">inline link</ulink>.
+        Here's an <ulink url="/script?foo=1&#38;bar=2">inline link</ulink>.
       </para>
       <para>
         Here's an
-        <ulink url="/script?foo=1&amp;bar=2">inline link in pointy braces</ulink>.
+        <ulink url="/script?foo=1&#38;bar=2">inline link in pointy braces</ulink>.
       </para>
     </section>
     <section>
       <title>Autolinks</title>
       <para>
         With an ampersand:
-        <ulink url="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</ulink>
+        <ulink url="http://example.com/?foo=1&#38;bar=2">http://example.com/?foo=1&#38;bar=2</ulink>
       </para>
       <itemizedlist>
         <listitem>
@@ -916,10 +916,10 @@ Cat    &amp; 1      \\ \hline
       </blockquote>
       <para>
         Auto-links should not occur here:
-        <literal>&lt;http://example.com/&gt;</literal>
+        <literal>&#60;http://example.com/&#62;</literal>
       </para>
       <screen>
-or here: &lt;http://example.com/&gt;
+or here: &#60;http://example.com/&#62;
 </screen>
     </section>
   </section>
@@ -970,7 +970,7 @@ or here: &lt;http://example.com/&gt;
                   footnote (as with list items).
                 </para>
                 <screen>
-  { &lt;code&gt; }
+  { &#60;code&#62; }
 </screen>
                 <para>
                   If you want, you can indent every line, but you can also be lazy
diff --git a/tests/writer.native b/tests/writer.native
index 44c133311..683b2c550 100644
--- a/tests/writer.native
+++ b/tests/writer.native
@@ -240,8 +240,8 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
   , [ Plain [Str "section:",Space,Str "\167"] ]
   , [ Plain [Str "set",Space,Str "membership:",Space,Str "\8712"] ]
   , [ Plain [Str "copyright:",Space,Str "\169"] ] ]
-, Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
-, Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
+, Para [Str "AT",Str "&",Str "T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name",Str "."]
+, Para [Str "AT",Str "&",Str "T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it",Str "."]
 , Para [Str "This",Space,Str "&",Space,Str "that",Str "."]
 , Para [Str "4",Space,Str "<",Space,Str "5",Str "."]
 , Para [Str "6",Space,Str ">",Space,Str "5",Str "."]
@@ -294,7 +294,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane
 , Key [Str "bar"] (Src "/url/" "Title with &quot;quotes&quot; inside")
 , Header 2 [Str "With",Space,Str "ampersands"]
 , Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Link [Str "link",Space,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] (Ref [Str "1"]),Str "."]
-, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT&T"] (Ref [Str "2"]),Str "."]
+, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",Space,Link [Str "AT",Str "&",Str "T"] (Ref [Str "2"]),Str "."]
 , Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link"] (Src "/script?foo=1&bar=2" ""),Str "."]
 , Para [Str "Here",Apostrophe,Str "s",Space,Str "an",Space,Link [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] (Src "/script?foo=1&bar=2" ""),Str "."]
 , Key [Str "1"] (Src "http://example.com/?foo=1&bar=2" "")