From 6543b05116ee58ef4de62f93dcafeb27617d83e6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 30 Jul 2021 17:23:46 -0600
Subject: Add RTF reader.

- `rtf` is now supported as an input format as well as output.
- New module Text.Pandoc.Readers.RTF (exporting `readRTF`). [API change]

Closes #3982.
---
 src/Text/Pandoc/Readers.hs     |    3 +
 src/Text/Pandoc/Readers/RTF.hs | 1333 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1336 insertions(+)
 create mode 100644 src/Text/Pandoc/Readers/RTF.hs

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 5106f8058..dd3aecdc5 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -55,6 +55,7 @@ module Text.Pandoc.Readers
   , readCslJson
   , readBibTeX
   , readBibLaTeX
+  , readRTF
   -- * Miscellaneous
   , getReader
   , getDefaultExtensions
@@ -102,6 +103,7 @@ import Text.Pandoc.Readers.Man
 import Text.Pandoc.Readers.CSV
 import Text.Pandoc.Readers.CslJson
 import Text.Pandoc.Readers.BibTeX
+import Text.Pandoc.Readers.RTF
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
@@ -149,6 +151,7 @@ readers = [("native"       , TextReader readNative)
           ,("csljson"      , TextReader readCslJson)
           ,("bibtex"       , TextReader readBibTeX)
           ,("biblatex"     , TextReader readBibLaTeX)
+          ,("rtf"          , TextReader readRTF)
            ]
 
 -- | Retrieve reader, extensions based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs
new file mode 100644
index 000000000..749a38dd3
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RTF.hs
@@ -0,0 +1,1333 @@
+{-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Readers.RTF
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane (<jgm@berkeley.edu>)
+   Stability   : alpha
+   Portability : portable
+
+Conversion of RTF documents 'Pandoc' document.
+We target version 1.5 of the RTF spec.
+-}
+module Text.Pandoc.Readers.RTF (readRTF) where
+
+import qualified Data.IntMap as IntMap
+import qualified Data.Sequence as Seq
+import Control.Monad
+import Control.Monad.Except (throwError)
+import Data.List (find, foldl')
+import Data.Word (Word8, Word16)
+import Data.Default
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Read as TR
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), insertMedia)
+import Text.Pandoc.Definition
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared (safeRead, tshow)
+import Data.Char (isAlphaNum, chr, digitToInt, isAscii, isLetter, isSpace)
+import qualified Data.ByteString.Lazy as BL
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Data.Maybe (mapMaybe, fromMaybe)
+import Safe (lastMay, initSafe, headDef)
+-- import Debug.Trace
+
+-- TODO:
+-- [ ] more complex table features
+--
+
+-- | Read RTF from an input string and return a Pandoc document.
+readRTF  :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
+readRTF opts s = do
+  let sources = toSources s
+  parsed <- readWithM parseRTF def{ sOptions = opts } sources
+  case parsed of
+       Left e  -> throwError e
+       Right d -> return d
+
+data CharSet = ANSI | Mac | Pc | Pca
+  deriving (Show, Eq)
+
+-- first index is the list (or override) id, second is the list level
+type ListTable = IntMap.IntMap ListLevelTable
+type ListLevelTable = IntMap.IntMap ListType
+
+data RTFState = RTFState  { sOptions     :: ReaderOptions
+                          , sCharSet     :: CharSet
+                          , sGroupStack  :: [Properties]
+                          , sListStack   :: [List]
+                          , sCurrentCell :: Blocks
+                          , sTableRows   :: [TableRow] -- reverse order
+                          , sTextContent :: [(Properties, Text)]
+                          , sMetadata    :: [(Text, Inlines)]
+                          , sFontTable   :: FontTable
+                          , sStylesheet  :: Stylesheet
+                          , sListTable   :: ListTable
+                          , sListOverrideTable :: ListTable
+                          , sEatChars    :: Int
+                          } deriving (Show)
+
+instance Default RTFState where
+ def = RTFState { sOptions = def
+                , sCharSet = ANSI
+                , sGroupStack = []
+                , sListStack = []
+                , sCurrentCell = mempty
+                , sTableRows = []
+                , sTextContent = []
+                , sMetadata = []
+                , sFontTable = mempty
+                , sStylesheet = mempty
+                , sListTable = mempty
+                , sListOverrideTable = mempty
+                , sEatChars = 0
+                }
+
+type FontTable = IntMap.IntMap FontFamily
+
+data FontFamily =
+  Roman | Swiss | Modern | Script | Decor | Tech | Bidi
+  deriving (Show, Eq)
+
+data StyleType = ParagraphStyle | SectionStyle | CharStyle | TableStyle
+  deriving (Show, Eq)
+
+data Style =
+  Style { styleNum :: Int
+        , styleType :: StyleType
+        , styleBasedOn :: Maybe Int
+        , styleName :: Text
+        , styleFormatting :: [Tok]
+        } deriving (Show, Eq)
+
+type Stylesheet = IntMap.IntMap Style
+
+data PictType =
+  Emfblip | Pngblip | Jpegblip
+  deriving (Show, Eq)
+
+data Pict =
+  Pict { picType :: Maybe PictType
+       , picWidth :: Maybe Int
+       , picHeight :: Maybe Int
+       , picWidthGoal :: Maybe Int
+       , picHeightGoal :: Maybe Int
+       , picBinary :: Bool
+       , picData :: Text
+       , picName :: Text
+       , picBytes :: BL.ByteString
+       } deriving (Show, Eq)
+
+instance Default Pict where
+ def = Pict { picType = Nothing
+            , picWidth = Nothing
+            , picHeight = Nothing
+            , picWidthGoal = Nothing
+            , picHeightGoal = Nothing
+            , picBinary = False
+            , picData = mempty
+            , picName = mempty
+            , picBytes = mempty }
+
+data Properties =
+  Properties
+  { gBold :: Bool
+  , gItalic :: Bool
+  , gCaps :: Bool
+  , gDeleted :: Bool
+  , gSub :: Bool
+  , gSuper :: Bool
+  , gSmallCaps :: Bool
+  , gUnderline :: Bool
+  , gHyperlink :: Maybe Text
+  , gAnchor :: Maybe Text
+  , gImage :: Maybe Pict
+  , gFontFamily :: Maybe FontFamily
+  , gHidden :: Bool
+  , gUC :: Int -- number of ansi chars to skip after unicode char
+  , gFootnote :: Maybe Blocks
+  , gOutlineLevel :: Maybe ListLevel
+  , gListOverride :: Maybe Override
+  , gListLevel :: Maybe Int
+  , gInTable :: Bool
+  } deriving (Show, Eq)
+
+instance Default Properties where
+   def = Properties { gBold = False
+                    , gItalic = False
+                    , gCaps = False
+                    , gDeleted = False
+                    , gSub = False
+                    , gSuper = False
+                    , gSmallCaps = False
+                    , gUnderline = False
+                    , gHyperlink = Nothing
+                    , gAnchor = Nothing
+                    , gImage = Nothing
+                    , gFontFamily = Nothing
+                    , gHidden = False
+                    , gUC = 1
+                    , gFootnote = Nothing
+                    , gOutlineLevel = Nothing
+                    , gListOverride = Nothing
+                    , gListLevel = Nothing
+                    , gInTable = False
+                    }
+
+type RTFParser m = ParserT Sources RTFState m
+
+data ListType = Bullet | Ordered ListAttributes
+  deriving (Show, Eq)
+
+type Override = Int
+
+type ListLevel = Int
+
+data List =
+    List Override ListLevel ListType [Blocks]  -- items in reverse order
+    deriving (Show, Eq)
+
+newtype TableRow = TableRow [Blocks] -- cells in reverse order
+    deriving (Show, Eq)
+
+parseRTF :: PandocMonad m => RTFParser m Pandoc
+parseRTF = do
+  skipMany nl
+  toks <- many tok
+  -- return $! traceShowId toks
+  bs <- (foldM processTok mempty toks >>= emitBlocks)
+  unclosed <- closeContainers
+  let doc = B.doc $ bs <> unclosed
+  kvs <- sMetadata <$> getState
+  pure $ foldr (uncurry B.setMeta) doc kvs
+
+data Tok = Tok SourcePos TokContents
+  deriving (Show, Eq)
+
+data TokContents =
+    ControlWord Text (Maybe Int)
+  | ControlSymbol Char
+  | UnformattedText Text
+  | HexVal Word8
+  | Grouped [Tok]
+  deriving (Show, Eq)
+
+tok :: PandocMonad m => RTFParser m Tok
+tok = do
+  pos <- getPosition
+  Tok pos <$> ((controlThing <|> unformattedText <|> grouped) <* skipMany nl)
+ where
+  controlThing = do
+    char '\\' *>
+      ( (ControlWord <$> letterSequence <*> (parameter <* optional delimChar))
+     <|> (HexVal <$> hexVal)
+     <|> (ControlSymbol <$> anyChar) )
+  parameter = do
+    hyph <- string "-" <|> pure ""
+    rest <- many digit
+    let pstr = T.pack $ hyph <> rest
+    return $ safeRead pstr
+  hexVal = do
+    char '\''
+    x <- hexDigit
+    y <- hexDigit
+    return $ hexToWord (T.pack [x,y])
+  letterSequence = T.pack <$> many1 (satisfy (\c -> isAscii c && isLetter c))
+  unformattedText =
+    UnformattedText . T.pack . mconcat <$>
+      many1 (   many1 (satisfy (not . isSpecial))
+            <|> ("" <$ nl))
+  grouped = Grouped <$> (char '{' *> skipMany nl *> manyTill tok (char '}'))
+
+nl :: PandocMonad m => RTFParser m ()
+nl = void (char '\n' <|> char '\r')
+
+isSpecial :: Char -> Bool
+isSpecial '{' = True
+isSpecial '}' = True
+isSpecial '\\' = True
+isSpecial '\n' = True
+isSpecial _ = False
+
+delimChar :: PandocMonad m => RTFParser m Char
+delimChar = satisfy (\c -> not (isAlphaNum c || isSpecial c))
+
+modifyGroup :: PandocMonad m
+            => (Properties -> Properties)
+            -> RTFParser m ()
+modifyGroup f =
+  updateState $ \st ->
+    st{ sGroupStack =
+          case sGroupStack st of
+            [] -> []
+            (x:xs) -> f x : xs }
+
+addFormatting :: (Properties, Text) -> Inlines
+addFormatting (_, "\n") = B.linebreak
+addFormatting (props, _) | gHidden props = mempty
+addFormatting (props, _) | Just bs <- gFootnote props = B.note bs
+addFormatting (props, txt) =
+  (if gBold props then B.strong else id) .
+  (if gItalic props then B.emph else id) .
+  (if gDeleted props then B.strikeout else id) .
+  (if gSub props then B.subscript else id) .
+  (if gSuper props then B.superscript else id) .
+  (if gSmallCaps props then B.smallcaps else id) .
+  (if gUnderline props then B.underline else id) .
+  (case gHyperlink props of
+     Nothing -> id
+     Just linkdest -> B.link linkdest mempty) .
+  (case gAnchor props of
+     Nothing -> id
+     Just ident -> B.spanWith (ident,[],[])) .
+  (case gFontFamily props of
+     Just Modern -> B.code
+     _ -> case gImage props of
+            Just pict ->
+              let attr = ("",[],
+                         (case picWidthGoal pict of
+                           Nothing -> []
+                           Just w  -> [("width", tshow (fromIntegral w / 1440
+                                                         :: Double)
+                                          <> "in")]) ++
+                         (case picHeightGoal pict of
+                            Nothing -> []
+                            Just h -> [("height", tshow (fromIntegral h / 1440
+                                                         :: Double)
+                                          <> "in")]))
+              in  B.imageWith attr (picName pict) "" . B.text
+            Nothing -> B.text) .
+  (if gCaps props then T.toUpper else id)
+  $ txt
+
+addText :: PandocMonad m => Text -> RTFParser m ()
+addText t = do
+  gs <- sGroupStack <$> getState
+  let props = case gs of
+                (x:_) -> x
+                _ -> def
+  updateState (\s -> s{ sTextContent = (props, t) : sTextContent s })
+
+inGroup :: PandocMonad m => RTFParser m a -> RTFParser m a
+inGroup p = do
+  updateState $ \st ->
+    st{ sGroupStack =
+        case sGroupStack st of
+          [] -> [def]
+          (x:xs) -> (x:x:xs) } -- inherit current group's properties
+  result <- p
+  updateState $ \st ->
+    st{ sGroupStack =
+        case sGroupStack st of
+          [] -> [] -- should not happen
+          (_:xs) -> xs }
+  return result
+
+getStyleFormatting :: PandocMonad m => Int -> RTFParser m [Tok]
+getStyleFormatting stynum = do
+  stylesheet <- sStylesheet <$> getState
+  case IntMap.lookup stynum stylesheet of
+    Nothing -> return []
+    Just sty ->
+      case styleBasedOn sty of
+        Just i -> (<> styleFormatting sty)  <$> getStyleFormatting i
+        Nothing -> return $ styleFormatting sty
+
+isMetadataField :: Text -> Bool
+isMetadataField "title" = True
+isMetadataField "subject" = True
+isMetadataField "author" = True
+isMetadataField "manager" = True
+isMetadataField "company" = True
+isMetadataField "operator" = True
+isMetadataField "category" = True
+isMetadataField "keywords" = True
+isMetadataField "comment" = True
+isMetadataField "doccomm" = True
+isMetadataField "hlinkbase" = True
+isMetadataField "generator" = True
+isMetadataField _ = False
+
+isHeaderFooter :: Text -> Bool
+isHeaderFooter "header" = True
+isHeaderFooter "headerl" = True
+isHeaderFooter "headerr" = True
+isHeaderFooter "headerf" = True
+isHeaderFooter "footer" = True
+isHeaderFooter "footerl" = True
+isHeaderFooter "footerr" = True
+isHeaderFooter "footerf" = True
+isHeaderFooter _ = False
+
+boolParam :: Maybe Int -> Bool
+boolParam (Just 0) = False
+boolParam _ = True
+
+isUnderline :: Text -> Bool
+isUnderline "ul" = True
+isUnderline "uld" = True
+isUnderline "uldash" = True
+isUnderline "uldashd" = True
+isUnderline "uldashdd" = True
+isUnderline "uldb" = True
+isUnderline "ulth" = True
+isUnderline "ulthd" = True
+isUnderline "ulthdash" = True
+isUnderline "ulw" = True
+isUnderline "ulwave" = True
+isUnderline _ = False
+
+processTok :: PandocMonad m => Blocks -> Tok -> RTFParser m Blocks
+processTok bs (Tok pos tok') = do
+  setPosition pos
+  -- ignore \* at beginning of group:
+  let tok'' = case tok' of
+                Grouped (Tok _ (ControlSymbol '*') : toks) -> Grouped toks
+                _ -> tok'
+  case tok'' of
+    HexVal{} -> return ()
+    UnformattedText{} -> return ()
+    _ -> updateState $ \s -> s{ sEatChars = 0 }
+  case tok'' of
+    Grouped (Tok _ (ControlWord "fonttbl" _) : toks) -> inGroup $ do
+      updateState $ \s -> s{ sFontTable = processFontTable toks }
+      pure bs
+    Grouped (Tok _ (ControlWord "field" _) : toks) ->
+      inGroup $ handleField bs toks
+    Grouped (Tok _ (ControlWord "pict" _) : toks) ->
+      bs <$ inGroup (handlePict toks)
+    Grouped (Tok _ (ControlWord "stylesheet" _) : toks) ->
+      bs <$ inGroup (handleStylesheet toks)
+    Grouped (Tok _ (ControlWord "listtext" _) : _) -> do
+      -- eject any previous list items...sometimes TextEdit
+      -- doesn't put in a \par
+      emitBlocks bs
+    Grouped (Tok _ (ControlWord "colortbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "listtable" _) : toks) ->
+      bs <$ inGroup (handleListTable toks)
+    Grouped (Tok _ (ControlWord "listoverridetable" _) : toks) ->
+      bs <$ inGroup (handleListOverrideTable toks)
+    Grouped (Tok _ (ControlWord "wgrffmtfilter" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "themedata" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "colorschememapping" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "datastore" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "latentstyles" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "pntxta" _) : _) -> pure bs -- TODO
+    Grouped (Tok _ (ControlWord "pntxtb" _) : _) -> pure bs -- TODO
+    Grouped (Tok _ (ControlWord "xmlnstbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "filetbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "expandedcolortbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "listtables" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "revtbl" _) : _) -> pure bs
+    Grouped (Tok _ (ControlWord "bkmkstart" _)
+             : Tok _ (UnformattedText t) : _) -> do
+      -- TODO ideally we'd put the span around bkmkstart/end, but this
+      -- is good for now:
+      modifyGroup (\g -> g{ gAnchor = Just $ T.strip t })
+      addText ""
+      modifyGroup (\g -> g{ gAnchor = Nothing })
+      pure bs
+    Grouped (Tok _ (ControlWord "bkmkend" _) : _) -> pure bs -- TODO
+    Grouped (Tok _ (ControlWord f _) : _) | isHeaderFooter f -> pure bs
+    Grouped (Tok _ (ControlWord "footnote" _) : toks) -> do
+      noteBs <- inGroup $ processDestinationToks toks
+      modifyGroup (\g -> g{ gFootnote = Just noteBs })
+      addText "*"
+      modifyGroup (\g -> g{ gFootnote = Nothing })
+      return bs
+    Grouped (Tok _ (ControlWord "info" _) : toks) ->
+      bs <$ inGroup (processDestinationToks toks)
+    Grouped (Tok _ (ControlWord f _) : toks) | isMetadataField f -> inGroup $ do
+      foldM_ processTok mempty toks
+      annotatedToks <- reverse . sTextContent <$> getState
+      updateState $ \s -> s{ sTextContent = [] }
+      let ils = B.trimInlines . mconcat $ map addFormatting annotatedToks
+      updateState $ \s -> s{ sMetadata = (f, ils) : sMetadata s }
+      pure bs
+    Grouped toks -> inGroup (foldM processTok bs toks)
+    UnformattedText t -> bs <$ do
+      -- return $! traceShowId $! (pos, t)
+      eatChars <- sEatChars <$> getState
+      case eatChars of
+        0 -> addText t
+        n | n < T.length t -> do
+             updateState $ \s -> s{ sEatChars = 0 }
+             addText (T.drop n t)
+          | otherwise -> do
+             updateState $ \s -> s{ sEatChars = n - T.length t }
+    HexVal n -> bs <$ do
+      eatChars <- sEatChars <$> getState
+      if eatChars == 0
+         then do
+           charset <- sCharSet <$> getState
+           case charset of
+             ANSI -> addText (T.singleton $ ansiToChar n)
+             Mac  -> addText (T.singleton $ macToChar n)
+             Pc   -> addText (T.singleton $ pcToChar n)
+             Pca  -> addText (T.singleton $ pcaToChar n)
+         else updateState $ \s -> s{ sEatChars = eatChars - 1 }
+    ControlWord "ansi" _ -> bs <$
+      updateState (\s -> s{ sCharSet = ANSI })
+    ControlWord "mac" _ -> bs <$
+      updateState (\s -> s{ sCharSet = Mac })
+    ControlWord "pc" _ -> bs <$
+      updateState (\s -> s{ sCharSet = Pc })
+    ControlWord "pca" _ -> bs <$
+      updateState (\s -> s{ sCharSet = Pca })
+    ControlWord "outlinelevel" mbp -> bs <$
+      modifyGroup (\g -> g{ gOutlineLevel = mbp })
+    ControlWord "ls" mbp -> bs <$
+      modifyGroup (\g -> g{ gListOverride = mbp })
+    ControlWord "ilvl" mbp -> bs <$
+      modifyGroup (\g -> g{ gListLevel = mbp })
+    ControlSymbol '\\' -> bs <$ addText "\\"
+    ControlSymbol '{' -> bs <$ addText "{"
+    ControlSymbol '}' -> bs <$ addText "}"
+    ControlSymbol '~' -> bs <$ addText "\x00a0"
+    ControlSymbol '-' -> bs <$ addText "\x00ad"
+    ControlSymbol '_' -> bs <$ addText "\x2011"
+    ControlWord "trowd" _ -> bs <$ do -- add new row
+      updateState $ \s -> s{ sTableRows = TableRow [] : sTableRows s
+                           , sCurrentCell = mempty }
+    ControlWord "cell" _ -> bs <$ do
+      new <- emitBlocks mempty
+      curCell <- (<> new) . sCurrentCell <$> getState
+      updateState $ \s -> s{ sTableRows =
+                                case sTableRows s of
+                                  TableRow cs : rs ->
+                                    TableRow (curCell : cs) : rs
+                                  [] -> [TableRow [curCell]] -- shouldn't happen
+                           , sCurrentCell = mempty }
+    ControlWord "intbl" _ -> bs <$ modifyGroup (\g -> g{ gInTable = True })
+    ControlWord "plain" _ -> bs <$ modifyGroup (const def)
+    ControlWord "lquote" _ -> bs <$ addText "\x2018"
+    ControlWord "rquote" _ -> bs <$ addText "\x2019"
+    ControlWord "ldblquote" _ -> bs <$ addText "\x201C"
+    ControlWord "rdblquote" _ -> bs <$ addText "\x201D"
+    ControlWord "emdash" _ -> bs <$ addText "\x2014"
+    ControlWord "emspace" _ -> bs <$ addText "\x2003"
+    ControlWord "enspace" _ -> bs <$ addText "\x2002"
+    ControlWord "endash" _ -> bs <$ addText "\x2013"
+    ControlWord "bullet" _ -> bs <$ addText "\x2022"
+    ControlWord "tab" _ -> bs <$ addText "\t"
+    ControlWord "line" _ -> bs <$ addText "\n"
+    ControlSymbol '\n' -> bs <$ addText "\n"
+    ControlSymbol '\r' -> bs <$ addText "\n"
+    ControlWord "uc" (Just i) -> bs <$ modifyGroup (\g -> g{ gUC = i })
+    ControlWord "cs" (Just n) -> do
+      getStyleFormatting n >>= foldM processTok bs
+    ControlWord "s" (Just n) -> do
+      getStyleFormatting n >>= foldM processTok bs
+    ControlWord "ds" (Just n) -> do
+      getStyleFormatting n >>= foldM processTok bs
+    ControlWord "f" (Just i) -> bs <$ do
+      fontTable <- sFontTable <$> getState
+      modifyGroup (\g -> g{ gFontFamily = IntMap.lookup i fontTable })
+    ControlWord "u" (Just i) -> bs <$ do
+      st <- getState
+      let curgroup = case sGroupStack st of
+                       [] -> def
+                       (x:_) -> x
+      updateState $ \s -> s{ sEatChars = gUC curgroup }
+      -- "RTF control words generally accept signed 16-bit numbers as
+      -- arguments. For this reason, Unicode values greater than 32767
+      -- must be expressed as negative numbers."
+      let codepoint :: Word16
+          codepoint = fromIntegral i
+      addText (T.singleton (chr $ fromIntegral codepoint))
+    ControlWord "caps" mbp -> bs <$
+      modifyGroup (\g -> g{ gCaps = boolParam mbp })
+    ControlWord "deleted" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "b" mbp -> bs <$
+      modifyGroup (\g -> g{ gBold = boolParam mbp })
+    ControlWord "i" mbp -> bs <$
+      modifyGroup (\g -> g{ gItalic = boolParam mbp })
+    ControlWord "sub" mbp -> bs <$
+      modifyGroup (\g -> g{ gSub = boolParam mbp })
+    ControlWord "super" mbp -> bs <$
+      modifyGroup (\g -> g{ gSuper = boolParam mbp })
+    ControlWord "up" mbp -> bs <$
+      modifyGroup (\g -> g{ gSuper = boolParam mbp })
+    ControlWord "strike" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "strikedl" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "striked" mbp -> bs <$
+      modifyGroup (\g -> g{ gDeleted = boolParam mbp })
+    ControlWord "scaps" mbp -> bs <$
+      modifyGroup (\g -> g{ gSmallCaps = boolParam mbp })
+    ControlWord "v" mbp -> bs <$
+      modifyGroup (\g -> g{ gHidden = boolParam mbp })
+    ControlWord x mbp | isUnderline x -> bs <$
+      modifyGroup (\g -> g{ gUnderline = boolParam mbp })
+    ControlWord "ulnone" _ -> bs <$
+      modifyGroup (\g -> g{ gUnderline = False })
+    ControlWord "pard" _ -> bs <$ do
+      modifyGroup (const def)
+      getStyleFormatting 0 >>= foldM processTok bs
+    ControlWord "par" _ -> emitBlocks bs
+    _ -> pure bs
+
+processDestinationToks :: PandocMonad m => [Tok] -> RTFParser m Blocks
+processDestinationToks toks = do
+  textContent <- sTextContent <$> getState
+  liststack <- sListStack <$> getState
+  updateState $ \s -> s{ sTextContent = mempty
+                       , sListStack = [] }
+  result <- inGroup $
+              foldM processTok mempty toks >>= emitBlocks
+  unclosed <- closeContainers
+  updateState $ \s -> s{ sTextContent = textContent
+                       , sListStack = liststack }
+  return $ result <> unclosed
+
+-- close lists >= level
+closeLists :: PandocMonad m => Int -> RTFParser m Blocks
+closeLists lvl = do
+  lists <- sListStack <$> getState
+  case lists of
+    (List _ lvl' lt items : rest) | lvl' >= lvl -> do
+      let newlist = (case lt of
+                      Bullet -> B.bulletList
+                      Ordered listAttr -> B.orderedListWith listAttr)
+                    (reverse items)
+      updateState $ \s -> s{ sListStack = rest }
+      case rest of
+        [] -> do
+          updateState $ \s -> s{ sListStack = rest }
+          pure newlist
+        (List lo lvl'' lt' [] : rest') -> do -- should not happen
+          updateState $ \s -> s{ sListStack =
+               List lo lvl'' lt' [newlist] : rest' }
+          closeLists lvl
+        (List lo lvl'' lt' (i:is) : rest') -> do
+          updateState $ \s -> s{ sListStack =
+               List lo lvl'' lt' (i <> newlist : is) : rest' }
+          closeLists lvl
+    _ -> pure mempty
+
+closeTable :: PandocMonad m => RTFParser m Blocks
+closeTable = do
+  rawrows <- sTableRows <$> getState
+  if null rawrows
+     then return mempty
+     else do
+       let getCells (TableRow cs) = reverse cs
+       let rows = map getCells . reverse $ rawrows
+       updateState $ \s -> s{ sCurrentCell = mempty
+                            , sTableRows = [] }
+       return $ B.simpleTable [] rows
+
+closeContainers :: PandocMonad m => RTFParser m Blocks
+closeContainers = do
+  tbl <- closeTable
+  lists <- closeLists 0
+  return $ tbl <> lists
+
+trimFinalLineBreak :: Inlines -> Inlines
+trimFinalLineBreak ils =
+  case Seq.viewr (B.unMany ils) of
+    rest Seq.:> LineBreak -> B.Many rest
+    _ -> ils
+
+emitBlocks :: PandocMonad m => Blocks -> RTFParser m Blocks
+emitBlocks bs = do
+  annotatedToks <- reverse . sTextContent <$> getState
+  updateState $ \s -> s{ sTextContent = [] }
+  let justCode = def{ gFontFamily = Just Modern }
+  let prop = case annotatedToks of
+               [] -> def
+               ((p,_):_) -> p
+  tbl <- if gInTable prop
+            then pure mempty
+            else closeTable
+  new <-
+    case annotatedToks of
+      [] -> pure mempty
+      _ | Just lst <- gListOverride prop
+         -> do
+           let level = fromMaybe 0 $ gListLevel prop
+           listOverrideTable <- sListOverrideTable <$> getState
+           let listType = fromMaybe Bullet $
+                 IntMap.lookup lst listOverrideTable >>= IntMap.lookup level
+           lists <- sListStack <$> getState
+           -- get para contents of list item
+           let newbs = B.para . B.trimInlines . trimFinalLineBreak . mconcat $
+                        map addFormatting annotatedToks
+           case lists of
+             (List lo parentlevel _lt items : cs)
+               | lo == lst
+               , parentlevel == level
+               -- add another item to existing list
+               -> do updateState $ \s ->
+                        s{ sListStack =
+                             List lo level listType (newbs:items) : cs }
+                     pure mempty
+               | lo /= lst || level < parentlevel
+               -- close parent list and add new list
+               -> do new <- closeLists level  -- close open lists > level
+                     updateState $ \s ->
+                       s{ sListStack = List lst level listType [newbs] :
+                           sListStack s }
+                     pure new
+             _ -> do -- add new list (level > parentlevel)
+                  updateState $ \s ->
+                    s{ sListStack = List lst level listType [newbs] :
+                         sListStack s }
+                  pure mempty
+        | Just lvl <- gOutlineLevel prop
+         -> do
+            lists <- closeLists 0
+            pure $ lists <>
+                   B.header (lvl + 1)
+                   (B.trimInlines . mconcat $ map addFormatting
+                                            $ removeCommonFormatting
+                                              annotatedToks)
+        | all ((== justCode) . fst) annotatedToks
+         -> do
+            lists <- closeLists 0
+            pure $ lists <>
+                    B.codeBlock (mconcat $ map snd annotatedToks)
+        | all (T.all isSpace . snd) annotatedToks
+         -> closeLists 0
+        | otherwise -> do
+            lists <- closeLists 0
+            pure $ lists <>
+              B.para (B.trimInlines . trimFinalLineBreak . mconcat
+                $ map addFormatting annotatedToks)
+  if gInTable prop
+     then do
+       updateState $ \s -> s{ sCurrentCell = sCurrentCell s <> new }
+       pure bs
+     else do
+       pure $ bs <> tbl <> new
+
+-- Headers often have a style applied. We usually want to remove
+-- this, because headers will have their own styling in the target
+-- format.
+removeCommonFormatting :: [(Properties, Text)] -> [(Properties, Text)]
+removeCommonFormatting =
+  (\ts ->
+    if all (gBold . fst) ts
+       then map (\(p,t) -> (p{ gBold = False }, t)) ts
+       else ts) .
+  (\ts ->
+    if all (gItalic . fst) ts
+       then map (\(p,t) -> (p{ gItalic = False }, t)) ts
+       else ts)
+
+
+-- {\field{\*\fldinst{HYPERLINK "http://pandoc.org"}}{\fldrslt foo}}
+handleField :: PandocMonad m => Blocks -> [Tok] -> RTFParser m Blocks
+handleField bs
+  (Tok _
+    (Grouped
+     (Tok _ (ControlSymbol '*')
+     :Tok _ (ControlWord "fldinst" Nothing)
+     :Tok _ (Grouped (Tok _ (UnformattedText insttext):rest))
+     :_))
+  :linktoks)
+  | Just linkdest <- getHyperlink insttext
+  = do let linkdest' = case rest of
+                         (Tok _ (ControlSymbol '\\')
+                          : Tok _ (UnformattedText t)
+                          : _) | Just bkmrk <- T.stripPrefix "l" t
+                           -> "#" <> unquote bkmrk
+                         _ -> linkdest
+       modifyGroup $ \g -> g{ gHyperlink = Just linkdest' }
+       result <- foldM processTok bs linktoks
+       modifyGroup $ \g -> g{ gHyperlink = Nothing }
+       return result
+handleField bs _ = pure bs
+
+unquote :: Text -> Text
+unquote = T.dropWhile (=='"') . T.dropWhileEnd (=='"') . T.strip
+
+handleListTable :: PandocMonad m => [Tok] -> RTFParser m ()
+handleListTable toks = do
+  mapM_ handleList toks
+
+handleList :: PandocMonad m => Tok -> RTFParser m ()
+handleList (Tok _ (Grouped (Tok _ (ControlWord "list" _) : toks))) = do
+  let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks]
+  let levels = [ts | Tok _ (Grouped (Tok _ (ControlWord "listlevel" _) : ts))
+                 <- toks]
+  tbl <- foldM handleListLevel mempty (zip [0..] levels)
+  updateState $ \s -> s{ sListTable = IntMap.insert listid tbl $ sListTable s }
+handleList _ = return ()
+
+handleListLevel :: PandocMonad m
+                => ListLevelTable
+                -> (Int, [Tok])
+                -> RTFParser m ListLevelTable
+handleListLevel levelTable (lvl, toks) = do
+  let start = headDef 1
+                [n | Tok _ (ControlWord "levelstartat" (Just n)) <- toks]
+  let mbNumberStyle =
+        case [n | Tok _ (ControlWord "levelnfc" (Just n)) <- toks] of
+          [] -> Nothing
+          (0:_) -> Just Decimal
+          (1:_) -> Just UpperRoman
+          (2:_) -> Just LowerRoman
+          (3:_) -> Just UpperAlpha
+          (4:_) -> Just LowerAlpha
+          (23:_) -> Nothing
+          (255:_) -> Nothing
+          _ -> Just DefaultStyle
+  let listType = case mbNumberStyle of
+                   Nothing -> Bullet
+                   Just numStyle -> Ordered (start,numStyle,Period)
+  return $ IntMap.insert lvl listType levelTable
+
+handleListOverrideTable :: PandocMonad m => [Tok] -> RTFParser m ()
+handleListOverrideTable toks = mapM_ handleListOverride toks
+
+handleListOverride :: PandocMonad m => Tok -> RTFParser m ()
+handleListOverride
+ (Tok _ (Grouped (Tok _ (ControlWord "listoverride" _) : toks))) = do
+  let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks]
+  let lsn = headDef 0 [n | Tok _ (ControlWord "ls" (Just n)) <- toks]
+  -- TODO override stuff, esp. start num -- for now we just handle indirection
+  listTable <- sListTable <$> getState
+  case IntMap.lookup listid listTable of
+    Nothing -> return ()
+    Just tbl -> updateState $ \s ->
+                   s{ sListOverrideTable = IntMap.insert lsn tbl $
+                        sListOverrideTable s }
+handleListOverride _ = return ()
+
+handleStylesheet :: PandocMonad m => [Tok] -> RTFParser m ()
+handleStylesheet toks = do
+  let styles = mapMaybe parseStyle toks
+  updateState $ \s -> s{ sStylesheet = IntMap.fromList
+                                     $ zip (map styleNum styles) styles }
+
+parseStyle :: Tok -> Maybe Style
+parseStyle (Tok _ (Grouped toks)) = do
+  let (styType, styNum, rest) =
+        case toks of
+          Tok _ (ControlWord "s" (Just n)) : ts -> (ParagraphStyle, n, ts)
+          Tok _ (ControlWord "ds" (Just n)) : ts -> (SectionStyle, n, ts)
+          Tok _ (ControlWord "cs" (Just n)) : ts -> (CharStyle, n, ts)
+          Tok _ (ControlWord "ts" (Just n)) : ts -> (TableStyle, n, ts)
+          _ -> (ParagraphStyle, 0, toks)
+  let styName = case lastMay rest of
+                  Just (Tok _ (UnformattedText t)) -> T.dropWhileEnd (==';') t
+                  _ -> mempty
+  let isBasedOn (Tok _ (ControlWord "sbasedon" (Just _))) = True
+      isBasedOn _ = False
+  let styBasedOn = case find isBasedOn toks of
+                     Just (Tok _ (ControlWord "sbasedon" (Just i))) -> Just i
+                     _ -> Nothing
+  let isStyleControl (Tok _ (ControlWord x _)) =
+         x `elem` ["cs", "s", "ds", "additive", "sbasedon", "snext",
+                   "sautoupd", "shidden", "keycode", "alt", "shift",
+                   "ctrl", "fn"]
+      isStyleControl _ = False
+  let styFormatting = filter (not . isStyleControl) (initSafe rest)
+  return $ Style{ styleNum = styNum
+                , styleType = styType
+                , styleBasedOn = styBasedOn
+                , styleName = styName
+                , styleFormatting = styFormatting
+                }
+parseStyle _ = Nothing
+
+hexToWord  :: Text -> Word8
+hexToWord t = case TR.hexadecimal t of
+                Left _ -> 0
+                Right (x,_) -> x
+
+
+handlePict :: PandocMonad m => [Tok] -> RTFParser m ()
+handlePict toks = do
+  let pict = foldl' getPictData def toks
+  let altText = "image"
+  let binToWord = T.foldl' (\acc x -> acc * 2 + fromIntegral (digitToInt x)) 0
+  let isBinaryDigit '0' = True
+      isBinaryDigit '1' = True
+      isBinaryDigit _   = False
+  let bytes = BL.pack $
+              if picBinary pict && T.all isBinaryDigit (picData pict)
+                 then map binToWord $ T.chunksOf 8 $ picData pict
+                 else map hexToWord $ T.chunksOf 2 $ picData pict
+  let (mimetype, ext) =
+        case picType pict of
+          Just Emfblip -> (Just "image/x-emf", ".emf")
+          Just Pngblip -> (Just "image/png", ".png")
+          Just Jpegblip -> (Just "image/jpeg", ".jpg")
+          Nothing -> (Nothing, "")
+  case mimetype of
+    Just mt -> do
+      let pictname = showDigest (sha1 bytes) <> ext
+      insertMedia pictname (Just mt) bytes
+      modifyGroup $ \g -> g{ gImage = Just pict{ picName = T.pack pictname,
+                                                 picBytes = bytes } }
+      addText altText
+      modifyGroup $ \g -> g{ gImage = Nothing }
+    _ -> return ()
+ where
+  getPictData :: Pict -> Tok -> Pict
+  getPictData pict (Tok _ tok') =
+    case tok' of
+      ControlWord "emfblip" _-> pict{ picType = Just Emfblip }
+      ControlWord "pngblip" _-> pict{ picType = Just Pngblip }
+      ControlWord "jpegblip" _-> pict{ picType = Just Jpegblip }
+      ControlWord "picw" (Just w) -> pict{ picWidth = Just w }
+      ControlWord "pich" (Just h) -> pict{ picHeight = Just h }
+      ControlWord "picwgoal" (Just w) -> pict{ picWidthGoal = Just w }
+      ControlWord "pichgoal" (Just h) -> pict{ picHeightGoal = Just h }
+      ControlWord "bin" _ -> pict{ picBinary = True }
+      UnformattedText t -> pict{ picData = t }
+      _ -> pict
+
+
+getHyperlink :: Text -> Maybe Text
+getHyperlink t =
+  case T.stripPrefix "HYPERLINK" (T.strip t) of
+    Nothing -> Nothing
+    Just rest -> Just $ unquote rest
+
+processFontTable :: [Tok] -> FontTable
+processFontTable = snd . foldl' go (0, mempty)
+ where
+  go (fontnum, tbl) (Tok _ tok') =
+    case tok' of
+     (ControlWord "f" (Just i)) -> (i, tbl)
+     (ControlWord "fnil" _) -> (fontnum, tbl)
+     (ControlWord "froman" _) -> (fontnum, IntMap.insert fontnum Roman tbl)
+     (ControlWord "fswiss" _) -> (fontnum, IntMap.insert fontnum Swiss tbl)
+     (ControlWord "fmodern" _) -> (fontnum, IntMap.insert fontnum Modern tbl)
+     (ControlWord "fscript" _) -> (fontnum, IntMap.insert fontnum Script tbl)
+     (ControlWord "fdecor" _) -> (fontnum, IntMap.insert fontnum Decor tbl)
+     (ControlWord "ftech" _) -> (fontnum, IntMap.insert fontnum Tech tbl)
+     (ControlWord "fbidi" _) -> (fontnum, IntMap.insert fontnum Bidi tbl)
+     (Grouped ts) -> foldl' go (fontnum, tbl) ts
+     _ -> (fontnum, tbl)
+
+
+ansiToChar :: Word8 -> Char
+ansiToChar i = chr $
+  case i of
+    128 -> 8364
+    130 -> 8218
+    131 -> 402
+    132 -> 8222
+    133 -> 8230
+    134 -> 8224
+    135 -> 8225
+    136 -> 710
+    137 -> 8240
+    138 -> 352
+    139 -> 8249
+    140 -> 338
+    142 -> 381
+    145 -> 8216
+    146 -> 8217
+    147 -> 8220
+    148 -> 8221
+    149 -> 8226
+    150 -> 8211
+    151 -> 8212
+    152 -> 732
+    153 -> 8482
+    154 -> 353
+    155 -> 8250
+    156 -> 339
+    158 -> 382
+    159 -> 376
+    173 -> 0xAD
+    _ -> fromIntegral i
+
+macToChar :: Word8 -> Char
+macToChar i = chr $
+  case i of
+    0x80 -> 0xC4
+    0x81 -> 0xC5
+    0x82 -> 0xC7
+    0x83 -> 0xC9
+    0x84 -> 0xD1
+    0x85 -> 0xD6
+    0x86 -> 0xDC
+    0x87 -> 0xE1
+    0x88 -> 0xE0
+    0x89 -> 0xE2
+    0x8A -> 0xE4
+    0x8B -> 0xE3
+    0x8C -> 0xE5
+    0x8D -> 0xE7
+    0x8E -> 0xE9
+    0x8F -> 0xE8
+    0x90 -> 0xEA
+    0x91 -> 0xEB
+    0x92 -> 0xED
+    0x93 -> 0xEC
+    0x94 -> 0xEE
+    0x95 -> 0xEF
+    0x96 -> 0xF1
+    0x97 -> 0xF3
+    0x98 -> 0xF2
+    0x99 -> 0xF4
+    0x9A -> 0xF6
+    0x9B -> 0xF5
+    0x9C -> 0xFA
+    0x9D -> 0xF9
+    0x9E -> 0xFB
+    0x9F -> 0xFC
+    0xA0 -> 0xDD
+    0xA1 -> 0xB0
+    0xA2 -> 0xA2
+    0xA3 -> 0xA3
+    0xA4 -> 0xA7
+    0xA5 -> 0xD7
+    0xA6 -> 0xB6
+    0xA7 -> 0xDF
+    0xA8 -> 0xAE
+    0xA9 -> 0xA9
+    0xAA -> 0xB2
+    0xAB -> 0xB4
+    0xAC -> 0xA8
+    0xAD -> 0xB3
+    0xAE -> 0xC6
+    0xAF -> 0xD8
+    0xB0 -> 0xB9
+    0xB1 -> 0xB1
+    0xB2 -> 0xBC
+    0xB3 -> 0xBD
+    0xB4 -> 0xA5
+    0xB5 -> 0xB5
+    0xBA -> 0xBE
+    0xBB -> 0xAA
+    0xBC -> 0xBA
+    0xBE -> 0xE6
+    0xBF -> 0xF8
+    0xC0 -> 0xBF
+    0xC1 -> 0xA1
+    0xC2 -> 0xAC
+    0xC3 -> 0x0141
+    0xC4 -> 0x0192
+    0xC5 -> 0x02CB
+    0xC7 -> 0xAB
+    0xC8 -> 0xBB
+    0xC9 -> 0xA6
+    0xCA -> 0xA0
+    0xCB -> 0xC0
+    0xCC -> 0xC3
+    0xCD -> 0xD5
+    0xCE -> 0x0152
+    0xCF -> 0x0153
+    0xD0 -> 0xAD
+    0xD4 -> 0x0142
+    0xD6 -> 0xF7
+    0xD8 -> 0xFF
+    0xD9 -> 0x0178
+    0xDB -> 0xA4
+    0xDC -> 0xD0
+    0xDD -> 0xF0
+    0xDE -> 0xDE
+    0xDF -> 0xFE
+    0xE0 -> 0xFD
+    0xE1 -> 0xB7
+    0xE5 -> 0xC2
+    0xE6 -> 0xCA
+    0xE7 -> 0xC1
+    0xE8 -> 0xCB
+    0xE9 -> 0xC8
+    0xEA -> 0xCD
+    0xEB -> 0xCE
+    0xEC -> 0xCF
+    0xED -> 0xCC
+    0xEE -> 0xD3
+    0xEF -> 0xD4
+    0xF1 -> 0xD2
+    0xF2 -> 0xDA
+    0xF3 -> 0xDB
+    0xF4 -> 0xD9
+    0xF5 -> 0x0131
+    0xF6 -> 0x02C6
+    0xF7 -> 0x02DC
+    0xF8 -> 0xAF
+    0xF9 -> 0x02D8
+    0xFA -> 0x02D9
+    0xFB -> 0x02DA
+    0xFC -> 0xB8
+    0xFD -> 0x02DD
+    0xFE -> 0x02DB
+    0xFF -> 0x02C7
+    _ -> fromIntegral i
+
+pcToChar :: Word8 -> Char
+pcToChar i = chr $
+  case i of
+    0x80 -> 0xc7
+    0x81 -> 0xfc
+    0x82 -> 0xe9
+    0x83 -> 0xe2
+    0x84 -> 0xe4
+    0x85 -> 0xe0
+    0x86 -> 0xe5
+    0x87 -> 0xe7
+    0x88 -> 0xea
+    0x89 -> 0xeb
+    0x8a -> 0xe8
+    0x8b -> 0xef
+    0x8c -> 0xee
+    0x8d -> 0xec
+    0x8e -> 0xc4
+    0x8f -> 0xc5
+    0x90 -> 0xc9
+    0x91 -> 0xe6
+    0x92 -> 0xc6
+    0x93 -> 0xf4
+    0x94 -> 0xf6
+    0x95 -> 0xf2
+    0x96 -> 0xfb
+    0x97 -> 0xf9
+    0x98 -> 0xff
+    0x99 -> 0xd6
+    0x9a -> 0xdc
+    0x9b -> 0xa2
+    0x9c -> 0xa3
+    0x9d -> 0xa5
+    0x9e -> 0x20a7
+    0x9f -> 0x0192
+    0xa0 -> 0xe1
+    0xa1 -> 0xed
+    0xa2 -> 0xf3
+    0xa3 -> 0xfa
+    0xa4 -> 0xf1
+    0xa5 -> 0xd1
+    0xa6 -> 0xaa
+    0xa7 -> 0xba
+    0xa8 -> 0xbf
+    0xa9 -> 0x2310
+    0xaa -> 0xac
+    0xab -> 0xbd
+    0xac -> 0xbc
+    0xad -> 0xa1
+    0xae -> 0xab
+    0xaf -> 0xbb
+    0xb0 -> 0x2591
+    0xb1 -> 0x2592
+    0xb2 -> 0x2593
+    0xb3 -> 0x2502
+    0xb4 -> 0x2524
+    0xb5 -> 0x2561
+    0xb6 -> 0x2562
+    0xb7 -> 0x2556
+    0xb8 -> 0x2555
+    0xb9 -> 0x2563
+    0xba -> 0x2551
+    0xbb -> 0x2557
+    0xbc -> 0x255d
+    0xbd -> 0x255c
+    0xbe -> 0x255b
+    0xbf -> 0x2510
+    0xc0 -> 0x2514
+    0xc1 -> 0x2534
+    0xc2 -> 0x252c
+    0xc3 -> 0x251c
+    0xc4 -> 0x2500
+    0xc5 -> 0x253c
+    0xc6 -> 0x255e
+    0xc7 -> 0x255f
+    0xc8 -> 0x255a
+    0xc9 -> 0x2554
+    0xca -> 0x2569
+    0xcb -> 0x2566
+    0xcc -> 0x2560
+    0xcd -> 0x2550
+    0xce -> 0x256c
+    0xcf -> 0x2567
+    0xd0 -> 0x2568
+    0xd1 -> 0x2564
+    0xd2 -> 0x2565
+    0xd3 -> 0x2559
+    0xd4 -> 0x2558
+    0xd5 -> 0x2552
+    0xd6 -> 0x2553
+    0xd7 -> 0x256b
+    0xd8 -> 0x256a
+    0xd9 -> 0x2518
+    0xda -> 0x250c
+    0xdb -> 0x2588
+    0xdc -> 0x2584
+    0xdd -> 0x258c
+    0xde -> 0x2590
+    0xdf -> 0x2580
+    0xe0 -> 0x03b1
+    0xe1 -> 0xdf
+    0xe2 -> 0x0393
+    0xe3 -> 0x03c0
+    0xe4 -> 0x03a3
+    0xe5 -> 0x03c3
+    0xe6 -> 0xb5
+    0xe7 -> 0x03c4
+    0xe8 -> 0x03a6
+    0xe9 -> 0x0398
+    0xea -> 0x03a9
+    0xeb -> 0x03b4
+    0xec -> 0x221e
+    0xed -> 0x03c6
+    0xee -> 0x03b5
+    0xef -> 0x2229
+    0xf0 -> 0x2261
+    0xf1 -> 0xb1
+    0xf2 -> 0x2265
+    0xf3 -> 0x2264
+    0xf4 -> 0x2320
+    0xf5 -> 0x2321
+    0xf6 -> 0xf7
+    0xf7 -> 0x2248
+    0xf8 -> 0xb0
+    0xf9 -> 0x2219
+    0xfa -> 0xb7
+    0xfb -> 0x221a
+    0xfc -> 0x207f
+    0xfd -> 0xb2
+    0xfe -> 0x25a0
+    0xff -> 0xa0
+    _    -> fromIntegral i
+
+pcaToChar :: Word8 -> Char
+pcaToChar i = chr $
+  case i of
+    0x80 -> 0x00c7
+    0x81 -> 0x00fc
+    0x82 -> 0x00e9
+    0x83 -> 0x00e2
+    0x84 -> 0x00e4
+    0x85 -> 0x00e0
+    0x86 -> 0x00e5
+    0x87 -> 0x00e7
+    0x88 -> 0x00ea
+    0x89 -> 0x00eb
+    0x8a -> 0x00e8
+    0x8b -> 0x00ef
+    0x8c -> 0x00ee
+    0x8d -> 0x00ec
+    0x8e -> 0x00c4
+    0x8f -> 0x00c5
+    0x90 -> 0x00c9
+    0x91 -> 0x00e6
+    0x92 -> 0x00c6
+    0x93 -> 0x00f4
+    0x94 -> 0x00f6
+    0x95 -> 0x00f2
+    0x96 -> 0x00fb
+    0x97 -> 0x00f9
+    0x98 -> 0x00ff
+    0x99 -> 0x00d6
+    0x9a -> 0x00dc
+    0x9b -> 0x00f8
+    0x9c -> 0x00a3
+    0x9d -> 0x00d8
+    0x9e -> 0x00d7
+    0x9f -> 0x0192
+    0xa0 -> 0x00e1
+    0xa1 -> 0x00ed
+    0xa2 -> 0x00f3
+    0xa3 -> 0x00fa
+    0xa4 -> 0x00f1
+    0xa5 -> 0x00d1
+    0xa6 -> 0x00aa
+    0xa7 -> 0x00ba
+    0xa8 -> 0x00bf
+    0xa9 -> 0x00ae
+    0xaa -> 0x00ac
+    0xab -> 0x00bd
+    0xac -> 0x00bc
+    0xad -> 0x00a1
+    0xae -> 0x00ab
+    0xaf -> 0x00bb
+    0xb0 -> 0x2591
+    0xb1 -> 0x2592
+    0xb2 -> 0x2593
+    0xb3 -> 0x2502
+    0xb4 -> 0x2524
+    0xb5 -> 0x00c1
+    0xb6 -> 0x00c2
+    0xb7 -> 0x00c0
+    0xb8 -> 0x00a9
+    0xb9 -> 0x2563
+    0xba -> 0x2551
+    0xbb -> 0x2557
+    0xbc -> 0x255d
+    0xbd -> 0x00a2
+    0xbe -> 0x00a5
+    0xbf -> 0x2510
+    0xc0 -> 0x2514
+    0xc1 -> 0x2534
+    0xc2 -> 0x252c
+    0xc3 -> 0x251c
+    0xc4 -> 0x2500
+    0xc5 -> 0x253c
+    0xc6 -> 0x00e3
+    0xc7 -> 0x00c3
+    0xc8 -> 0x255a
+    0xc9 -> 0x2554
+    0xca -> 0x2569
+    0xcb -> 0x2566
+    0xcc -> 0x2560
+    0xcd -> 0x2550
+    0xce -> 0x256c
+    0xcf -> 0x00a4
+    0xd0 -> 0x00f0
+    0xd1 -> 0x00d0
+    0xd2 -> 0x00ca
+    0xd3 -> 0x00cb
+    0xd4 -> 0x00c8
+    0xd5 -> 0x0131
+    0xd6 -> 0x00cd
+    0xd7 -> 0x00ce
+    0xd8 -> 0x00cf
+    0xd9 -> 0x2518
+    0xda -> 0x250c
+    0xdb -> 0x2588
+    0xdc -> 0x2584
+    0xdd -> 0x00a6
+    0xde -> 0x00cc
+    0xdf -> 0x2580
+    0xe0 -> 0x00d3
+    0xe1 -> 0x00df
+    0xe2 -> 0x00d4
+    0xe3 -> 0x00d2
+    0xe4 -> 0x00f5
+    0xe5 -> 0x00d5
+    0xe6 -> 0x00b5
+    0xe7 -> 0x00fe
+    0xe8 -> 0x00de
+    0xe9 -> 0x00da
+    0xea -> 0x00db
+    0xeb -> 0x00d9
+    0xec -> 0x00fd
+    0xed -> 0x00dd
+    0xee -> 0x00af
+    0xef -> 0x00b4
+    0xf0 -> 0x00ad
+    0xf1 -> 0x00b1
+    0xf2 -> 0x2017
+    0xf3 -> 0x00be
+    0xf4 -> 0x00b6
+    0xf5 -> 0x00a7
+    0xf6 -> 0x00f7
+    0xf7 -> 0x00b8
+    0xf8 -> 0x00b0
+    0xf9 -> 0x00a8
+    0xfa -> 0x00b7
+    0xfb -> 0x00b9
+    0xfc -> 0x00b3
+    0xfd -> 0x00b2
+    0xfe -> 0x25a0
+    0xff -> 0x00a0
+    _    -> fromIntegral i
-- 
cgit v1.2.3