From 6543b05116ee58ef4de62f93dcafeb27617d83e6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 30 Jul 2021 17:23:46 -0600 Subject: Add RTF reader. - `rtf` is now supported as an input format as well as output. - New module Text.Pandoc.Readers.RTF (exporting `readRTF`). [API change] Closes #3982. --- src/Text/Pandoc/Readers.hs | 3 + src/Text/Pandoc/Readers/RTF.hs | 1333 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1336 insertions(+) create mode 100644 src/Text/Pandoc/Readers/RTF.hs (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 5106f8058..dd3aecdc5 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -55,6 +55,7 @@ module Text.Pandoc.Readers , readCslJson , readBibTeX , readBibLaTeX + , readRTF -- * Miscellaneous , getReader , getDefaultExtensions @@ -102,6 +103,7 @@ import Text.Pandoc.Readers.Man import Text.Pandoc.Readers.CSV import Text.Pandoc.Readers.CslJson import Text.Pandoc.Readers.BibTeX +import Text.Pandoc.Readers.RTF import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Sources (ToSources(..), sourcesToText) @@ -149,6 +151,7 @@ readers = [("native" , TextReader readNative) ,("csljson" , TextReader readCslJson) ,("bibtex" , TextReader readBibTeX) ,("biblatex" , TextReader readBibLaTeX) + ,("rtf" , TextReader readRTF) ] -- | Retrieve reader, extensions based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs new file mode 100644 index 000000000..749a38dd3 --- /dev/null +++ b/src/Text/Pandoc/Readers/RTF.hs @@ -0,0 +1,1333 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.RTF + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane () + Stability : alpha + Portability : portable + +Conversion of RTF documents 'Pandoc' document. +We target version 1.5 of the RTF spec. +-} +module Text.Pandoc.Readers.RTF (readRTF) where + +import qualified Data.IntMap as IntMap +import qualified Data.Sequence as Seq +import Control.Monad +import Control.Monad.Except (throwError) +import Data.List (find, foldl') +import Data.Word (Word8, Word16) +import Data.Default +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Read as TR +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class.PandocMonad (PandocMonad (..), insertMedia) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing +import Text.Pandoc.Shared (safeRead, tshow) +import Data.Char (isAlphaNum, chr, digitToInt, isAscii, isLetter, isSpace) +import qualified Data.ByteString.Lazy as BL +import Data.Digest.Pure.SHA (sha1, showDigest) +import Data.Maybe (mapMaybe, fromMaybe) +import Safe (lastMay, initSafe, headDef) +-- import Debug.Trace + +-- TODO: +-- [ ] more complex table features +-- + +-- | Read RTF from an input string and return a Pandoc document. +readRTF :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc +readRTF opts s = do + let sources = toSources s + parsed <- readWithM parseRTF def{ sOptions = opts } sources + case parsed of + Left e -> throwError e + Right d -> return d + +data CharSet = ANSI | Mac | Pc | Pca + deriving (Show, Eq) + +-- first index is the list (or override) id, second is the list level +type ListTable = IntMap.IntMap ListLevelTable +type ListLevelTable = IntMap.IntMap ListType + +data RTFState = RTFState { sOptions :: ReaderOptions + , sCharSet :: CharSet + , sGroupStack :: [Properties] + , sListStack :: [List] + , sCurrentCell :: Blocks + , sTableRows :: [TableRow] -- reverse order + , sTextContent :: [(Properties, Text)] + , sMetadata :: [(Text, Inlines)] + , sFontTable :: FontTable + , sStylesheet :: Stylesheet + , sListTable :: ListTable + , sListOverrideTable :: ListTable + , sEatChars :: Int + } deriving (Show) + +instance Default RTFState where + def = RTFState { sOptions = def + , sCharSet = ANSI + , sGroupStack = [] + , sListStack = [] + , sCurrentCell = mempty + , sTableRows = [] + , sTextContent = [] + , sMetadata = [] + , sFontTable = mempty + , sStylesheet = mempty + , sListTable = mempty + , sListOverrideTable = mempty + , sEatChars = 0 + } + +type FontTable = IntMap.IntMap FontFamily + +data FontFamily = + Roman | Swiss | Modern | Script | Decor | Tech | Bidi + deriving (Show, Eq) + +data StyleType = ParagraphStyle | SectionStyle | CharStyle | TableStyle + deriving (Show, Eq) + +data Style = + Style { styleNum :: Int + , styleType :: StyleType + , styleBasedOn :: Maybe Int + , styleName :: Text + , styleFormatting :: [Tok] + } deriving (Show, Eq) + +type Stylesheet = IntMap.IntMap Style + +data PictType = + Emfblip | Pngblip | Jpegblip + deriving (Show, Eq) + +data Pict = + Pict { picType :: Maybe PictType + , picWidth :: Maybe Int + , picHeight :: Maybe Int + , picWidthGoal :: Maybe Int + , picHeightGoal :: Maybe Int + , picBinary :: Bool + , picData :: Text + , picName :: Text + , picBytes :: BL.ByteString + } deriving (Show, Eq) + +instance Default Pict where + def = Pict { picType = Nothing + , picWidth = Nothing + , picHeight = Nothing + , picWidthGoal = Nothing + , picHeightGoal = Nothing + , picBinary = False + , picData = mempty + , picName = mempty + , picBytes = mempty } + +data Properties = + Properties + { gBold :: Bool + , gItalic :: Bool + , gCaps :: Bool + , gDeleted :: Bool + , gSub :: Bool + , gSuper :: Bool + , gSmallCaps :: Bool + , gUnderline :: Bool + , gHyperlink :: Maybe Text + , gAnchor :: Maybe Text + , gImage :: Maybe Pict + , gFontFamily :: Maybe FontFamily + , gHidden :: Bool + , gUC :: Int -- number of ansi chars to skip after unicode char + , gFootnote :: Maybe Blocks + , gOutlineLevel :: Maybe ListLevel + , gListOverride :: Maybe Override + , gListLevel :: Maybe Int + , gInTable :: Bool + } deriving (Show, Eq) + +instance Default Properties where + def = Properties { gBold = False + , gItalic = False + , gCaps = False + , gDeleted = False + , gSub = False + , gSuper = False + , gSmallCaps = False + , gUnderline = False + , gHyperlink = Nothing + , gAnchor = Nothing + , gImage = Nothing + , gFontFamily = Nothing + , gHidden = False + , gUC = 1 + , gFootnote = Nothing + , gOutlineLevel = Nothing + , gListOverride = Nothing + , gListLevel = Nothing + , gInTable = False + } + +type RTFParser m = ParserT Sources RTFState m + +data ListType = Bullet | Ordered ListAttributes + deriving (Show, Eq) + +type Override = Int + +type ListLevel = Int + +data List = + List Override ListLevel ListType [Blocks] -- items in reverse order + deriving (Show, Eq) + +newtype TableRow = TableRow [Blocks] -- cells in reverse order + deriving (Show, Eq) + +parseRTF :: PandocMonad m => RTFParser m Pandoc +parseRTF = do + skipMany nl + toks <- many tok + -- return $! traceShowId toks + bs <- (foldM processTok mempty toks >>= emitBlocks) + unclosed <- closeContainers + let doc = B.doc $ bs <> unclosed + kvs <- sMetadata <$> getState + pure $ foldr (uncurry B.setMeta) doc kvs + +data Tok = Tok SourcePos TokContents + deriving (Show, Eq) + +data TokContents = + ControlWord Text (Maybe Int) + | ControlSymbol Char + | UnformattedText Text + | HexVal Word8 + | Grouped [Tok] + deriving (Show, Eq) + +tok :: PandocMonad m => RTFParser m Tok +tok = do + pos <- getPosition + Tok pos <$> ((controlThing <|> unformattedText <|> grouped) <* skipMany nl) + where + controlThing = do + char '\\' *> + ( (ControlWord <$> letterSequence <*> (parameter <* optional delimChar)) + <|> (HexVal <$> hexVal) + <|> (ControlSymbol <$> anyChar) ) + parameter = do + hyph <- string "-" <|> pure "" + rest <- many digit + let pstr = T.pack $ hyph <> rest + return $ safeRead pstr + hexVal = do + char '\'' + x <- hexDigit + y <- hexDigit + return $ hexToWord (T.pack [x,y]) + letterSequence = T.pack <$> many1 (satisfy (\c -> isAscii c && isLetter c)) + unformattedText = + UnformattedText . T.pack . mconcat <$> + many1 ( many1 (satisfy (not . isSpecial)) + <|> ("" <$ nl)) + grouped = Grouped <$> (char '{' *> skipMany nl *> manyTill tok (char '}')) + +nl :: PandocMonad m => RTFParser m () +nl = void (char '\n' <|> char '\r') + +isSpecial :: Char -> Bool +isSpecial '{' = True +isSpecial '}' = True +isSpecial '\\' = True +isSpecial '\n' = True +isSpecial _ = False + +delimChar :: PandocMonad m => RTFParser m Char +delimChar = satisfy (\c -> not (isAlphaNum c || isSpecial c)) + +modifyGroup :: PandocMonad m + => (Properties -> Properties) + -> RTFParser m () +modifyGroup f = + updateState $ \st -> + st{ sGroupStack = + case sGroupStack st of + [] -> [] + (x:xs) -> f x : xs } + +addFormatting :: (Properties, Text) -> Inlines +addFormatting (_, "\n") = B.linebreak +addFormatting (props, _) | gHidden props = mempty +addFormatting (props, _) | Just bs <- gFootnote props = B.note bs +addFormatting (props, txt) = + (if gBold props then B.strong else id) . + (if gItalic props then B.emph else id) . + (if gDeleted props then B.strikeout else id) . + (if gSub props then B.subscript else id) . + (if gSuper props then B.superscript else id) . + (if gSmallCaps props then B.smallcaps else id) . + (if gUnderline props then B.underline else id) . + (case gHyperlink props of + Nothing -> id + Just linkdest -> B.link linkdest mempty) . + (case gAnchor props of + Nothing -> id + Just ident -> B.spanWith (ident,[],[])) . + (case gFontFamily props of + Just Modern -> B.code + _ -> case gImage props of + Just pict -> + let attr = ("",[], + (case picWidthGoal pict of + Nothing -> [] + Just w -> [("width", tshow (fromIntegral w / 1440 + :: Double) + <> "in")]) ++ + (case picHeightGoal pict of + Nothing -> [] + Just h -> [("height", tshow (fromIntegral h / 1440 + :: Double) + <> "in")])) + in B.imageWith attr (picName pict) "" . B.text + Nothing -> B.text) . + (if gCaps props then T.toUpper else id) + $ txt + +addText :: PandocMonad m => Text -> RTFParser m () +addText t = do + gs <- sGroupStack <$> getState + let props = case gs of + (x:_) -> x + _ -> def + updateState (\s -> s{ sTextContent = (props, t) : sTextContent s }) + +inGroup :: PandocMonad m => RTFParser m a -> RTFParser m a +inGroup p = do + updateState $ \st -> + st{ sGroupStack = + case sGroupStack st of + [] -> [def] + (x:xs) -> (x:x:xs) } -- inherit current group's properties + result <- p + updateState $ \st -> + st{ sGroupStack = + case sGroupStack st of + [] -> [] -- should not happen + (_:xs) -> xs } + return result + +getStyleFormatting :: PandocMonad m => Int -> RTFParser m [Tok] +getStyleFormatting stynum = do + stylesheet <- sStylesheet <$> getState + case IntMap.lookup stynum stylesheet of + Nothing -> return [] + Just sty -> + case styleBasedOn sty of + Just i -> (<> styleFormatting sty) <$> getStyleFormatting i + Nothing -> return $ styleFormatting sty + +isMetadataField :: Text -> Bool +isMetadataField "title" = True +isMetadataField "subject" = True +isMetadataField "author" = True +isMetadataField "manager" = True +isMetadataField "company" = True +isMetadataField "operator" = True +isMetadataField "category" = True +isMetadataField "keywords" = True +isMetadataField "comment" = True +isMetadataField "doccomm" = True +isMetadataField "hlinkbase" = True +isMetadataField "generator" = True +isMetadataField _ = False + +isHeaderFooter :: Text -> Bool +isHeaderFooter "header" = True +isHeaderFooter "headerl" = True +isHeaderFooter "headerr" = True +isHeaderFooter "headerf" = True +isHeaderFooter "footer" = True +isHeaderFooter "footerl" = True +isHeaderFooter "footerr" = True +isHeaderFooter "footerf" = True +isHeaderFooter _ = False + +boolParam :: Maybe Int -> Bool +boolParam (Just 0) = False +boolParam _ = True + +isUnderline :: Text -> Bool +isUnderline "ul" = True +isUnderline "uld" = True +isUnderline "uldash" = True +isUnderline "uldashd" = True +isUnderline "uldashdd" = True +isUnderline "uldb" = True +isUnderline "ulth" = True +isUnderline "ulthd" = True +isUnderline "ulthdash" = True +isUnderline "ulw" = True +isUnderline "ulwave" = True +isUnderline _ = False + +processTok :: PandocMonad m => Blocks -> Tok -> RTFParser m Blocks +processTok bs (Tok pos tok') = do + setPosition pos + -- ignore \* at beginning of group: + let tok'' = case tok' of + Grouped (Tok _ (ControlSymbol '*') : toks) -> Grouped toks + _ -> tok' + case tok'' of + HexVal{} -> return () + UnformattedText{} -> return () + _ -> updateState $ \s -> s{ sEatChars = 0 } + case tok'' of + Grouped (Tok _ (ControlWord "fonttbl" _) : toks) -> inGroup $ do + updateState $ \s -> s{ sFontTable = processFontTable toks } + pure bs + Grouped (Tok _ (ControlWord "field" _) : toks) -> + inGroup $ handleField bs toks + Grouped (Tok _ (ControlWord "pict" _) : toks) -> + bs <$ inGroup (handlePict toks) + Grouped (Tok _ (ControlWord "stylesheet" _) : toks) -> + bs <$ inGroup (handleStylesheet toks) + Grouped (Tok _ (ControlWord "listtext" _) : _) -> do + -- eject any previous list items...sometimes TextEdit + -- doesn't put in a \par + emitBlocks bs + Grouped (Tok _ (ControlWord "colortbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "listtable" _) : toks) -> + bs <$ inGroup (handleListTable toks) + Grouped (Tok _ (ControlWord "listoverridetable" _) : toks) -> + bs <$ inGroup (handleListOverrideTable toks) + Grouped (Tok _ (ControlWord "wgrffmtfilter" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "themedata" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "colorschememapping" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "datastore" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "latentstyles" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "pntxta" _) : _) -> pure bs -- TODO + Grouped (Tok _ (ControlWord "pntxtb" _) : _) -> pure bs -- TODO + Grouped (Tok _ (ControlWord "xmlnstbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "filetbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "expandedcolortbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "listtables" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "revtbl" _) : _) -> pure bs + Grouped (Tok _ (ControlWord "bkmkstart" _) + : Tok _ (UnformattedText t) : _) -> do + -- TODO ideally we'd put the span around bkmkstart/end, but this + -- is good for now: + modifyGroup (\g -> g{ gAnchor = Just $ T.strip t }) + addText "" + modifyGroup (\g -> g{ gAnchor = Nothing }) + pure bs + Grouped (Tok _ (ControlWord "bkmkend" _) : _) -> pure bs -- TODO + Grouped (Tok _ (ControlWord f _) : _) | isHeaderFooter f -> pure bs + Grouped (Tok _ (ControlWord "footnote" _) : toks) -> do + noteBs <- inGroup $ processDestinationToks toks + modifyGroup (\g -> g{ gFootnote = Just noteBs }) + addText "*" + modifyGroup (\g -> g{ gFootnote = Nothing }) + return bs + Grouped (Tok _ (ControlWord "info" _) : toks) -> + bs <$ inGroup (processDestinationToks toks) + Grouped (Tok _ (ControlWord f _) : toks) | isMetadataField f -> inGroup $ do + foldM_ processTok mempty toks + annotatedToks <- reverse . sTextContent <$> getState + updateState $ \s -> s{ sTextContent = [] } + let ils = B.trimInlines . mconcat $ map addFormatting annotatedToks + updateState $ \s -> s{ sMetadata = (f, ils) : sMetadata s } + pure bs + Grouped toks -> inGroup (foldM processTok bs toks) + UnformattedText t -> bs <$ do + -- return $! traceShowId $! (pos, t) + eatChars <- sEatChars <$> getState + case eatChars of + 0 -> addText t + n | n < T.length t -> do + updateState $ \s -> s{ sEatChars = 0 } + addText (T.drop n t) + | otherwise -> do + updateState $ \s -> s{ sEatChars = n - T.length t } + HexVal n -> bs <$ do + eatChars <- sEatChars <$> getState + if eatChars == 0 + then do + charset <- sCharSet <$> getState + case charset of + ANSI -> addText (T.singleton $ ansiToChar n) + Mac -> addText (T.singleton $ macToChar n) + Pc -> addText (T.singleton $ pcToChar n) + Pca -> addText (T.singleton $ pcaToChar n) + else updateState $ \s -> s{ sEatChars = eatChars - 1 } + ControlWord "ansi" _ -> bs <$ + updateState (\s -> s{ sCharSet = ANSI }) + ControlWord "mac" _ -> bs <$ + updateState (\s -> s{ sCharSet = Mac }) + ControlWord "pc" _ -> bs <$ + updateState (\s -> s{ sCharSet = Pc }) + ControlWord "pca" _ -> bs <$ + updateState (\s -> s{ sCharSet = Pca }) + ControlWord "outlinelevel" mbp -> bs <$ + modifyGroup (\g -> g{ gOutlineLevel = mbp }) + ControlWord "ls" mbp -> bs <$ + modifyGroup (\g -> g{ gListOverride = mbp }) + ControlWord "ilvl" mbp -> bs <$ + modifyGroup (\g -> g{ gListLevel = mbp }) + ControlSymbol '\\' -> bs <$ addText "\\" + ControlSymbol '{' -> bs <$ addText "{" + ControlSymbol '}' -> bs <$ addText "}" + ControlSymbol '~' -> bs <$ addText "\x00a0" + ControlSymbol '-' -> bs <$ addText "\x00ad" + ControlSymbol '_' -> bs <$ addText "\x2011" + ControlWord "trowd" _ -> bs <$ do -- add new row + updateState $ \s -> s{ sTableRows = TableRow [] : sTableRows s + , sCurrentCell = mempty } + ControlWord "cell" _ -> bs <$ do + new <- emitBlocks mempty + curCell <- (<> new) . sCurrentCell <$> getState + updateState $ \s -> s{ sTableRows = + case sTableRows s of + TableRow cs : rs -> + TableRow (curCell : cs) : rs + [] -> [TableRow [curCell]] -- shouldn't happen + , sCurrentCell = mempty } + ControlWord "intbl" _ -> bs <$ modifyGroup (\g -> g{ gInTable = True }) + ControlWord "plain" _ -> bs <$ modifyGroup (const def) + ControlWord "lquote" _ -> bs <$ addText "\x2018" + ControlWord "rquote" _ -> bs <$ addText "\x2019" + ControlWord "ldblquote" _ -> bs <$ addText "\x201C" + ControlWord "rdblquote" _ -> bs <$ addText "\x201D" + ControlWord "emdash" _ -> bs <$ addText "\x2014" + ControlWord "emspace" _ -> bs <$ addText "\x2003" + ControlWord "enspace" _ -> bs <$ addText "\x2002" + ControlWord "endash" _ -> bs <$ addText "\x2013" + ControlWord "bullet" _ -> bs <$ addText "\x2022" + ControlWord "tab" _ -> bs <$ addText "\t" + ControlWord "line" _ -> bs <$ addText "\n" + ControlSymbol '\n' -> bs <$ addText "\n" + ControlSymbol '\r' -> bs <$ addText "\n" + ControlWord "uc" (Just i) -> bs <$ modifyGroup (\g -> g{ gUC = i }) + ControlWord "cs" (Just n) -> do + getStyleFormatting n >>= foldM processTok bs + ControlWord "s" (Just n) -> do + getStyleFormatting n >>= foldM processTok bs + ControlWord "ds" (Just n) -> do + getStyleFormatting n >>= foldM processTok bs + ControlWord "f" (Just i) -> bs <$ do + fontTable <- sFontTable <$> getState + modifyGroup (\g -> g{ gFontFamily = IntMap.lookup i fontTable }) + ControlWord "u" (Just i) -> bs <$ do + st <- getState + let curgroup = case sGroupStack st of + [] -> def + (x:_) -> x + updateState $ \s -> s{ sEatChars = gUC curgroup } + -- "RTF control words generally accept signed 16-bit numbers as + -- arguments. For this reason, Unicode values greater than 32767 + -- must be expressed as negative numbers." + let codepoint :: Word16 + codepoint = fromIntegral i + addText (T.singleton (chr $ fromIntegral codepoint)) + ControlWord "caps" mbp -> bs <$ + modifyGroup (\g -> g{ gCaps = boolParam mbp }) + ControlWord "deleted" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "b" mbp -> bs <$ + modifyGroup (\g -> g{ gBold = boolParam mbp }) + ControlWord "i" mbp -> bs <$ + modifyGroup (\g -> g{ gItalic = boolParam mbp }) + ControlWord "sub" mbp -> bs <$ + modifyGroup (\g -> g{ gSub = boolParam mbp }) + ControlWord "super" mbp -> bs <$ + modifyGroup (\g -> g{ gSuper = boolParam mbp }) + ControlWord "up" mbp -> bs <$ + modifyGroup (\g -> g{ gSuper = boolParam mbp }) + ControlWord "strike" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "strikedl" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "striked" mbp -> bs <$ + modifyGroup (\g -> g{ gDeleted = boolParam mbp }) + ControlWord "scaps" mbp -> bs <$ + modifyGroup (\g -> g{ gSmallCaps = boolParam mbp }) + ControlWord "v" mbp -> bs <$ + modifyGroup (\g -> g{ gHidden = boolParam mbp }) + ControlWord x mbp | isUnderline x -> bs <$ + modifyGroup (\g -> g{ gUnderline = boolParam mbp }) + ControlWord "ulnone" _ -> bs <$ + modifyGroup (\g -> g{ gUnderline = False }) + ControlWord "pard" _ -> bs <$ do + modifyGroup (const def) + getStyleFormatting 0 >>= foldM processTok bs + ControlWord "par" _ -> emitBlocks bs + _ -> pure bs + +processDestinationToks :: PandocMonad m => [Tok] -> RTFParser m Blocks +processDestinationToks toks = do + textContent <- sTextContent <$> getState + liststack <- sListStack <$> getState + updateState $ \s -> s{ sTextContent = mempty + , sListStack = [] } + result <- inGroup $ + foldM processTok mempty toks >>= emitBlocks + unclosed <- closeContainers + updateState $ \s -> s{ sTextContent = textContent + , sListStack = liststack } + return $ result <> unclosed + +-- close lists >= level +closeLists :: PandocMonad m => Int -> RTFParser m Blocks +closeLists lvl = do + lists <- sListStack <$> getState + case lists of + (List _ lvl' lt items : rest) | lvl' >= lvl -> do + let newlist = (case lt of + Bullet -> B.bulletList + Ordered listAttr -> B.orderedListWith listAttr) + (reverse items) + updateState $ \s -> s{ sListStack = rest } + case rest of + [] -> do + updateState $ \s -> s{ sListStack = rest } + pure newlist + (List lo lvl'' lt' [] : rest') -> do -- should not happen + updateState $ \s -> s{ sListStack = + List lo lvl'' lt' [newlist] : rest' } + closeLists lvl + (List lo lvl'' lt' (i:is) : rest') -> do + updateState $ \s -> s{ sListStack = + List lo lvl'' lt' (i <> newlist : is) : rest' } + closeLists lvl + _ -> pure mempty + +closeTable :: PandocMonad m => RTFParser m Blocks +closeTable = do + rawrows <- sTableRows <$> getState + if null rawrows + then return mempty + else do + let getCells (TableRow cs) = reverse cs + let rows = map getCells . reverse $ rawrows + updateState $ \s -> s{ sCurrentCell = mempty + , sTableRows = [] } + return $ B.simpleTable [] rows + +closeContainers :: PandocMonad m => RTFParser m Blocks +closeContainers = do + tbl <- closeTable + lists <- closeLists 0 + return $ tbl <> lists + +trimFinalLineBreak :: Inlines -> Inlines +trimFinalLineBreak ils = + case Seq.viewr (B.unMany ils) of + rest Seq.:> LineBreak -> B.Many rest + _ -> ils + +emitBlocks :: PandocMonad m => Blocks -> RTFParser m Blocks +emitBlocks bs = do + annotatedToks <- reverse . sTextContent <$> getState + updateState $ \s -> s{ sTextContent = [] } + let justCode = def{ gFontFamily = Just Modern } + let prop = case annotatedToks of + [] -> def + ((p,_):_) -> p + tbl <- if gInTable prop + then pure mempty + else closeTable + new <- + case annotatedToks of + [] -> pure mempty + _ | Just lst <- gListOverride prop + -> do + let level = fromMaybe 0 $ gListLevel prop + listOverrideTable <- sListOverrideTable <$> getState + let listType = fromMaybe Bullet $ + IntMap.lookup lst listOverrideTable >>= IntMap.lookup level + lists <- sListStack <$> getState + -- get para contents of list item + let newbs = B.para . B.trimInlines . trimFinalLineBreak . mconcat $ + map addFormatting annotatedToks + case lists of + (List lo parentlevel _lt items : cs) + | lo == lst + , parentlevel == level + -- add another item to existing list + -> do updateState $ \s -> + s{ sListStack = + List lo level listType (newbs:items) : cs } + pure mempty + | lo /= lst || level < parentlevel + -- close parent list and add new list + -> do new <- closeLists level -- close open lists > level + updateState $ \s -> + s{ sListStack = List lst level listType [newbs] : + sListStack s } + pure new + _ -> do -- add new list (level > parentlevel) + updateState $ \s -> + s{ sListStack = List lst level listType [newbs] : + sListStack s } + pure mempty + | Just lvl <- gOutlineLevel prop + -> do + lists <- closeLists 0 + pure $ lists <> + B.header (lvl + 1) + (B.trimInlines . mconcat $ map addFormatting + $ removeCommonFormatting + annotatedToks) + | all ((== justCode) . fst) annotatedToks + -> do + lists <- closeLists 0 + pure $ lists <> + B.codeBlock (mconcat $ map snd annotatedToks) + | all (T.all isSpace . snd) annotatedToks + -> closeLists 0 + | otherwise -> do + lists <- closeLists 0 + pure $ lists <> + B.para (B.trimInlines . trimFinalLineBreak . mconcat + $ map addFormatting annotatedToks) + if gInTable prop + then do + updateState $ \s -> s{ sCurrentCell = sCurrentCell s <> new } + pure bs + else do + pure $ bs <> tbl <> new + +-- Headers often have a style applied. We usually want to remove +-- this, because headers will have their own styling in the target +-- format. +removeCommonFormatting :: [(Properties, Text)] -> [(Properties, Text)] +removeCommonFormatting = + (\ts -> + if all (gBold . fst) ts + then map (\(p,t) -> (p{ gBold = False }, t)) ts + else ts) . + (\ts -> + if all (gItalic . fst) ts + then map (\(p,t) -> (p{ gItalic = False }, t)) ts + else ts) + + +-- {\field{\*\fldinst{HYPERLINK "http://pandoc.org"}}{\fldrslt foo}} +handleField :: PandocMonad m => Blocks -> [Tok] -> RTFParser m Blocks +handleField bs + (Tok _ + (Grouped + (Tok _ (ControlSymbol '*') + :Tok _ (ControlWord "fldinst" Nothing) + :Tok _ (Grouped (Tok _ (UnformattedText insttext):rest)) + :_)) + :linktoks) + | Just linkdest <- getHyperlink insttext + = do let linkdest' = case rest of + (Tok _ (ControlSymbol '\\') + : Tok _ (UnformattedText t) + : _) | Just bkmrk <- T.stripPrefix "l" t + -> "#" <> unquote bkmrk + _ -> linkdest + modifyGroup $ \g -> g{ gHyperlink = Just linkdest' } + result <- foldM processTok bs linktoks + modifyGroup $ \g -> g{ gHyperlink = Nothing } + return result +handleField bs _ = pure bs + +unquote :: Text -> Text +unquote = T.dropWhile (=='"') . T.dropWhileEnd (=='"') . T.strip + +handleListTable :: PandocMonad m => [Tok] -> RTFParser m () +handleListTable toks = do + mapM_ handleList toks + +handleList :: PandocMonad m => Tok -> RTFParser m () +handleList (Tok _ (Grouped (Tok _ (ControlWord "list" _) : toks))) = do + let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks] + let levels = [ts | Tok _ (Grouped (Tok _ (ControlWord "listlevel" _) : ts)) + <- toks] + tbl <- foldM handleListLevel mempty (zip [0..] levels) + updateState $ \s -> s{ sListTable = IntMap.insert listid tbl $ sListTable s } +handleList _ = return () + +handleListLevel :: PandocMonad m + => ListLevelTable + -> (Int, [Tok]) + -> RTFParser m ListLevelTable +handleListLevel levelTable (lvl, toks) = do + let start = headDef 1 + [n | Tok _ (ControlWord "levelstartat" (Just n)) <- toks] + let mbNumberStyle = + case [n | Tok _ (ControlWord "levelnfc" (Just n)) <- toks] of + [] -> Nothing + (0:_) -> Just Decimal + (1:_) -> Just UpperRoman + (2:_) -> Just LowerRoman + (3:_) -> Just UpperAlpha + (4:_) -> Just LowerAlpha + (23:_) -> Nothing + (255:_) -> Nothing + _ -> Just DefaultStyle + let listType = case mbNumberStyle of + Nothing -> Bullet + Just numStyle -> Ordered (start,numStyle,Period) + return $ IntMap.insert lvl listType levelTable + +handleListOverrideTable :: PandocMonad m => [Tok] -> RTFParser m () +handleListOverrideTable toks = mapM_ handleListOverride toks + +handleListOverride :: PandocMonad m => Tok -> RTFParser m () +handleListOverride + (Tok _ (Grouped (Tok _ (ControlWord "listoverride" _) : toks))) = do + let listid = headDef 0 [n | Tok _ (ControlWord "listid" (Just n)) <- toks] + let lsn = headDef 0 [n | Tok _ (ControlWord "ls" (Just n)) <- toks] + -- TODO override stuff, esp. start num -- for now we just handle indirection + listTable <- sListTable <$> getState + case IntMap.lookup listid listTable of + Nothing -> return () + Just tbl -> updateState $ \s -> + s{ sListOverrideTable = IntMap.insert lsn tbl $ + sListOverrideTable s } +handleListOverride _ = return () + +handleStylesheet :: PandocMonad m => [Tok] -> RTFParser m () +handleStylesheet toks = do + let styles = mapMaybe parseStyle toks + updateState $ \s -> s{ sStylesheet = IntMap.fromList + $ zip (map styleNum styles) styles } + +parseStyle :: Tok -> Maybe Style +parseStyle (Tok _ (Grouped toks)) = do + let (styType, styNum, rest) = + case toks of + Tok _ (ControlWord "s" (Just n)) : ts -> (ParagraphStyle, n, ts) + Tok _ (ControlWord "ds" (Just n)) : ts -> (SectionStyle, n, ts) + Tok _ (ControlWord "cs" (Just n)) : ts -> (CharStyle, n, ts) + Tok _ (ControlWord "ts" (Just n)) : ts -> (TableStyle, n, ts) + _ -> (ParagraphStyle, 0, toks) + let styName = case lastMay rest of + Just (Tok _ (UnformattedText t)) -> T.dropWhileEnd (==';') t + _ -> mempty + let isBasedOn (Tok _ (ControlWord "sbasedon" (Just _))) = True + isBasedOn _ = False + let styBasedOn = case find isBasedOn toks of + Just (Tok _ (ControlWord "sbasedon" (Just i))) -> Just i + _ -> Nothing + let isStyleControl (Tok _ (ControlWord x _)) = + x `elem` ["cs", "s", "ds", "additive", "sbasedon", "snext", + "sautoupd", "shidden", "keycode", "alt", "shift", + "ctrl", "fn"] + isStyleControl _ = False + let styFormatting = filter (not . isStyleControl) (initSafe rest) + return $ Style{ styleNum = styNum + , styleType = styType + , styleBasedOn = styBasedOn + , styleName = styName + , styleFormatting = styFormatting + } +parseStyle _ = Nothing + +hexToWord :: Text -> Word8 +hexToWord t = case TR.hexadecimal t of + Left _ -> 0 + Right (x,_) -> x + + +handlePict :: PandocMonad m => [Tok] -> RTFParser m () +handlePict toks = do + let pict = foldl' getPictData def toks + let altText = "image" + let binToWord = T.foldl' (\acc x -> acc * 2 + fromIntegral (digitToInt x)) 0 + let isBinaryDigit '0' = True + isBinaryDigit '1' = True + isBinaryDigit _ = False + let bytes = BL.pack $ + if picBinary pict && T.all isBinaryDigit (picData pict) + then map binToWord $ T.chunksOf 8 $ picData pict + else map hexToWord $ T.chunksOf 2 $ picData pict + let (mimetype, ext) = + case picType pict of + Just Emfblip -> (Just "image/x-emf", ".emf") + Just Pngblip -> (Just "image/png", ".png") + Just Jpegblip -> (Just "image/jpeg", ".jpg") + Nothing -> (Nothing, "") + case mimetype of + Just mt -> do + let pictname = showDigest (sha1 bytes) <> ext + insertMedia pictname (Just mt) bytes + modifyGroup $ \g -> g{ gImage = Just pict{ picName = T.pack pictname, + picBytes = bytes } } + addText altText + modifyGroup $ \g -> g{ gImage = Nothing } + _ -> return () + where + getPictData :: Pict -> Tok -> Pict + getPictData pict (Tok _ tok') = + case tok' of + ControlWord "emfblip" _-> pict{ picType = Just Emfblip } + ControlWord "pngblip" _-> pict{ picType = Just Pngblip } + ControlWord "jpegblip" _-> pict{ picType = Just Jpegblip } + ControlWord "picw" (Just w) -> pict{ picWidth = Just w } + ControlWord "pich" (Just h) -> pict{ picHeight = Just h } + ControlWord "picwgoal" (Just w) -> pict{ picWidthGoal = Just w } + ControlWord "pichgoal" (Just h) -> pict{ picHeightGoal = Just h } + ControlWord "bin" _ -> pict{ picBinary = True } + UnformattedText t -> pict{ picData = t } + _ -> pict + + +getHyperlink :: Text -> Maybe Text +getHyperlink t = + case T.stripPrefix "HYPERLINK" (T.strip t) of + Nothing -> Nothing + Just rest -> Just $ unquote rest + +processFontTable :: [Tok] -> FontTable +processFontTable = snd . foldl' go (0, mempty) + where + go (fontnum, tbl) (Tok _ tok') = + case tok' of + (ControlWord "f" (Just i)) -> (i, tbl) + (ControlWord "fnil" _) -> (fontnum, tbl) + (ControlWord "froman" _) -> (fontnum, IntMap.insert fontnum Roman tbl) + (ControlWord "fswiss" _) -> (fontnum, IntMap.insert fontnum Swiss tbl) + (ControlWord "fmodern" _) -> (fontnum, IntMap.insert fontnum Modern tbl) + (ControlWord "fscript" _) -> (fontnum, IntMap.insert fontnum Script tbl) + (ControlWord "fdecor" _) -> (fontnum, IntMap.insert fontnum Decor tbl) + (ControlWord "ftech" _) -> (fontnum, IntMap.insert fontnum Tech tbl) + (ControlWord "fbidi" _) -> (fontnum, IntMap.insert fontnum Bidi tbl) + (Grouped ts) -> foldl' go (fontnum, tbl) ts + _ -> (fontnum, tbl) + + +ansiToChar :: Word8 -> Char +ansiToChar i = chr $ + case i of + 128 -> 8364 + 130 -> 8218 + 131 -> 402 + 132 -> 8222 + 133 -> 8230 + 134 -> 8224 + 135 -> 8225 + 136 -> 710 + 137 -> 8240 + 138 -> 352 + 139 -> 8249 + 140 -> 338 + 142 -> 381 + 145 -> 8216 + 146 -> 8217 + 147 -> 8220 + 148 -> 8221 + 149 -> 8226 + 150 -> 8211 + 151 -> 8212 + 152 -> 732 + 153 -> 8482 + 154 -> 353 + 155 -> 8250 + 156 -> 339 + 158 -> 382 + 159 -> 376 + 173 -> 0xAD + _ -> fromIntegral i + +macToChar :: Word8 -> Char +macToChar i = chr $ + case i of + 0x80 -> 0xC4 + 0x81 -> 0xC5 + 0x82 -> 0xC7 + 0x83 -> 0xC9 + 0x84 -> 0xD1 + 0x85 -> 0xD6 + 0x86 -> 0xDC + 0x87 -> 0xE1 + 0x88 -> 0xE0 + 0x89 -> 0xE2 + 0x8A -> 0xE4 + 0x8B -> 0xE3 + 0x8C -> 0xE5 + 0x8D -> 0xE7 + 0x8E -> 0xE9 + 0x8F -> 0xE8 + 0x90 -> 0xEA + 0x91 -> 0xEB + 0x92 -> 0xED + 0x93 -> 0xEC + 0x94 -> 0xEE + 0x95 -> 0xEF + 0x96 -> 0xF1 + 0x97 -> 0xF3 + 0x98 -> 0xF2 + 0x99 -> 0xF4 + 0x9A -> 0xF6 + 0x9B -> 0xF5 + 0x9C -> 0xFA + 0x9D -> 0xF9 + 0x9E -> 0xFB + 0x9F -> 0xFC + 0xA0 -> 0xDD + 0xA1 -> 0xB0 + 0xA2 -> 0xA2 + 0xA3 -> 0xA3 + 0xA4 -> 0xA7 + 0xA5 -> 0xD7 + 0xA6 -> 0xB6 + 0xA7 -> 0xDF + 0xA8 -> 0xAE + 0xA9 -> 0xA9 + 0xAA -> 0xB2 + 0xAB -> 0xB4 + 0xAC -> 0xA8 + 0xAD -> 0xB3 + 0xAE -> 0xC6 + 0xAF -> 0xD8 + 0xB0 -> 0xB9 + 0xB1 -> 0xB1 + 0xB2 -> 0xBC + 0xB3 -> 0xBD + 0xB4 -> 0xA5 + 0xB5 -> 0xB5 + 0xBA -> 0xBE + 0xBB -> 0xAA + 0xBC -> 0xBA + 0xBE -> 0xE6 + 0xBF -> 0xF8 + 0xC0 -> 0xBF + 0xC1 -> 0xA1 + 0xC2 -> 0xAC + 0xC3 -> 0x0141 + 0xC4 -> 0x0192 + 0xC5 -> 0x02CB + 0xC7 -> 0xAB + 0xC8 -> 0xBB + 0xC9 -> 0xA6 + 0xCA -> 0xA0 + 0xCB -> 0xC0 + 0xCC -> 0xC3 + 0xCD -> 0xD5 + 0xCE -> 0x0152 + 0xCF -> 0x0153 + 0xD0 -> 0xAD + 0xD4 -> 0x0142 + 0xD6 -> 0xF7 + 0xD8 -> 0xFF + 0xD9 -> 0x0178 + 0xDB -> 0xA4 + 0xDC -> 0xD0 + 0xDD -> 0xF0 + 0xDE -> 0xDE + 0xDF -> 0xFE + 0xE0 -> 0xFD + 0xE1 -> 0xB7 + 0xE5 -> 0xC2 + 0xE6 -> 0xCA + 0xE7 -> 0xC1 + 0xE8 -> 0xCB + 0xE9 -> 0xC8 + 0xEA -> 0xCD + 0xEB -> 0xCE + 0xEC -> 0xCF + 0xED -> 0xCC + 0xEE -> 0xD3 + 0xEF -> 0xD4 + 0xF1 -> 0xD2 + 0xF2 -> 0xDA + 0xF3 -> 0xDB + 0xF4 -> 0xD9 + 0xF5 -> 0x0131 + 0xF6 -> 0x02C6 + 0xF7 -> 0x02DC + 0xF8 -> 0xAF + 0xF9 -> 0x02D8 + 0xFA -> 0x02D9 + 0xFB -> 0x02DA + 0xFC -> 0xB8 + 0xFD -> 0x02DD + 0xFE -> 0x02DB + 0xFF -> 0x02C7 + _ -> fromIntegral i + +pcToChar :: Word8 -> Char +pcToChar i = chr $ + case i of + 0x80 -> 0xc7 + 0x81 -> 0xfc + 0x82 -> 0xe9 + 0x83 -> 0xe2 + 0x84 -> 0xe4 + 0x85 -> 0xe0 + 0x86 -> 0xe5 + 0x87 -> 0xe7 + 0x88 -> 0xea + 0x89 -> 0xeb + 0x8a -> 0xe8 + 0x8b -> 0xef + 0x8c -> 0xee + 0x8d -> 0xec + 0x8e -> 0xc4 + 0x8f -> 0xc5 + 0x90 -> 0xc9 + 0x91 -> 0xe6 + 0x92 -> 0xc6 + 0x93 -> 0xf4 + 0x94 -> 0xf6 + 0x95 -> 0xf2 + 0x96 -> 0xfb + 0x97 -> 0xf9 + 0x98 -> 0xff + 0x99 -> 0xd6 + 0x9a -> 0xdc + 0x9b -> 0xa2 + 0x9c -> 0xa3 + 0x9d -> 0xa5 + 0x9e -> 0x20a7 + 0x9f -> 0x0192 + 0xa0 -> 0xe1 + 0xa1 -> 0xed + 0xa2 -> 0xf3 + 0xa3 -> 0xfa + 0xa4 -> 0xf1 + 0xa5 -> 0xd1 + 0xa6 -> 0xaa + 0xa7 -> 0xba + 0xa8 -> 0xbf + 0xa9 -> 0x2310 + 0xaa -> 0xac + 0xab -> 0xbd + 0xac -> 0xbc + 0xad -> 0xa1 + 0xae -> 0xab + 0xaf -> 0xbb + 0xb0 -> 0x2591 + 0xb1 -> 0x2592 + 0xb2 -> 0x2593 + 0xb3 -> 0x2502 + 0xb4 -> 0x2524 + 0xb5 -> 0x2561 + 0xb6 -> 0x2562 + 0xb7 -> 0x2556 + 0xb8 -> 0x2555 + 0xb9 -> 0x2563 + 0xba -> 0x2551 + 0xbb -> 0x2557 + 0xbc -> 0x255d + 0xbd -> 0x255c + 0xbe -> 0x255b + 0xbf -> 0x2510 + 0xc0 -> 0x2514 + 0xc1 -> 0x2534 + 0xc2 -> 0x252c + 0xc3 -> 0x251c + 0xc4 -> 0x2500 + 0xc5 -> 0x253c + 0xc6 -> 0x255e + 0xc7 -> 0x255f + 0xc8 -> 0x255a + 0xc9 -> 0x2554 + 0xca -> 0x2569 + 0xcb -> 0x2566 + 0xcc -> 0x2560 + 0xcd -> 0x2550 + 0xce -> 0x256c + 0xcf -> 0x2567 + 0xd0 -> 0x2568 + 0xd1 -> 0x2564 + 0xd2 -> 0x2565 + 0xd3 -> 0x2559 + 0xd4 -> 0x2558 + 0xd5 -> 0x2552 + 0xd6 -> 0x2553 + 0xd7 -> 0x256b + 0xd8 -> 0x256a + 0xd9 -> 0x2518 + 0xda -> 0x250c + 0xdb -> 0x2588 + 0xdc -> 0x2584 + 0xdd -> 0x258c + 0xde -> 0x2590 + 0xdf -> 0x2580 + 0xe0 -> 0x03b1 + 0xe1 -> 0xdf + 0xe2 -> 0x0393 + 0xe3 -> 0x03c0 + 0xe4 -> 0x03a3 + 0xe5 -> 0x03c3 + 0xe6 -> 0xb5 + 0xe7 -> 0x03c4 + 0xe8 -> 0x03a6 + 0xe9 -> 0x0398 + 0xea -> 0x03a9 + 0xeb -> 0x03b4 + 0xec -> 0x221e + 0xed -> 0x03c6 + 0xee -> 0x03b5 + 0xef -> 0x2229 + 0xf0 -> 0x2261 + 0xf1 -> 0xb1 + 0xf2 -> 0x2265 + 0xf3 -> 0x2264 + 0xf4 -> 0x2320 + 0xf5 -> 0x2321 + 0xf6 -> 0xf7 + 0xf7 -> 0x2248 + 0xf8 -> 0xb0 + 0xf9 -> 0x2219 + 0xfa -> 0xb7 + 0xfb -> 0x221a + 0xfc -> 0x207f + 0xfd -> 0xb2 + 0xfe -> 0x25a0 + 0xff -> 0xa0 + _ -> fromIntegral i + +pcaToChar :: Word8 -> Char +pcaToChar i = chr $ + case i of + 0x80 -> 0x00c7 + 0x81 -> 0x00fc + 0x82 -> 0x00e9 + 0x83 -> 0x00e2 + 0x84 -> 0x00e4 + 0x85 -> 0x00e0 + 0x86 -> 0x00e5 + 0x87 -> 0x00e7 + 0x88 -> 0x00ea + 0x89 -> 0x00eb + 0x8a -> 0x00e8 + 0x8b -> 0x00ef + 0x8c -> 0x00ee + 0x8d -> 0x00ec + 0x8e -> 0x00c4 + 0x8f -> 0x00c5 + 0x90 -> 0x00c9 + 0x91 -> 0x00e6 + 0x92 -> 0x00c6 + 0x93 -> 0x00f4 + 0x94 -> 0x00f6 + 0x95 -> 0x00f2 + 0x96 -> 0x00fb + 0x97 -> 0x00f9 + 0x98 -> 0x00ff + 0x99 -> 0x00d6 + 0x9a -> 0x00dc + 0x9b -> 0x00f8 + 0x9c -> 0x00a3 + 0x9d -> 0x00d8 + 0x9e -> 0x00d7 + 0x9f -> 0x0192 + 0xa0 -> 0x00e1 + 0xa1 -> 0x00ed + 0xa2 -> 0x00f3 + 0xa3 -> 0x00fa + 0xa4 -> 0x00f1 + 0xa5 -> 0x00d1 + 0xa6 -> 0x00aa + 0xa7 -> 0x00ba + 0xa8 -> 0x00bf + 0xa9 -> 0x00ae + 0xaa -> 0x00ac + 0xab -> 0x00bd + 0xac -> 0x00bc + 0xad -> 0x00a1 + 0xae -> 0x00ab + 0xaf -> 0x00bb + 0xb0 -> 0x2591 + 0xb1 -> 0x2592 + 0xb2 -> 0x2593 + 0xb3 -> 0x2502 + 0xb4 -> 0x2524 + 0xb5 -> 0x00c1 + 0xb6 -> 0x00c2 + 0xb7 -> 0x00c0 + 0xb8 -> 0x00a9 + 0xb9 -> 0x2563 + 0xba -> 0x2551 + 0xbb -> 0x2557 + 0xbc -> 0x255d + 0xbd -> 0x00a2 + 0xbe -> 0x00a5 + 0xbf -> 0x2510 + 0xc0 -> 0x2514 + 0xc1 -> 0x2534 + 0xc2 -> 0x252c + 0xc3 -> 0x251c + 0xc4 -> 0x2500 + 0xc5 -> 0x253c + 0xc6 -> 0x00e3 + 0xc7 -> 0x00c3 + 0xc8 -> 0x255a + 0xc9 -> 0x2554 + 0xca -> 0x2569 + 0xcb -> 0x2566 + 0xcc -> 0x2560 + 0xcd -> 0x2550 + 0xce -> 0x256c + 0xcf -> 0x00a4 + 0xd0 -> 0x00f0 + 0xd1 -> 0x00d0 + 0xd2 -> 0x00ca + 0xd3 -> 0x00cb + 0xd4 -> 0x00c8 + 0xd5 -> 0x0131 + 0xd6 -> 0x00cd + 0xd7 -> 0x00ce + 0xd8 -> 0x00cf + 0xd9 -> 0x2518 + 0xda -> 0x250c + 0xdb -> 0x2588 + 0xdc -> 0x2584 + 0xdd -> 0x00a6 + 0xde -> 0x00cc + 0xdf -> 0x2580 + 0xe0 -> 0x00d3 + 0xe1 -> 0x00df + 0xe2 -> 0x00d4 + 0xe3 -> 0x00d2 + 0xe4 -> 0x00f5 + 0xe5 -> 0x00d5 + 0xe6 -> 0x00b5 + 0xe7 -> 0x00fe + 0xe8 -> 0x00de + 0xe9 -> 0x00da + 0xea -> 0x00db + 0xeb -> 0x00d9 + 0xec -> 0x00fd + 0xed -> 0x00dd + 0xee -> 0x00af + 0xef -> 0x00b4 + 0xf0 -> 0x00ad + 0xf1 -> 0x00b1 + 0xf2 -> 0x2017 + 0xf3 -> 0x00be + 0xf4 -> 0x00b6 + 0xf5 -> 0x00a7 + 0xf6 -> 0x00f7 + 0xf7 -> 0x00b8 + 0xf8 -> 0x00b0 + 0xf9 -> 0x00a8 + 0xfa -> 0x00b7 + 0xfb -> 0x00b9 + 0xfc -> 0x00b3 + 0xfd -> 0x00b2 + 0xfe -> 0x25a0 + 0xff -> 0x00a0 + _ -> fromIntegral i -- cgit v1.2.3