about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2021-09-03 21:50:30 -0700
committer John MacFarlane <jgm@berkeley.edu> 2021-09-03 21:50:30 -0700
commit 10c471907693aac3e01e9550ce203834ff367de1 (patch)
tree 4eeaf91da4f59b4137b393bc3a65a433b05a061b
parent e5d0b702c7d8b11f8ed4ecf10afa320103473dd0 (diff)
download pandoc-10c471907693aac3e01e9550ce203834ff367de1.tar.gz
RTF reader: if doc begins with {\rtf1 ... } only parse its contents.
Some documents seem to have non-RTF (e.g. XML) material after the `{\rtf1 ... }` group.
-rw-r--r-- src/Text/Pandoc/Readers/RTF.hs 8
1 file changed, 7 insertions, 1 deletion
diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs
index 5e5799b49..e577ee70b 100644
--- a/src/Text/Pandoc/Readers/RTF.hs
+++ b/src/Text/Pandoc/Readers/RTF.hs
@@ -204,7 +204,13 @@ parseRTF = do
skipMany nl
toks <- many tok
-- return $! traceShowId toks
- bs <- (foldM processTok mempty toks >>= emitBlocks)
+ bs <- (case toks of
+ -- if we start with {\rtf1...}, parse that and ignore
+ -- what follows (which in certain cases can be non-RTF content)
+ tok@(Tok _ (Grouped (Tok _ (ControlWord "rtf" (Just 1)) : _))) : _
+ -> foldM processTok mempty [tok]
+ _ -> foldM processTok mempty toks)
+ >>= emitBlocks
unclosed <- closeContainers
let doc = B.doc $ bs <> unclosed
kvs <- sMetadata <$> getState