aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2019-06-09 11:51:20 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2019-06-09 11:51:20 -0700
commit96fae8da507b44466133e759a297baeb52566d63 (patch)
tree0ec14a4e498e8acad195fc9d97872eab64f6ee56 /src/Text/Pandoc
parentd577552850f46afe056ed4a7a2bf3a1a62ade9af (diff)
downloadpandoc-96fae8da507b44466133e759a297baeb52566d63.tar.gz
FB2 reader: skip unknown elements rather than throwing errors.
Sometimes custom elements are used, and the reader should not abort but skip them with a warning. (For example, id element in author.) Closes #5560.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs59
1 files changed, 39 insertions, 20 deletions
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 5efcc2e5b..a702300c6 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -98,7 +98,9 @@ parseInline (Elem e) =
"sup" -> superscript <$> parseStyleType e
"code" -> pure $ code $ strContent e
"image" -> parseInlineImageElement e
- name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
+ name -> do
+ report $ IgnoredElement name
+ pure mempty
parseInline (Text x) = pure $ text $ cdData x
parseInline (CRef r) = pure $ str $ convertEntity r
@@ -173,7 +175,9 @@ parseDescriptionChild e =
"publish-info" -> pure ()
"custom-info" -> pure ()
"output" -> pure ()
- name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ "in description.")
+ name -> do
+ report $ IgnoredElement $ name ++ " in description"
+ pure mempty
-- | Parse a child of @\<body>@ element.
parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
@@ -189,26 +193,29 @@ parseBodyChild e =
parseBinaryElement :: PandocMonad m => Element -> FB2 m ()
parseBinaryElement e =
case (findAttr (unqual "id") e, findAttr (unqual "content-type") e) of
- (Nothing, _) -> throwError $ PandocParseError "<binary> element must have an \"id\" attribute"
- (Just _, Nothing) -> throwError $ PandocParseError "<binary> element must have a \"content-type\" attribute"
+ (Nothing, _) -> report $ IgnoredElement "binary without id attribute"
+ (Just _, Nothing) ->
+ report $ IgnoredElement "binary without content-type attribute"
(Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e)))
-- * Type parsers
-- | Parse @authorType@
parseAuthor :: PandocMonad m => Element -> FB2 m String
-parseAuthor e = unwords <$> mapM parseAuthorChild (elChildren e)
+parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
-parseAuthorChild :: PandocMonad m => Element -> FB2 m String
+parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String)
parseAuthorChild e =
case qName $ elName e of
- "first-name" -> pure $ strContent e
- "middle-name" -> pure $ strContent e
- "last-name" -> pure $ strContent e
- "nickname" -> pure $ strContent e
- "home-page" -> pure $ strContent e
- "email" -> pure $ strContent e
- name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in author.")
+ "first-name" -> pure $ Just $ strContent e
+ "middle-name" -> pure $ Just $ strContent e
+ "last-name" -> pure $ Just $ strContent e
+ "nickname" -> pure $ Just $ strContent e
+ "home-page" -> pure $ Just $ strContent e
+ "email" -> pure $ Just $ strContent e
+ name -> do
+ report $ IgnoredElement $ name ++ " in author"
+ pure Nothing
-- | Parse @titleType@
parseTitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -230,7 +237,9 @@ parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
parseImageElement e =
case href of
Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
- Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: image without href."
+ Nothing -> do
+ report $ IgnoredElement " image without href"
+ pure mempty
where alt = maybe mempty str $ findAttr (unqual "alt") e
title = fromMaybe "" $ findAttr (unqual "title") e
imgId = fromMaybe "" $ findAttr (unqual "id") e
@@ -355,7 +364,9 @@ parseNamedStyle e = do
let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
case findAttr (unqual "name") e of
Just name -> pure $ spanWith ("", [name], lang) content
- Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required name."
+ Nothing -> do
+ report $ IgnoredElement "link without required name"
+ pure mempty
parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
parseNamedStyleChild (Elem e) =
@@ -369,7 +380,9 @@ parseNamedStyleChild (Elem e) =
"sup" -> superscript <$> parseStyleType e
"code" -> pure $ code $ strContent e
"image" -> parseInlineImageElement e
- name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
+ name -> do
+ report $ IgnoredElement $ name ++ " in style"
+ pure mempty
parseNamedStyleChild x = parseInline x
-- | Parse @linkType@
@@ -383,13 +396,17 @@ parseLinkType e = do
Nothing -> pure $ link href "" content
Just contents -> pure $ note contents
_ -> pure $ link href "" content
- Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required href."
+ Nothing -> do
+ report $ IgnoredElement "link without required href"
+ pure mempty
-- | Parse @styleLinkType@
parseStyleLinkType :: PandocMonad m => Content -> FB2 m Inlines
parseStyleLinkType x@(Elem e) =
case qName (elName e) of
- "a" -> throwError $ PandocParseError "Couldn't parse FB2 file: links cannot be nested."
+ "a" -> do
+ report $ IgnoredElement "nested link"
+ pure mempty
_ -> parseInline x
parseStyleLinkType x = parseInline x
@@ -412,7 +429,7 @@ parseTitleInfoChild e =
"src-lang" -> pure ()
"translator" -> pure ()
"sequence" -> pure ()
- name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in title-info.")
+ name -> report $ IgnoredElement $ name ++ " in title-info"
parseCoverPage :: PandocMonad m => Element -> FB2 m ()
parseCoverPage e =
@@ -430,6 +447,8 @@ parseInlineImageElement :: PandocMonad m
parseInlineImageElement e =
case href of
Just src -> pure $ imageWith ("", [], []) (removeHash src) "" alt
- Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: inline image without href."
+ Nothing -> do
+ report $ IgnoredElement "inline image without href"
+ pure mempty
where alt = maybe mempty str $ findAttr (unqual "alt") e
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e