aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander <ilabdsf@gmail.com>2018-04-26 22:33:18 +0300
committerJohn MacFarlane <jgm@berkeley.edu>2018-04-26 12:33:18 -0700
commit1927bc9aac0e822bd6179323e00fe38bee5a2cf3 (patch)
tree508b3527a8c09dfe72a790f82270691622af407d
parent5f0d407279e453107b28ee71fa0e45c5cc93090c (diff)
downloadpandoc-1927bc9aac0e822bd6179323e00fe38bee5a2cf3.tar.gz
Add FB2 reader (#4539)
-rw-r--r--pandoc.cabal4
-rw-r--r--src/Text/Pandoc/App.hs1
-rw-r--r--src/Text/Pandoc/Readers.hs3
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs402
-rw-r--r--test/Tests/Readers/FB2.hs29
-rw-r--r--test/fb2/reader/emphasis.fb211
-rw-r--r--test/fb2/reader/emphasis.native6
-rw-r--r--test/fb2/reader/epigraph.fb218
-rw-r--r--test/fb2/reader/epigraph.native9
-rw-r--r--test/fb2/reader/meta.fb226
-rw-r--r--test/fb2/reader/meta.native2
-rw-r--r--test/fb2/reader/poem.fb228
-rw-r--r--test/fb2/reader/poem.native14
-rw-r--r--test/fb2/reader/titles.fb218
-rw-r--r--test/fb2/reader/titles.native8
-rw-r--r--test/test-pandoc.hs2
16 files changed, 581 insertions, 0 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 24fba87f7..383a35931 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -301,6 +301,8 @@ extra-source-files:
test/fb2/images-embedded.html
test/fb2/images-embedded.fb2
test/fb2/test-small.png
+ test/fb2/reader/*.fb2
+ test/fb2/reader/*.native
test/fb2/test.jpg
test/docx/*.docx
test/docx/golden/*.docx
@@ -445,6 +447,7 @@ library
Text.Pandoc.Readers.Odt,
Text.Pandoc.Readers.EPUB,
Text.Pandoc.Readers.Muse,
+ Text.Pandoc.Readers.FB2,
Text.Pandoc.Writers,
Text.Pandoc.Writers.Native,
Text.Pandoc.Writers.Docbook,
@@ -663,6 +666,7 @@ test-suite test-pandoc
Tests.Readers.EPUB
Tests.Readers.Muse
Tests.Readers.Creole
+ Tests.Readers.FB2
Tests.Writers.Native
Tests.Writers.ConTeXt
Tests.Writers.Docbook
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 99277d434..9a3e00c9f 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -738,6 +738,7 @@ defaultReaderName fallback (x:xs) =
".odt" -> "odt"
".pdf" -> "pdf" -- so we get an "unknown reader" error
".doc" -> "doc" -- so we get an "unknown reader" error
+ ".fb2" -> "fb2"
_ -> defaultReaderName fallback xs
-- Determine default writer based on output file extension
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 680e7e0b0..7b7f92b35 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -65,6 +65,7 @@ module Text.Pandoc.Readers
, readTxt2Tags
, readEPUB
, readMuse
+ , readFB2
-- * Miscellaneous
, getReader
, getDefaultExtensions
@@ -86,6 +87,7 @@ import Text.Pandoc.Readers.Creole
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.EPUB
+import Text.Pandoc.Readers.FB2
import Text.Pandoc.Readers.Haddock
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.JATS (readJATS)
@@ -143,6 +145,7 @@ readers = [ ("native" , TextReader readNative)
,("t2t" , TextReader readTxt2Tags)
,("epub" , ByteStringReader readEPUB)
,("muse" , TextReader readMuse)
+ ,("fb2" , TextReader readFB2)
]
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
new file mode 100644
index 000000000..99b71922f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -0,0 +1,402 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-
+Copyright (C) 2018 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.FB2
+ Copyright : Copyright (C) 2018 Alexander Krotov
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Krotov <ilabdsf@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of FB2 to 'Pandoc' document.
+-}
+
+{-
+
+TODO:
+ - Tables
+ - Named styles
+ - Parse ID attribute for all elements that have it
+
+-}
+
+module Text.Pandoc.Readers.FB2 ( readFB2 ) where
+import Prelude
+import Control.Monad.Except (throwError)
+import Control.Monad.State.Strict
+import Data.ByteString.Lazy.Char8 ( pack )
+import Data.ByteString.Base64.Lazy
+import Data.Char (isSpace, toUpper)
+import Data.List (dropWhileEnd, intersperse)
+import Data.List.Split (splitOn)
+import Data.Text (Text)
+import Data.Default
+import Data.Maybe
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad, insertMedia)
+import Text.Pandoc.Error
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter)
+import Text.XML.Light
+
+type FB2 m = StateT FB2State m
+
+data FB2State = FB2State{ fb2SectionLevel :: Int
+ , fb2Meta :: Meta
+ , fb2Authors :: [String]
+ } deriving Show
+
+instance Default FB2State where
+ def = FB2State{ fb2SectionLevel = 1
+ , fb2Meta = mempty
+ , fb2Authors = []
+ }
+
+instance HasMeta FB2State where
+ setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
+ deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
+
+readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 _ inp = do
+ (bs, st) <- runStateT (mapM parseBlock $ parseXML (crFilter inp)) def
+ let authors = if null $ fb2Authors st
+ then id
+ else setMeta "author" (map text $ reverse $ fb2Authors st)
+ pure $ Pandoc (authors $ fb2Meta st) (toList . mconcat $ bs)
+
+-- * Utility functions
+
+trim :: String -> String
+trim = dropWhileEnd isSpace . dropWhile isSpace
+
+removeHash :: String -> String
+removeHash ('#':xs) = xs
+removeHash xs = xs
+
+convertEntity :: String -> String
+convertEntity e = fromMaybe (map toUpper e) (lookupEntity e)
+
+parseInline :: PandocMonad m => Content -> FB2 m Inlines
+parseInline (Elem e) =
+ case qName $ elName e of
+ "strong" -> strong <$> parseStyleType e
+ "emphasis" -> emph <$> parseStyleType e
+ "style" -> parseNamedStyle e
+ "a" -> parseLinkType e
+ "strikethrough" -> strikeout <$> parseStyleType e
+ "sub" -> subscript <$> parseStyleType e
+ "sup" -> superscript <$> parseStyleType e
+ "code" -> pure $ code $ strContent e
+ "image" -> parseInlineImageElement e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
+parseInline (Text x) = pure $ text $ cdData x
+parseInline (CRef r) = pure $ str $ convertEntity r
+
+parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
+parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <*> parsePType e
+
+-- * Root element parser
+
+parseBlock :: PandocMonad m => Content -> FB2 m Blocks
+parseBlock (Elem e) =
+ case qName $ elName e of
+ "?xml" -> pure mempty
+ "FictionBook" -> mconcat <$> mapM parseFictionBookChild (elChildren e)
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
+parseBlock _ = pure mempty
+
+-- | Parse a child of @\<FictionBook>@ element.
+parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
+parseFictionBookChild e =
+ case qName $ elName e of
+ "stylesheet" -> pure mempty -- stylesheet is ignored
+ "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
+ "body" -> mconcat <$> mapM parseBodyChild (elChildren e)
+ "binary" -> mempty <$ parseBinaryElement e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ "in FictionBook.")
+
+-- | Parse a child of @\<description>@ element.
+parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
+parseDescriptionChild e =
+ case qName $ elName e of
+ "title-info" -> mapM_ parseTitleInfoChild (elChildren e)
+ "src-title-info" -> pure () -- ignore
+ "document-info" -> pure ()
+ "publish-info" -> pure ()
+ "custom-info" -> pure ()
+ "output" -> pure ()
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ "in description.")
+
+-- | Parse a child of @\<body>@ element.
+parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
+parseBodyChild e =
+ case qName $ elName e of
+ "image" -> parseImageElement e
+ "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
+ "epigraph" -> parseEpigraph e
+ "section" -> parseSection e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in body.")
+
+-- | Parse a @\<binary>@ element.
+parseBinaryElement :: PandocMonad m => Element -> FB2 m ()
+parseBinaryElement e =
+ case (findAttr (QName "id" Nothing Nothing) e, findAttr (QName "content-type" Nothing Nothing) e) of
+ (Nothing, _) -> throwError $ PandocParseError "<binary> element must have an \"id\" attribute"
+ (Just _, Nothing) -> throwError $ PandocParseError "<binary> element must have a \"content-type\" attribute"
+ (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e)))
+
+-- * Type parsers
+
+-- | Parse @authorType@
+parseAuthor :: PandocMonad m => Element -> FB2 m String
+parseAuthor e = unwords <$> mapM parseAuthorChild (elChildren e)
+
+parseAuthorChild :: PandocMonad m => Element -> FB2 m String
+parseAuthorChild e =
+ case qName $ elName e of
+ "first-name" -> pure $ strContent e
+ "middle-name" -> pure $ strContent e
+ "last-name" -> pure $ strContent e
+ "nickname" -> pure $ strContent e
+ "home-page" -> pure $ strContent e
+ "email" -> pure $ strContent e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in author.")
+
+-- | Parse @titleType@
+parseTitle :: PandocMonad m => Element -> FB2 m Blocks
+parseTitle e = header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
+
+parseTitleType :: PandocMonad m => [Content] -> FB2 m Inlines
+parseTitleType c = mconcat . intersperse linebreak . catMaybes <$> mapM parseTitleContent c
+
+parseTitleContent :: PandocMonad m => Content -> FB2 m (Maybe Inlines)
+parseTitleContent (Elem e) =
+ case qName $ elName e of
+ "p" -> Just <$> parsePType e
+ "empty-line" -> pure $ Just mempty
+ _ -> pure mempty
+parseTitleContent _ = pure Nothing
+
+-- | Parse @imageType@
+parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
+parseImageElement e =
+ case href of
+ Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
+ Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: image without href."
+ where alt = maybe mempty str $ findAttr (QName "alt" Nothing Nothing) e
+ title = fromMaybe "" $ findAttr (QName "title" Nothing Nothing) e
+ imgId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e
+ href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+
+-- | Parse @pType@
+parsePType :: PandocMonad m => Element -> FB2 m Inlines
+parsePType = parseStyleType -- TODO add support for optional "id" and "style" attributes
+
+-- | Parse @citeType@
+parseCite :: PandocMonad m => Element -> FB2 m Blocks
+parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
+
+-- | Parse @citeType@ child
+parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
+parseCiteChild e =
+ case qName $ elName e of
+ "p" -> para <$> parsePType e
+ "poem" -> parsePoem e
+ "empty-line" -> pure horizontalRule
+ "subtitle" -> parseSubtitle e
+ "table" -> parseTable e
+ "text-author" -> para <$> parsePType e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in cite.")
+
+-- | Parse @poemType@
+parsePoem :: PandocMonad m => Element -> FB2 m Blocks
+parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
+
+parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
+parsePoemChild e =
+ case qName $ elName e of
+ "title" -> parseTitle e
+ "subtitle" -> parseSubtitle e
+ "epigraph" -> parseEpigraph e
+ "stanza" -> parseStanza e
+ "text-author" -> para <$> parsePType e
+ "date" -> pure $ para $ text $ strContent e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in poem.")
+
+parseStanza :: PandocMonad m => Element -> FB2 m Blocks
+parseStanza e = fromList . joinLineBlocks . toList . mconcat <$> mapM parseStanzaChild (elChildren e)
+
+joinLineBlocks :: [Block] -> [Block]
+joinLineBlocks (LineBlock xs:LineBlock ys:zs) = joinLineBlocks (LineBlock (xs ++ ys) : zs)
+joinLineBlocks (x:xs) = x:joinLineBlocks xs
+joinLineBlocks [] = []
+
+parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
+parseStanzaChild e =
+ case qName $ elName e of
+ "title" -> parseTitle e
+ "subtitle" -> parseSubtitle e
+ "v" -> lineBlock . (:[]) <$> parsePType e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in stanza.")
+
+-- | Parse @epigraphType@
+parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
+parseEpigraph e =
+ divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
+ where divId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e
+
+parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
+parseEpigraphChild e =
+ case qName $ elName e of
+ "p" -> para <$> parsePType e
+ "poem" -> parsePoem e
+ "cite" -> parseCite e
+ "empty-line" -> pure horizontalRule
+ "text-author" -> para <$> parsePType e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in epigraph.")
+
+-- | Parse @annotationType@
+parseAnnotation :: PandocMonad m => Element -> FB2 m Blocks
+parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
+
+parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
+parseAnnotationChild e =
+ case qName $ elName e of
+ "p" -> para <$> parsePType e
+ "poem" -> parsePoem e
+ "cite" -> parseCite e
+ "subtitle" -> parseSubtitle e
+ "table" -> parseTable e
+ "empty-line" -> pure horizontalRule
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in annotation.")
+
+-- | Parse @sectionType@
+parseSection :: PandocMonad m => Element -> FB2 m Blocks
+parseSection e = do
+ n <- gets fb2SectionLevel
+ modify $ \st -> st{ fb2SectionLevel = n + 1 }
+ let sectionId = fromMaybe "" $ findAttr (QName "id" Nothing Nothing) e
+ bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
+ modify $ \st -> st{ fb2SectionLevel = n }
+ pure bs
+
+parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
+parseSectionChild e =
+ case qName $ elName e of
+ "title" -> parseBodyChild e
+ "epigraph" -> parseEpigraph e
+ "image" -> parseImageElement e
+ "annotation" -> parseAnnotation e
+ "poem" -> parsePoem e
+ "cite" -> parseCite e
+ "empty-line" -> pure horizontalRule
+ "table" -> parseTable e
+ "subtitle" -> parseSubtitle e
+ "p" -> para <$> parsePType e
+ "section" -> parseSection e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in section.")
+
+-- | parse @styleType@
+parseStyleType :: PandocMonad m => Element -> FB2 m Inlines
+parseStyleType e = mconcat <$> mapM parseInline (elContent e)
+
+-- | Parse @namedStyleType@
+parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
+parseNamedStyle e = do
+ content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
+ let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
+ case findAttr (QName "name" Nothing Nothing) e of
+ Just name -> pure $ spanWith ("", [name], lang) content
+ Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required name."
+
+parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
+parseNamedStyleChild (Elem e) =
+ case qName (elName e) of
+ "strong" -> strong <$> parseStyleType e
+ "emphasis" -> emph <$> parseStyleType e
+ "style" -> parseNamedStyle e
+ "a" -> parseLinkType e
+ "strikethrough" -> strikeout <$> parseStyleType e
+ "sub" -> subscript <$> parseStyleType e
+ "sup" -> superscript <$> parseStyleType e
+ "code" -> pure $ code $ strContent e
+ "image" -> parseInlineImageElement e
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ ".")
+parseNamedStyleChild x = parseInline x
+
+-- | Parse @linkType@
+parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
+parseLinkType e = do
+ content <- mconcat <$> mapM parseStyleLinkType (elContent e)
+ case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ Just href -> pure $ link href "" content
+ Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: link without required href."
+
+-- | Parse @styleLinkType@
+parseStyleLinkType :: PandocMonad m => Content -> FB2 m Inlines
+parseStyleLinkType x@(Elem e) =
+ case qName (elName e) of
+ "a" -> throwError $ PandocParseError "Couldn't parse FB2 file: links cannot be nested."
+ _ -> parseInline x
+parseStyleLinkType x = parseInline x
+
+-- | Parse @tableType@
+parseTable :: PandocMonad m => Element -> FB2 m Blocks
+parseTable _ = pure mempty -- TODO: tables are not supported yet
+
+-- | Parse @title-infoType@
+parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
+parseTitleInfoChild e =
+ case qName (elName e) of
+ "genre" -> pure ()
+ "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
+ "book-title" -> modify (setMeta "title" (text $ strContent e))
+ "annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
+ "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e))
+ "date" -> modify (setMeta "date" (text $ strContent e))
+ "coverpage" -> parseCoverPage e
+ "lang" -> pure ()
+ "src-lang" -> pure ()
+ "translator" -> pure ()
+ "sequence" -> pure ()
+ name -> throwError $ PandocParseError ("Couldn't parse FB2 file: unexpected element " ++ name ++ " in title-info.")
+
+parseCoverPage :: PandocMonad m => Element -> FB2 m ()
+parseCoverPage e =
+ case findChild (QName "image" (Just "http://www.gribuser.ru/xml/fictionbook/2.0") Nothing) e of
+ Just img -> case href of
+ Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
+ Nothing -> pure ()
+ where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+ Nothing -> pure ()
+
+-- | Parse @inlineImageType@ element
+parseInlineImageElement :: PandocMonad m
+ => Element
+ -> FB2 m Inlines
+parseInlineImageElement e =
+ case href of
+ Just src -> pure $ imageWith ("", [], []) (removeHash src) "" alt
+ Nothing -> throwError $ PandocParseError "Couldn't parse FB2 file: inline image without href."
+ where alt = maybe mempty str $ findAttr (QName "alt" Nothing Nothing) e
+ href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/test/Tests/Readers/FB2.hs b/test/Tests/Readers/FB2.hs
new file mode 100644
index 000000000..9b2983d57
--- /dev/null
+++ b/test/Tests/Readers/FB2.hs
@@ -0,0 +1,29 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+
+module Tests.Readers.FB2 (tests) where
+
+import Prelude
+import Test.Tasty
+import Tests.Helpers
+import Test.Tasty.Golden (goldenVsString)
+import qualified Data.ByteString as BS
+import Text.Pandoc
+import Text.Pandoc.UTF8 (toText, fromTextLazy)
+import Data.Text (Text)
+import Data.Text.Lazy (fromStrict)
+import System.FilePath (replaceExtension)
+
+fb2ToNative :: Text -> Text
+fb2ToNative = purely (writeNative def{ writerTemplate = Just "" }) . purely (readFB2 def)
+
+fb2Test :: TestName -> FilePath -> TestTree
+fb2Test name path = goldenVsString name native (fromTextLazy . fromStrict . fb2ToNative . toText <$> BS.readFile path)
+ where native = replaceExtension path ".native"
+
+tests :: [TestTree]
+tests = [ fb2Test "Emphasis" "fb2/reader/emphasis.fb2"
+ , fb2Test "Titles" "fb2/reader/titles.fb2"
+ , fb2Test "Epigraph" "fb2/reader/epigraph.fb2"
+ , fb2Test "Poem" "fb2/reader/poem.fb2"
+ , fb2Test "Meta" "fb2/reader/meta.fb2"
+ ]
diff --git a/test/fb2/reader/emphasis.fb2 b/test/fb2/reader/emphasis.fb2
new file mode 100644
index 000000000..1a936a9d0
--- /dev/null
+++ b/test/fb2/reader/emphasis.fb2
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
+ <body>
+ <section>
+ <p>Plain, <strong>strong</strong>, <emphasis>emphasis</emphasis>, <strong><emphasis>strong emphasis</emphasis></strong>, <emphasis><strong>emphasized strong</strong></emphasis>.</p>
+ <p>Strikethrough: <strikethrough>deleted</strikethrough></p>
+ <p><sub>Subscript</sub> and <sup>superscript</sup></p>
+ <p>Some <code>code</code></p>
+ </section>
+ </body>
+</FictionBook>
diff --git a/test/fb2/reader/emphasis.native b/test/fb2/reader/emphasis.native
new file mode 100644
index 000000000..422e7bb15
--- /dev/null
+++ b/test/fb2/reader/emphasis.native
@@ -0,0 +1,6 @@
+Pandoc (Meta {unMeta = fromList []})
+[Div ("",["section"],[])
+ [Para [Str "Plain,",Space,Strong [Str "strong"],Str ",",Space,Emph [Str "emphasis"],Str ",",Space,Strong [Emph [Str "strong",Space,Str "emphasis"]],Str ",",Space,Emph [Strong [Str "emphasized",Space,Str "strong"]],Str "."]
+ ,Para [Str "Strikethrough:",Space,Strikeout [Str "deleted"]]
+ ,Para [Subscript [Str "Subscript"],Space,Str "and",Space,Superscript [Str "superscript"]]
+ ,Para [Str "Some",Space,Code ("",[],[]) "code"]]]
diff --git a/test/fb2/reader/epigraph.fb2 b/test/fb2/reader/epigraph.fb2
new file mode 100644
index 000000000..5bb5cd2ef
--- /dev/null
+++ b/test/fb2/reader/epigraph.fb2
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
+ <body>
+ <epigraph>
+ <p>Body epigraph</p>
+ </epigraph>
+ <section>
+ <epigraph>
+ <p>Section epigraph</p>
+ </epigraph>
+ <section>
+ <epigraph>
+ <p>Subsection epigraph</p>
+ </epigraph>
+ </section>
+ </section>
+ </body>
+</FictionBook>
diff --git a/test/fb2/reader/epigraph.native b/test/fb2/reader/epigraph.native
new file mode 100644
index 000000000..a58a3e05b
--- /dev/null
+++ b/test/fb2/reader/epigraph.native
@@ -0,0 +1,9 @@
+Pandoc (Meta {unMeta = fromList []})
+[Div ("",["epigraph"],[])
+ [Para [Str "Body",Space,Str "epigraph"]]
+,Div ("",["section"],[])
+ [Div ("",["epigraph"],[])
+ [Para [Str "Section",Space,Str "epigraph"]]
+ ,Div ("",["section"],[])
+ [Div ("",["epigraph"],[])
+ [Para [Str "Subsection",Space,Str "epigraph"]]]]]
diff --git a/test/fb2/reader/meta.fb2 b/test/fb2/reader/meta.fb2
new file mode 100644
index 000000000..7e1736d64
--- /dev/null
+++ b/test/fb2/reader/meta.fb2
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
+ <description>
+ <title-info>
+ <author>
+ <first-name>First</first-name>
+ <middle-name>Middle</middle-name>
+ <last-name>Last</last-name>
+ </author>
+ <author>
+ <first-name>Another</first-name>
+ <last-name>Author</last-name>
+ </author>
+ <book-title>Book title</book-title>
+ <annotation>
+ <p>Book annotation</p>
+ <p>Second paragraph of book annotation</p>
+ </annotation>
+ <keywords>foo, bar, baz</keywords>
+ <date>2018</date>
+ </title-info>
+ </description>
+ <body>
+ <title><p>Body title</p></title>
+ </body>
+</FictionBook>
diff --git a/test/fb2/reader/meta.native b/test/fb2/reader/meta.native
new file mode 100644
index 000000000..71a8795b6
--- /dev/null
+++ b/test/fb2/reader/meta.native
@@ -0,0 +1,2 @@
+Pandoc (Meta {unMeta = fromList [("abstract",MetaBlocks [Para [Str "Book",Space,Str "annotation"],Para [Str "Second",Space,Str "paragraph",Space,Str "of",Space,Str "book",Space,Str "annotation"]]),("author",MetaList [MetaInlines [Str "First",Space,Str "Middle",Space,Str "Last"],MetaInlines [Str "Another",Space,Str "Author"]]),("date",MetaInlines [Str "2018"]),("keywords",MetaList [MetaString "foo",MetaString "bar",MetaString "baz"]),("title",MetaInlines [Str "Book",Space,Str "title"])]})
+[Header 1 ("",[],[]) [Str "Body",Space,Str "title"]]
diff --git a/test/fb2/reader/poem.fb2 b/test/fb2/reader/poem.fb2
new file mode 100644
index 000000000..fcf4a0c02
--- /dev/null
+++ b/test/fb2/reader/poem.fb2
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
+ <body>
+ <section>
+ <poem>
+ <title>
+ <p>Poem title</p>
+ </title>
+ <epigraph>
+ <p>Poem epigraph</p>
+ </epigraph>
+ <stanza>
+ <subtitle>Subtitle</subtitle>
+ <title>
+ <p>First stanza title</p>
+ </title>
+ <v>Verse</v>
+ <v><emphasis>More</emphasis> verse</v>
+ </stanza>
+ <stanza>
+ <v>One more stanza</v>
+ </stanza>
+ <text-author>Author</text-author>
+ <date>April 2018</date>
+ </poem>
+ </section>
+ </body>
+</FictionBook>
diff --git a/test/fb2/reader/poem.native b/test/fb2/reader/poem.native
new file mode 100644
index 000000000..67be6a672
--- /dev/null
+++ b/test/fb2/reader/poem.native
@@ -0,0 +1,14 @@
+Pandoc (Meta {unMeta = fromList []})
+[Div ("",["section"],[])
+ [Header 2 ("",[],[]) [Str "Poem",Space,Str "title"]
+ ,Div ("",["epigraph"],[])
+ [Para [Str "Poem",Space,Str "epigraph"]]
+ ,Header 2 ("",["unnumbered"],[]) [Str "Subtitle"]
+ ,Header 2 ("",[],[]) [Str "First",Space,Str "stanza",Space,Str "title"]
+ ,LineBlock
+ [[Str "Verse"]
+ ,[Emph [Str "More"],Space,Str "verse"]]
+ ,LineBlock
+ [[Str "One",Space,Str "more",Space,Str "stanza"]]
+ ,Para [Str "Author"]
+ ,Para [Str "April",Space,Str "2018"]]]
diff --git a/test/fb2/reader/titles.fb2 b/test/fb2/reader/titles.fb2
new file mode 100644
index 000000000..cfe9588d8
--- /dev/null
+++ b/test/fb2/reader/titles.fb2
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
+ <body>
+ <title><p>Body title</p></title>
+ <section>
+ <title><p>Section title</p></title>
+ <section>
+ <title>
+ <p>Subsection title</p>
+ <p>with multiple paragraphs</p>
+ </title>
+ </section>
+ <section>
+ <title><p>Another subsection title</p></title>
+ </section>
+ </section>
+ </body>
+</FictionBook>
diff --git a/test/fb2/reader/titles.native b/test/fb2/reader/titles.native
new file mode 100644
index 000000000..a6c34f5ea
--- /dev/null
+++ b/test/fb2/reader/titles.native
@@ -0,0 +1,8 @@
+Pandoc (Meta {unMeta = fromList []})
+[Header 1 ("",[],[]) [Str "Body",Space,Str "title"]
+,Div ("",["section"],[])
+ [Header 2 ("",[],[]) [Str "Section",Space,Str "title"]
+ ,Div ("",["section"],[])
+ [Header 3 ("",[],[]) [Str "Subsection",Space,Str "title",LineBreak,Str "with",Space,Str "multiple",Space,Str "paragraphs"]]
+ ,Div ("",["section"],[])
+ [Header 3 ("",[],[]) [Str "Another",Space,Str "subsection",Space,Str "title"]]]]
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs
index 8613d5dda..b70d2286c 100644
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@@ -12,6 +12,7 @@ import qualified Tests.Old
import qualified Tests.Readers.Creole
import qualified Tests.Readers.Docx
import qualified Tests.Readers.EPUB
+import qualified Tests.Readers.FB2
import qualified Tests.Readers.HTML
import qualified Tests.Readers.JATS
import qualified Tests.Readers.LaTeX
@@ -75,6 +76,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
, testGroup "EPUB" Tests.Readers.EPUB.tests
, testGroup "Muse" Tests.Readers.Muse.tests
, testGroup "Creole" Tests.Readers.Creole.tests
+ , testGroup "FB2" Tests.Readers.FB2.tests
]
, testGroup "Lua filters" Tests.Lua.tests
]