aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/FB2.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/FB2.hs')
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs123
1 files changed, 63 insertions, 60 deletions
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 0b25b9fed..6eed3c104 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.FB2
Copyright : Copyright (C) 2018-2019 Alexander Krotov
@@ -27,12 +28,11 @@ import Control.Monad.Except (throwError)
import Control.Monad.State.Strict
import Data.ByteString.Lazy.Char8 ( pack )
import Data.ByteString.Base64.Lazy
-import Data.Char (isSpace, toUpper)
import Data.Functor
-import Data.List (dropWhileEnd, intersperse)
-import Data.List.Split (splitOn)
+import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text)
+import qualified Data.Text as T
import Data.Default
import Data.Maybe
import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -48,8 +48,8 @@ type FB2 m = StateT FB2State m
data FB2State = FB2State{ fb2SectionLevel :: Int
, fb2Meta :: Meta
- , fb2Authors :: [String]
- , fb2Notes :: M.Map String Blocks
+ , fb2Authors :: [Text]
+ , fb2Notes :: M.Map Text Blocks
} deriving Show
instance Default FB2State where
@@ -76,19 +76,20 @@ readFB2 _ inp =
-- * Utility functions
-trim :: String -> String
-trim = dropWhileEnd isSpace . dropWhile isSpace
+trim :: Text -> Text
+trim = T.strip
-removeHash :: String -> String
-removeHash ('#':xs) = xs
-removeHash xs = xs
+removeHash :: Text -> Text
+removeHash t = case T.uncons t of
+ Just ('#', xs) -> xs
+ _ -> t
-convertEntity :: String -> String
-convertEntity e = fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: String -> Text
+convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
parseInline :: PandocMonad m => Content -> FB2 m Inlines
parseInline (Elem e) =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -96,12 +97,12 @@ parseInline (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ strContent e
+ "code" -> pure $ code $ T.pack $ strContent e
"image" -> parseInlineImageElement e
name -> do
report $ IgnoredElement name
pure mempty
-parseInline (Text x) = pure $ text $ cdData x
+parseInline (Text x) = pure $ text $ T.pack $ cdData x
parseInline (CRef r) = pure $ str $ convertEntity r
parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -111,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
parseRootElement e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"FictionBook" -> do
-- Parse notes before parsing the rest of the content.
case filterChild isNotesBody e of
@@ -144,7 +145,7 @@ parseNote e =
Just sectionId -> do
content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
oldNotes <- gets fb2Notes
- modify $ \s -> s { fb2Notes = M.insert ("#" ++ sectionId) content oldNotes }
+ modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
pure ()
where
isTitle x = qName (elName x) == "title"
@@ -156,7 +157,7 @@ parseNote e =
-- | Parse a child of @\<FictionBook>@ element.
parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
parseFictionBookChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"stylesheet" -> pure mempty -- stylesheet is ignored
"description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
"body" -> if isNotesBody e
@@ -168,7 +169,7 @@ parseFictionBookChild e =
-- | Parse a child of @\<description>@ element.
parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
parseDescriptionChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title-info" -> mapM_ parseTitleInfoChild (elChildren e)
"src-title-info" -> pure () -- ignore
"document-info" -> pure ()
@@ -176,13 +177,13 @@ parseDescriptionChild e =
"custom-info" -> pure ()
"output" -> pure ()
name -> do
- report $ IgnoredElement $ name ++ " in description"
+ report $ IgnoredElement $ name <> " in description"
pure mempty
-- | Parse a child of @\<body>@ element.
parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
parseBodyChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"image" -> parseImageElement e
"title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
"epigraph" -> parseEpigraph e
@@ -196,25 +197,25 @@ parseBinaryElement e =
(Nothing, _) -> report $ IgnoredElement "binary without id attribute"
(Just _, Nothing) ->
report $ IgnoredElement "binary without content-type attribute"
- (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e)))
+ (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
-- * Type parsers
-- | Parse @authorType@
-parseAuthor :: PandocMonad m => Element -> FB2 m String
-parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
+parseAuthor :: PandocMonad m => Element -> FB2 m Text
+parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
-parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String)
+parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
parseAuthorChild e =
- case qName $ elName e of
- "first-name" -> pure $ Just $ strContent e
- "middle-name" -> pure $ Just $ strContent e
- "last-name" -> pure $ Just $ strContent e
- "nickname" -> pure $ Just $ strContent e
- "home-page" -> pure $ Just $ strContent e
- "email" -> pure $ Just $ strContent e
+ case T.pack $ qName $ elName e of
+ "first-name" -> pure $ Just $ T.pack $ strContent e
+ "middle-name" -> pure $ Just $ T.pack $ strContent e
+ "last-name" -> pure $ Just $ T.pack $ strContent e
+ "nickname" -> pure $ Just $ T.pack $ strContent e
+ "home-page" -> pure $ Just $ T.pack $ strContent e
+ "email" -> pure $ Just $ T.pack $ strContent e
name -> do
- report $ IgnoredElement $ name ++ " in author"
+ report $ IgnoredElement $ name <> " in author"
pure Nothing
-- | Parse @titleType@
@@ -236,13 +237,13 @@ parseTitleContent _ = pure Nothing
parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
parseImageElement e =
case href of
- Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
+ Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
Nothing -> do
report $ IgnoredElement " image without href"
pure mempty
- where alt = maybe mempty str $ findAttr (unqual "alt") e
- title = fromMaybe "" $ findAttr (unqual "title") e
- imgId = fromMaybe "" $ findAttr (unqual "id") e
+ where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
+ title = maybe "" T.pack $ findAttr (unqual "title") e
+ imgId = maybe "" T.pack $ findAttr (unqual "id") e
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
-- | Parse @pType@
@@ -256,7 +257,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
-- | Parse @citeType@ child
parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
parseCiteChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"empty-line" -> pure horizontalRule
@@ -271,13 +272,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
parsePoemChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"epigraph" -> parseEpigraph e
"stanza" -> parseStanza e
"text-author" -> para <$> parsePType e
- "date" -> pure $ para $ text $ strContent e
+ "date" -> pure $ para $ text $ T.pack $ strContent e
name -> report (UnexpectedXmlElement name "poem") $> mempty
parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -290,7 +291,7 @@ joinLineBlocks [] = []
parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
parseStanzaChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"v" -> lineBlock . (:[]) <$> parsePType e
@@ -300,11 +301,11 @@ parseStanzaChild e =
parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraph e =
divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
- where divId = fromMaybe "" $ findAttr (unqual "id") e
+ where divId = maybe "" T.pack $ findAttr (unqual "id") e
parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraphChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -318,7 +319,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
parseAnnotationChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -332,14 +333,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
parseSection e = do
n <- gets fb2SectionLevel
modify $ \st -> st{ fb2SectionLevel = n + 1 }
- let sectionId = fromMaybe "" $ findAttr (unqual "id") e
+ let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
modify $ \st -> st{ fb2SectionLevel = n }
pure bs
parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
parseSectionChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseBodyChild e
"epigraph" -> parseEpigraph e
"image" -> parseImageElement e
@@ -361,16 +362,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
parseNamedStyle e = do
content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
- let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
+ let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
case findAttr (unqual "name") e of
- Just name -> pure $ spanWith ("", [name], lang) content
+ Just name -> pure $ spanWith ("", [T.pack name], lang) content
Nothing -> do
report $ IgnoredElement "link without required name"
pure mempty
parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
parseNamedStyleChild (Elem e) =
- case qName (elName e) of
+ case T.pack $ qName (elName e) of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -378,10 +379,10 @@ parseNamedStyleChild (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ strContent e
+ "code" -> pure $ code $ T.pack $ strContent e
"image" -> parseInlineImageElement e
name -> do
- report $ IgnoredElement $ name ++ " in style"
+ report $ IgnoredElement $ name <> " in style"
pure mempty
parseNamedStyleChild x = parseInline x
@@ -390,7 +391,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
parseLinkType e = do
content <- mconcat <$> mapM parseStyleLinkType (elContent e)
notes <- gets fb2Notes
- case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just href -> case findAttr (QName "type" Nothing Nothing) e of
Just "note" -> case M.lookup href notes of
Nothing -> pure $ link href "" content
@@ -417,19 +418,21 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
-- | Parse @title-infoType@
parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
parseTitleInfoChild e =
- case qName (elName e) of
+ case T.pack $ qName (elName e) of
"genre" -> pure ()
"author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
- "book-title" -> modify (setMeta "title" (text $ strContent e))
+ "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
"annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
- "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e))
- "date" -> modify (setMeta "date" (text $ strContent e))
+ "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
+ $ T.pack
+ $ strContent e))
+ "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
"coverpage" -> parseCoverPage e
"lang" -> pure ()
"src-lang" -> pure ()
"translator" -> pure ()
"sequence" -> pure ()
- name -> report $ IgnoredElement $ name ++ " in title-info"
+ name -> report $ IgnoredElement $ name <> " in title-info"
parseCoverPage :: PandocMonad m => Element -> FB2 m ()
parseCoverPage e =
@@ -437,7 +440,7 @@ parseCoverPage e =
Just img -> case href of
Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
Nothing -> pure ()
- where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+ where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
Nothing -> pure ()
-- | Parse @inlineImageType@ element
@@ -450,5 +453,5 @@ parseInlineImageElement e =
Nothing -> do
report $ IgnoredElement "inline image without href"
pure mempty
- where alt = maybe mempty str $ findAttr (unqual "alt") e
- href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
+ href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e