diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2019-02-23 15:40:06 -0700 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2019-02-23 15:40:06 -0700 | 
| commit | 38c028bd50fc3781e69370864f163f8d33fd481f (patch) | |
| tree | c155aad5c0d54b3a06ffe723c57f18cfae463ce6 /src/Text | |
| parent | c75b558cbc2d21cdc4f5fa243b5f900ca7e83bbc (diff) | |
| download | pandoc-38c028bd50fc3781e69370864f163f8d33fd481f.tar.gz | |
JATS reader: fix parsing of figures.
This ensures that a figure containing a single image
is parsed as a pandoc "implicit figure" (i.e., a
Para with a single Image whose title attribute begins
with `fig:`).  More complex figures will still be parsed
as divs.
Closes #5321.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Readers/JATS.hs | 45 | 
1 files changed, 27 insertions, 18 deletions
| diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 52a18347c..f4e9f5a31 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -57,7 +57,6 @@ data JATSState = JATSState{ jatsSectionLevel :: Int                            , jatsQuoteType    :: QuoteType                            , jatsMeta         :: Meta                            , jatsBook         :: Bool -                          , jatsFigureTitle  :: Inlines                            , jatsContent      :: [Content]                            } deriving Show @@ -66,7 +65,6 @@ instance Default JATSState where                   , jatsQuoteType = DoubleQuote                   , jatsMeta = mempty                   , jatsBook = False -                 , jatsFigureTitle = mempty                   , jatsContent = [] } @@ -153,21 +151,18 @@ trimNl = reverse . go . reverse . go  -- function that is used by both graphic (in parseBlock)  -- and inline-graphic (in parseInline) -getGraphic :: PandocMonad m => Element -> JATS m Inlines -getGraphic e = do +getGraphic :: PandocMonad m +           => Maybe (Inlines, String) -> Element -> JATS m Inlines +getGraphic mbfigdata e = do    let atVal a = attrValue a e -      attr = (atVal "id", words $ atVal "role", []) +      (ident, title, caption) = +         case mbfigdata of +           Just (capt, i) -> (i, "fig:" <> atVal "title", capt) +           Nothing        -> (atVal "id", atVal "title", +                              text (atVal "alt-text")) +      attr = (ident, words $ atVal "role", [])        imageUrl = atVal "href" -      captionOrLabel = case filterChild (\x -> named "caption" x -                                            || named "label" x) e of -                        Nothing -> return mempty -                        Just z  -> mconcat <$> -                                         mapM parseInline (elContent z) -  figTitle <- gets jatsFigureTitle -  let (caption, title) = if isNull figTitle -                            then (captionOrLabel, atVal "title") -                            else (return figTitle, "fig:") -  fmap (imageWith attr imageUrl title) caption +  return $ imageWith attr imageUrl title caption  getBlocks :: PandocMonad m => Element -> JATS m Blocks  getBlocks e =  mconcat <$> @@ -197,13 +192,13 @@ parseBlock (Elem e) =                          <$> listitems          "def-list" -> definitionList <$> deflistitems          "sec" -> gets jatsSectionLevel >>= sect . (+1) -        "graphic" -> para <$> getGraphic e +        "graphic" -> para <$> getGraphic Nothing e          "journal-meta" -> parseMetadata e          "article-meta" -> parseMetadata e          "custom-meta" -> parseMetadata e          "title" -> return mempty -- processed by header          "table" -> parseTable -        "fig" -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e +        "fig" -> parseFigure          "table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], []) <$> getBlocks e          "caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6          "ref-list" -> parseRefList e @@ -247,6 +242,20 @@ parseBlock (Elem e) =                       terms' <- mapM getInlines terms                       items' <- mapM getBlocks items                       return (mconcat $ intersperse (str "; ") terms', items') +         parseFigure = do +           -- if a simple caption and single graphic, we emit a standard +           -- implicit figure.  otherwise, we emit a div with the contents +           case filterChildren (named "graphic") e of +                  [g] -> do +                         caption <- case filterChild (named "caption") e of +                                           Just t  -> mconcat . +                                             intersperse linebreak <$> +                                             mapM getInlines +                                             (filterChildren (const True) t) +                                           Nothing -> return mempty +                         img <- getGraphic (Just (caption, attrValue "id" e)) g +                         return $ para $ img +                  _   -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e           parseTable = do                        let isCaption x = named "title" x || named "caption" x                        caption <- case filterChild isCaption e of @@ -456,7 +465,7 @@ parseInline (Elem e) =          "code" -> codeWithLang          "monospace" -> codeWithLang -        "inline-graphic" -> getGraphic e +        "inline-graphic" -> getGraphic Nothing e          "disp-quote" -> do              qt <- gets jatsQuoteType              let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote | 
