From 6acc82c5d2885c596c52e6c35bed8fe08f535066 Mon Sep 17 00:00:00 2001 From: Milan Bracke Date: Fri, 11 Jun 2021 09:26:09 +0200 Subject: Docx parser: implement PAGEREF fields These fields, often used in tables of contents, can be a hyperlink. --- src/Text/Pandoc/Readers/Docx.hs | 1 + src/Text/Pandoc/Readers/Docx/Fields.hs | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 462e3c679..5c8f20c18 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -448,6 +448,7 @@ parPartToInlines' (PlainOMath exps) = parPartToInlines' (Field info children) = case info of HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children + PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children _ -> smushInlines <$> mapM parPartToInlines' children parPartToInlines' NullParPart = return mempty diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index 442bc3466..5f090b6be 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -21,8 +21,11 @@ import Text.Parsec import Text.Parsec.Text (Parser) type URL = T.Text +type Anchor = T.Text data FieldInfo = HyperlinkField URL + -- The boolean indicates whether the field is a hyperlink. 
+ | PagerefField Anchor Bool | UnknownField deriving (Show) @@ -33,6 +36,8 @@ fieldInfo :: Parser FieldInfo fieldInfo = try (HyperlinkField <$> hyperlink) <|> + try ((uncurry PagerefField) <$> pageref) + <|> return UnknownField escapedQuote :: Parser T.Text @@ -72,3 +77,23 @@ hyperlink = do ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url + +-- See §17.16.5.45 +pagerefSwitch :: Parser (T.Text, T.Text) +pagerefSwitch = do + sw <- string "\\h" + spaces + farg <- fieldArgument + return (T.pack sw, farg) + +pageref :: Parser (Anchor, Bool) +pageref = do + many space + string "PAGEREF" + spaces + farg <- fieldArgument + switches <- spaces *> many pagerefSwitch + let isLink = case switches of + ("\\h", _) : _ -> True + _ -> False + return (farg, isLink) -- cgit v1.2.3