diff options
author | Milan Bracke <mbracke@antidot.net> | 2021-06-11 09:26:09 +0200 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-10-18 19:15:40 -0700 |
commit | 6acc82c5d2885c596c52e6c35bed8fe08f535066 (patch) | |
tree | ad9a434851bc05154cc4680fd779aa8f9367b0e2 /src/Text/Pandoc/Readers | |
parent | 193f6bfebaa43d0d6749d10a4e7ca78a0d31361d (diff) | |
download | pandoc-6acc82c5d2885c596c52e6c35bed8fe08f535066.tar.gz |
Docx parser: implement PAGEREF fields
These fields, often used in tables of contents, can be a hyperlink.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Fields.hs | 25 |
2 files changed, 26 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 462e3c679..5c8f20c18 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -448,6 +448,7 @@ parPartToInlines' (PlainOMath exps) = parPartToInlines' (Field info children) = case info of HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children + PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children _ -> smushInlines <$> mapM parPartToInlines' children parPartToInlines' NullParPart = return mempty diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index 442bc3466..5f090b6be 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -21,8 +21,11 @@ import Text.Parsec import Text.Parsec.Text (Parser) type URL = T.Text +type Anchor = T.Text data FieldInfo = HyperlinkField URL + -- The boolean indicates whether the field is a hyperlink. + | PagerefField Anchor Bool | UnknownField deriving (Show) @@ -33,6 +36,8 @@ fieldInfo :: Parser FieldInfo fieldInfo = try (HyperlinkField <$> hyperlink) <|> + try ((uncurry PagerefField) <$> pageref) + <|> return UnknownField escapedQuote :: Parser T.Text @@ -72,3 +77,23 @@ hyperlink = do ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url + +-- See ยง17.16.5.45 +pagerefSwitch :: Parser (T.Text, T.Text) +pagerefSwitch = do + sw <- string "\\h" + spaces + farg <- fieldArgument + return (T.pack sw, farg) + +pageref :: Parser (Anchor, Bool) +pageref = do + many space + string "PAGEREF" + spaces + farg <- fieldArgument + switches <- spaces *> many pagerefSwitch + let isLink = case switches of + ("\\h", _) : _ -> True + _ -> False + return (farg, isLink) |