aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2018-11-20 23:43:21 -0500
committer John MacFarlane <jgm@berkeley.edu> 2018-11-20 23:43:21 -0500
commit d333c283cc7149462491e97e95d3ccfaefab84c2 (patch)
tree 694c50ae32258b4e1abae6a64bbfcd6ff3d5dd62 /src
parent 2d265917b0e9efdd065349032befb96daa021503 (diff)
download pandoc-d333c283cc7149462491e97e95d3ccfaefab84c2.tar.gz
Docx writer: Fix bookmarks to headers with long titles.
Word has a 40-character limit for bookmark names. In addition, bookmarks must begin with a letter. Since pandoc's auto-generated identifiers may not respect these constraints, some internal links did not work. With this change, pandoc uses a bookmark name based on the SHA1 hash of the identifier when the identifier isn't a legal bookmark name. Closes #5091.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/Writers/Docx.hs 22
1 files changed, 18 insertions, 4 deletions
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index a441aeec7..c2a160320 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -41,19 +41,21 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
-import Data.Char (isSpace, ord, toLower)
+import Data.Char (isSpace, ord, toLower, isLetter)
import Data.List (intercalate, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
import qualified Data.Set as Set
import qualified Data.Text as T
import Data.Time.Clock.POSIX
+import Data.Digest.Pure.SHA (sha1, showDigest)
import Skylighting
import System.Random (randomR, StdGen, mkStdGen)
import Text.Pandoc.BCP47 (getLang, renderLang)
import Text.Pandoc.Class (PandocMonad, report, toLang)
import qualified Text.Pandoc.Class as P
import Data.Time
+import Text.Pandoc.UTF8 (fromStringLazy)
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Highlighting (highlight)
@@ -1268,7 +1270,8 @@ inlineToOpenXML' opts (Note bs) = do
-- internal link:
inlineToOpenXML' opts (Link _ txt ('#':xs,_)) = do
contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
- return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
+ return
+ [ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ]
-- external link:
inlineToOpenXML' opts (Link _ txt (src,_)) = do
contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
@@ -1427,7 +1430,18 @@ wrapBookmark :: (PandocMonad m) => String -> [Element] -> WS m [Element]
wrapBookmark [] contents = return contents
wrapBookmark ident contents = do
id' <- getUniqueId
- let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
- ,("w:name",ident)] ()
+ let bookmarkStart = mknode "w:bookmarkStart"
+ [("w:id", id')
+ ,("w:name", toBookmarkName ident)] ()
bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
return $ bookmarkStart : contents ++ [bookmarkEnd]
+
+-- Word imposes a 40 character limit on bookmark names and requires
+-- that they begin with a letter. So we just use a hash of the
+-- identifier when otherwise we'd have an illegal bookmark name.
+toBookmarkName :: String -> String
+toBookmarkName s =
+ case s of
+ (c:_) | isLetter c
+ , length s <= 40 -> s
+ _ -> 'X' : drop 1 (showDigest (sha1 (fromStringLazy s)))