diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2012-01-27 13:30:15 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2012-01-27 13:30:47 -0800 |
commit | 5b3c0a10819a317cda5d6acaf2144a017bf55d75 (patch) | |
tree | 5651218ed35a6d2695adceed934bf9abac80bad5 /src/Text/Pandoc | |
parent | 663cfc2fbd0ff83f74dcde53e3cc98130fb645e0 (diff) | |
download | pandoc-5b3c0a10819a317cda5d6acaf2144a017bf55d75.tar.gz |
Made charWidth in Text.Pandoc.Pretty complete.
Used the unicode spec EastAsianWidth.txt.
All characters marked W or F get width 2.
Closes #377.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Pretty.hs | 64 |
1 files changed, 35 insertions, 29 deletions
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index c3979348e..b06f2f384 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -468,36 +468,42 @@ doubleQuotes = inside (char '"') (char '"')
 charWidth :: Char -> Int
 charWidth c =
   case c of
-      _ | c >= '\x300' && c <= '\x36e' -> 0 -- combining
-        | c == '\x3000' -> 2 -- full
-        | c >= '\xFF01' && c <= '\xFF60' -> 2
-        | c >= '\xFFE0' && c <= '\xFFE6' -> 2
-        | c >= '\x1100' && c <= '\x1159' -> 2 -- wide
-        | c >= '\x115F' && c <= '\x115F' -> 2
+      _ | c < '\x0300' -> 1
+        | c >= '\x0300' && c <= '\x036F' -> 0 -- combining
+        | c >= '\x0370' && c <= '\x10FC' -> 1
+        | c >= '\x1100' && c <= '\x115F' -> 2
+        | c >= '\x1160' && c <= '\x11A2' -> 1
+        | c >= '\x11A3' && c <= '\x11A7' -> 2
+        | c >= '\x11A8' && c <= '\x11F9' -> 1
+        | c >= '\x11FA' && c <= '\x11FF' -> 2
+        | c >= '\x1200' && c <= '\x2328' -> 1
         | c >= '\x2329' && c <= '\x232A' -> 2
-        | c >= '\x2E80' && c <= '\x2E99' -> 2
-        | c >= '\x2E9B' && c <= '\x2EF3' -> 2
-        | c >= '\x2F00' && c <= '\x2FD5' -> 2
-        | c >= '\x2FF0' && c <= '\x2FFB' -> 2
-        | c >= '\x3001' && c <= '\x303E' -> 2
-        | c >= '\x3041' && c <= '\x3096' -> 2
-        | c >= '\x3099' && c <= '\x30FF' -> 2
-        | c >= '\x3105' && c <= '\x312C' -> 2
-        | c >= '\x3131' && c <= '\x318E' -> 2
-        | c >= '\x3190' && c <= '\x31B7' -> 2
-        | c >= '\x31F0' && c <= '\x321E' -> 2
-        | c >= '\x3220' && c <= '\x3243' -> 2
-        | c >= '\x3250' && c <= '\x327D' -> 2
-        | c >= '\x327F' && c <= '\x32FE' -> 2
-        | c >= '\x3300' && c <= '\x33FF' -> 2
-        | c >= '\xA000' && c <= '\xA48C' -> 2
-        | c >= '\xA490' && c <= '\xA4C6' -> 2
-        | c >= '\xF900' && c <= '\xFA2D' -> 2
-        | c >= '\xFA30' && c <= '\xFA6A' -> 2
-        | c >= '\xFE30' && c <= '\xFE52' -> 2
-        | c >= '\xFE54' && c <= '\xFE66' -> 2
-        | c >= '\xFE68' && c <= '\xFE6B' -> 2
-        | c >= '\x2F800' && c <= '\x2FA1D' -> 2
+        | c >= '\x232B' && c <= '\x2E31' -> 1
+        | c >= '\x2E80' && c <= '\x303E' -> 2
+        | c == '\x303F' -> 1
+        | c >= '\x3041' && c <= '\x3247' -> 2
+        | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
+        | c >= '\x3250' && c <= '\x4DBF' -> 2
+        | c >= '\x4DC0' && c <= '\x4DFF' -> 1
+        | c >= '\x4E00' && c <= '\xA4C6' -> 2
+        | c >= '\xA4D0' && c <= '\xA95F' -> 1
+        | c >= '\xA960' && c <= '\xA97C' -> 2
+        | c >= '\xA980' && c <= '\xABF9' -> 1
+        | c >= '\xAC00' && c <= '\xD7FB' -> 2
+        | c >= '\xD800' && c <= '\xDFFF' -> 1
+        | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
+        | c >= '\xF900' && c <= '\xFAFF' -> 2
+        | c >= '\xFB00' && c <= '\xFDFD' -> 1
+        | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
+        | c >= '\xFE10' && c <= '\xFE19' -> 2
+        | c >= '\xFE20' && c <= '\xFE26' -> 1
+        | c >= '\xFE30' && c <= '\xFE6B' -> 2
+        | c >= '\xFE70' && c <= '\x16A38' -> 1
+        | c >= '\x1B000' && c <= '\x1B001' -> 2
+        | c >= '\x1D000' && c <= '\x1F1FF' -> 1
+        | c >= '\x1F200' && c <= '\x1F251' -> 2
+        | c >= '\x1F300' && c <= '\x1F773' -> 1
+        | c >= '\x20000' && c <= '\x3FFFD' -> 2
         | otherwise -> 1
 
 -- | Get real length of string, taking into account combining and double-wide