diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-08-09 20:22:06 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-08-09 20:22:06 -0700 |
commit | eed1274fcaa0348fd08624eaa863ad26248958de (patch) | |
tree | 6000aeb164abda25f1cce788e18e37e5180d7438 /lib/fonts | |
parent | c2a0d47c7b15483efa565ab2b6d9fa836e3a8818 (diff) | |
parent | ecb42cc002f892295ea33711947338580f60522b (diff) | |
download | pandoc-eed1274fcaa0348fd08624eaa863ad26248958de.tar.gz |
Merge pull request #1509 from jkr/symbol
Parse "w:sym"
Diffstat (limited to 'lib/fonts')
-rw-r--r-- | lib/fonts/Makefile | 6 | ||||
-rw-r--r-- | lib/fonts/parseUnicodeMapping.hs | 40 | ||||
-rw-r--r-- | lib/fonts/symbol.txt | 256 |
3 files changed, 302 insertions, 0 deletions
diff --git a/lib/fonts/Makefile b/lib/fonts/Makefile new file mode 100644 index 000000000..5693ee054 --- /dev/null +++ b/lib/fonts/Makefile @@ -0,0 +1,6 @@ +symbol.hs: symbol.txt + runghc parseUnicodeMapping.hs symbol.txt + +.PHONY: clean +clean: + -rm symbol.hs diff --git a/lib/fonts/parseUnicodeMapping.hs b/lib/fonts/parseUnicodeMapping.hs new file mode 100644 index 000000000..4f7ff692b --- /dev/null +++ b/lib/fonts/parseUnicodeMapping.hs @@ -0,0 +1,40 @@ +import System.FilePath +import Text.Parsec +import Data.Char +import System.Environment +import Control.Applicative hiding (many) +import Data.List + +main :: IO () +main = (head <$> getArgs) >>= parseUnicodeMapping + + +parseUnicodeMapping :: FilePath -> IO () +parseUnicodeMapping fname = do + fin <- readFile fname + let mapname = dropExtension . takeFileName $ fname + let res = runParse fin + let header = "-- Generated from " ++ fname ++ "\n" ++ + mapname ++ " :: [(Char, Char)]\n" ++ mapname ++" =\n [ " + let footer = "]" + writeFile (replaceExtension fname ".hs") + (header ++ (concat $ intersperse "\n , " (map show res)) ++ footer) + +type Unicode = Char + +runParse :: String -> [(Char, Unicode)] +runParse s= either (error . show) id (parse parseMap "" s) + +anyline = manyTill anyChar newline + +getHexChar :: Parsec String () Char +getHexChar = do + [(c,_)] <- readLitChar . ("\\x" ++) <$> many1 hexDigit + return c + +parseMap :: Parsec String () [(Char, Unicode)] +parseMap = do + skipMany (char '#' >> anyline) + many (flip (,) <$> getHexChar <* tab <*> getHexChar <* anyline) + + diff --git a/lib/fonts/symbol.txt b/lib/fonts/symbol.txt new file mode 100644 index 000000000..b98baf6cf --- /dev/null +++ b/lib/fonts/symbol.txt @@ -0,0 +1,256 @@ +# +# Name: Adobe Symbol Encoding to Unicode +# Unicode version: 2.0 +# Table version: 1.0 +# Date: 2011 July 12 +# +# Copyright (c) 1991-2011 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). No +# claims are made as to fitness for any particular purpose. No warranties of +# any kind are expressed or implied. The recipient agrees to determine +# applicability of information provided. If this file has been provided on +# magnetic media by Unicode, Inc., the sole remedy for any claim will be +# exchange of defective media within 90 days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +# Format: 4 tab-delimited fields: +# +# (1) The Unicode value (in hexadecimal) +# (2) The Symbol Encoding code point (in hexadecimal) +# (3) # Unicode name +# (4) # PostScript character name +# +# General Notes: +# +# The Unicode values in this table were produced as the result of applying +# the algorithm described in the section "Populating a Unicode space" in the +# document "Unicode and Glyph Names," at +# http://partners.adobe.com/asn/developer/typeforum/unicodegn.html +# to the characters in Symbol. Note that some characters, such as "space", +# are mapped to 2 Unicode values. 29 characters have assignments in the +# Corporate Use Subarea; these are indicated by "(CUS)" in field 4. Refer to +# the above document for more details. +# +# 2011 July 12: The above link is no longer valid. For comparable, +# more current information, see the document, "Glyph", at: +# <http://www.adobe.com/devnet/opentype/archives/glyph.html> +# +# Revision History: +# +# [v1.0, 2011 July 12] +# Updated terms of use to current wording. +# Updated contact information and document link. +# No changes to the mapping data. +# +# [v0.2, 30 March 1999] +# Different algorithm to produce Unicode values (see notes above) results in +# some character codes being mapped to 2 Unicode values; use of Corporate +# Use subarea values; addition of the euro character; changed assignments of +# some characters such as the COPYRIGHT SIGNs and RADICAL EXTENDER. Updated +# Unicode names to Unicode 2.0 names. +# +# [v0.1, 5 May 1995] First release. +# +# Use the Unicode reporting form <http://www.unicode.org/reporting.html> +# for any questions or comments or to report errors in the data. +# +0020 20 # SPACE # space +00A0 20 # NO-BREAK SPACE # space +0021 21 # EXCLAMATION MARK # exclam +2200 22 # FOR ALL # universal +0023 23 # NUMBER SIGN # numbersign +2203 24 # THERE EXISTS # existential +0025 25 # PERCENT SIGN # percent +0026 26 # AMPERSAND # ampersand +220B 27 # CONTAINS AS MEMBER # suchthat +0028 28 # LEFT PARENTHESIS # parenleft +0029 29 # RIGHT PARENTHESIS # parenright +2217 2A # ASTERISK OPERATOR # asteriskmath +002B 2B # PLUS SIGN # plus +002C 2C # COMMA # comma +2212 2D # MINUS SIGN # minus +002E 2E # FULL STOP # period +002F 2F # SOLIDUS # slash +0030 30 # DIGIT ZERO # zero +0031 31 # DIGIT ONE # one +0032 32 # DIGIT TWO # two +0033 33 # DIGIT THREE # three +0034 34 # DIGIT FOUR # four +0035 35 # DIGIT FIVE # five +0036 36 # DIGIT SIX # six +0037 37 # DIGIT SEVEN # seven +0038 38 # DIGIT EIGHT # eight +0039 39 # DIGIT NINE # nine +003A 3A # COLON # colon +003B 3B # SEMICOLON # semicolon +003C 3C # LESS-THAN SIGN # less +003D 3D # EQUALS SIGN # equal +003E 3E # GREATER-THAN SIGN # greater +003F 3F # QUESTION MARK # question +2245 40 # APPROXIMATELY EQUAL TO # congruent +0391 41 # GREEK CAPITAL LETTER ALPHA # Alpha +0392 42 # GREEK CAPITAL LETTER BETA # Beta +03A7 43 # GREEK CAPITAL LETTER CHI # Chi +0394 44 # GREEK CAPITAL LETTER DELTA # Delta +2206 44 # INCREMENT # Delta +0395 45 # GREEK CAPITAL LETTER EPSILON # Epsilon +03A6 46 # GREEK CAPITAL LETTER PHI # Phi +0393 47 # GREEK CAPITAL LETTER GAMMA # Gamma +0397 48 # GREEK CAPITAL LETTER ETA # Eta +0399 49 # GREEK CAPITAL LETTER IOTA # Iota +03D1 4A # GREEK THETA SYMBOL # theta1 +039A 4B # GREEK CAPITAL LETTER KAPPA # Kappa +039B 4C # GREEK CAPITAL LETTER LAMDA # Lambda +039C 4D # GREEK CAPITAL LETTER MU # Mu +039D 4E # GREEK CAPITAL LETTER NU # Nu +039F 4F # GREEK CAPITAL LETTER OMICRON # Omicron +03A0 50 # GREEK CAPITAL LETTER PI # Pi +0398 51 # GREEK CAPITAL LETTER THETA # Theta +03A1 52 # GREEK CAPITAL LETTER RHO # Rho +03A3 53 # GREEK CAPITAL LETTER SIGMA # Sigma +03A4 54 # GREEK CAPITAL LETTER TAU # Tau +03A5 55 # GREEK CAPITAL LETTER UPSILON # Upsilon +03C2 56 # GREEK SMALL LETTER FINAL SIGMA # sigma1 +03A9 57 # GREEK CAPITAL LETTER OMEGA # Omega +2126 57 # OHM SIGN # Omega +039E 58 # GREEK CAPITAL LETTER XI # Xi +03A8 59 # GREEK CAPITAL LETTER PSI # Psi +0396 5A # GREEK CAPITAL LETTER ZETA # Zeta +005B 5B # LEFT SQUARE BRACKET # bracketleft +2234 5C # THEREFORE # therefore +005D 5D # RIGHT SQUARE BRACKET # bracketright +22A5 5E # UP TACK # perpendicular +005F 5F # LOW LINE # underscore +F8E5 60 # RADICAL EXTENDER # radicalex (CUS) +03B1 61 # GREEK SMALL LETTER ALPHA # alpha +03B2 62 # GREEK SMALL LETTER BETA # beta +03C7 63 # GREEK SMALL LETTER CHI # chi +03B4 64 # GREEK SMALL LETTER DELTA # delta +03B5 65 # GREEK SMALL LETTER EPSILON # epsilon +03C6 66 # GREEK SMALL LETTER PHI # phi +03B3 67 # GREEK SMALL LETTER GAMMA # gamma +03B7 68 # GREEK SMALL LETTER ETA # eta +03B9 69 # GREEK SMALL LETTER IOTA # iota +03D5 6A # GREEK PHI SYMBOL # phi1 +03BA 6B # GREEK SMALL LETTER KAPPA # kappa +03BB 6C # GREEK SMALL LETTER LAMDA # lambda +00B5 6D # MICRO SIGN # mu +03BC 6D # GREEK SMALL LETTER MU # mu +03BD 6E # GREEK SMALL LETTER NU # nu +03BF 6F # GREEK SMALL LETTER OMICRON # omicron +03C0 70 # GREEK SMALL LETTER PI # pi +03B8 71 # GREEK SMALL LETTER THETA # theta +03C1 72 # GREEK SMALL LETTER RHO # rho +03C3 73 # GREEK SMALL LETTER SIGMA # sigma +03C4 74 # GREEK SMALL LETTER TAU # tau +03C5 75 # GREEK SMALL LETTER UPSILON # upsilon +03D6 76 # GREEK PI SYMBOL # omega1 +03C9 77 # GREEK SMALL LETTER OMEGA # omega +03BE 78 # GREEK SMALL LETTER XI # xi +03C8 79 # GREEK SMALL LETTER PSI # psi +03B6 7A # GREEK SMALL LETTER ZETA # zeta +007B 7B # LEFT CURLY BRACKET # braceleft +007C 7C # VERTICAL LINE # bar +007D 7D # RIGHT CURLY BRACKET # braceright +223C 7E # TILDE OPERATOR # similar +20AC A0 # EURO SIGN # Euro +03D2 A1 # GREEK UPSILON WITH HOOK SYMBOL # Upsilon1 +2032 A2 # PRIME # minute +2264 A3 # LESS-THAN OR EQUAL TO # lessequal +2044 A4 # FRACTION SLASH # fraction +2215 A4 # DIVISION SLASH # fraction +221E A5 # INFINITY # infinity +0192 A6 # LATIN SMALL LETTER F WITH HOOK # florin +2663 A7 # BLACK CLUB SUIT # club +2666 A8 # BLACK DIAMOND SUIT # diamond +2665 A9 # BLACK HEART SUIT # heart +2660 AA # BLACK SPADE SUIT # spade +2194 AB # LEFT RIGHT ARROW # arrowboth +2190 AC # LEFTWARDS ARROW # arrowleft +2191 AD # UPWARDS ARROW # arrowup +2192 AE # RIGHTWARDS ARROW # arrowright +2193 AF # DOWNWARDS ARROW # arrowdown +00B0 B0 # DEGREE SIGN # degree +00B1 B1 # PLUS-MINUS SIGN # plusminus +2033 B2 # DOUBLE PRIME # second +2265 B3 # GREATER-THAN OR EQUAL TO # greaterequal +00D7 B4 # MULTIPLICATION SIGN # multiply +221D B5 # PROPORTIONAL TO # proportional +2202 B6 # PARTIAL DIFFERENTIAL # partialdiff +2022 B7 # BULLET # bullet +00F7 B8 # DIVISION SIGN # divide +2260 B9 # NOT EQUAL TO # notequal +2261 BA # IDENTICAL TO # equivalence +2248 BB # ALMOST EQUAL TO # approxequal +2026 BC # HORIZONTAL ELLIPSIS # ellipsis +F8E6 BD # VERTICAL ARROW EXTENDER # arrowvertex (CUS) +F8E7 BE # HORIZONTAL ARROW EXTENDER # arrowhorizex (CUS) +21B5 BF # DOWNWARDS ARROW WITH CORNER LEFTWARDS # carriagereturn +2135 C0 # ALEF SYMBOL # aleph +2111 C1 # BLACK-LETTER CAPITAL I # Ifraktur +211C C2 # BLACK-LETTER CAPITAL R # Rfraktur +2118 C3 # SCRIPT CAPITAL P # weierstrass +2297 C4 # CIRCLED TIMES # circlemultiply +2295 C5 # CIRCLED PLUS # circleplus +2205 C6 # EMPTY SET # emptyset +2229 C7 # INTERSECTION # intersection +222A C8 # UNION # union +2283 C9 # SUPERSET OF # propersuperset +2287 CA # SUPERSET OF OR EQUAL TO # reflexsuperset +2284 CB # NOT A SUBSET OF # notsubset +2282 CC # SUBSET OF # propersubset +2286 CD # SUBSET OF OR EQUAL TO # reflexsubset +2208 CE # ELEMENT OF # element +2209 CF # NOT AN ELEMENT OF # notelement +2220 D0 # ANGLE # angle +2207 D1 # NABLA # gradient +F6DA D2 # REGISTERED SIGN SERIF # registerserif (CUS) +F6D9 D3 # COPYRIGHT SIGN SERIF # copyrightserif (CUS) +F6DB D4 # TRADE MARK SIGN SERIF # trademarkserif (CUS) +220F D5 # N-ARY PRODUCT # product +221A D6 # SQUARE ROOT # radical +22C5 D7 # DOT OPERATOR # dotmath +00AC D8 # NOT SIGN # logicalnot +2227 D9 # LOGICAL AND # logicaland +2228 DA # LOGICAL OR # logicalor +21D4 DB # LEFT RIGHT DOUBLE ARROW # arrowdblboth +21D0 DC # LEFTWARDS DOUBLE ARROW # arrowdblleft +21D1 DD # UPWARDS DOUBLE ARROW # arrowdblup +21D2 DE # RIGHTWARDS DOUBLE ARROW # arrowdblright +21D3 DF # DOWNWARDS DOUBLE ARROW # arrowdbldown +25CA E0 # LOZENGE # lozenge +2329 E1 # LEFT-POINTING ANGLE BRACKET # angleleft +F8E8 E2 # REGISTERED SIGN SANS SERIF # registersans (CUS) +F8E9 E3 # COPYRIGHT SIGN SANS SERIF # copyrightsans (CUS) +F8EA E4 # TRADE MARK SIGN SANS SERIF # trademarksans (CUS) +2211 E5 # N-ARY SUMMATION # summation +F8EB E6 # LEFT PAREN TOP # parenlefttp (CUS) +F8EC E7 # LEFT PAREN EXTENDER # parenleftex (CUS) +F8ED E8 # LEFT PAREN BOTTOM # parenleftbt (CUS) +F8EE E9 # LEFT SQUARE BRACKET TOP # bracketlefttp (CUS) +F8EF EA # LEFT SQUARE BRACKET EXTENDER # bracketleftex (CUS) +F8F0 EB # LEFT SQUARE BRACKET BOTTOM # bracketleftbt (CUS) +F8F1 EC # LEFT CURLY BRACKET TOP # bracelefttp (CUS) +F8F2 ED # LEFT CURLY BRACKET MID # braceleftmid (CUS) +F8F3 EE # LEFT CURLY BRACKET BOTTOM # braceleftbt (CUS) +F8F4 EF # CURLY BRACKET EXTENDER # braceex (CUS) +232A F1 # RIGHT-POINTING ANGLE BRACKET # angleright +222B F2 # INTEGRAL # integral +2320 F3 # TOP HALF INTEGRAL # integraltp +F8F5 F4 # INTEGRAL EXTENDER # integralex (CUS) +2321 F5 # BOTTOM HALF INTEGRAL # integralbt +F8F6 F6 # RIGHT PAREN TOP # parenrighttp (CUS) +F8F7 F7 # RIGHT PAREN EXTENDER # parenrightex (CUS) +F8F8 F8 # RIGHT PAREN BOTTOM # parenrightbt (CUS) +F8F9 F9 # RIGHT SQUARE BRACKET TOP # bracketrighttp (CUS) +F8FA FA # RIGHT SQUARE BRACKET EXTENDER # bracketrightex (CUS) +F8FB FB # RIGHT SQUARE BRACKET BOTTOM # bracketrightbt (CUS) +F8FC FC # RIGHT CURLY BRACKET TOP # bracerighttp (CUS) +F8FD FD # RIGHT CURLY BRACKET MID # bracerightmid (CUS) +F8FE FE # RIGHT CURLY BRACKET BOTTOM # bracerightbt (CUS) |