diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/App.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 27 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Txt2Tags.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 69 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ConTeXt.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/DokuWiki.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/FB2.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Haddock.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ICML.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/MediaWiki.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Texinfo.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ZimWiki.hs | 5 |
16 files changed, 81 insertions, 48 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 97954764a..845146f34 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -57,7 +57,7 @@ import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Text as T import Data.Yaml (decode) import qualified Data.Yaml as Yaml -import Network.URI (URI (..), isURI, parseURI) +import Network.URI (URI (..), parseURI) import Paths_pandoc (getDataDir) import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme) import Skylighting.Parser (missingIncludes, parseSyntaxDefinition, @@ -80,7 +80,7 @@ import Text.Pandoc.Lua ( runLuaFilter ) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeSelfContained, makeDataURI) -import Text.Pandoc.Shared (headerShift, openURL, readDataFile, +import Text.Pandoc.Shared (isURI, headerShift, openURL, readDataFile, readDataFileUTF8, safeRead, tabFilter) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (toEntities) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index e430c7cb5..c6be48d19 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -465,33 +465,8 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p)) --- Schemes from http://www.iana.org/assignments/uri-schemes.html plus --- the unofficial schemes coap, doi, javascript, isbn, pmid -schemes :: [String] -schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid", - "crid","data","dav","dict","dns","file","ftp","geo","go","gopher", - "h323","http","https","iax","icap","im","imap","info","ipp","iris", - "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid", - "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp", - "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve", - "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet", - "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon", - "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s", - "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin", - "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee", - "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb", - "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject", - "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms", - "keyparc","lastfm","ldaps","magnet","maps","market","message","mms", - "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi", - "platform","proxy","psyc","query","res","resource","rmi","rsync", - "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify", - "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004", - "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri", - "ymsgr", "isbn", "pmid"] - uriScheme :: Stream s m Char => ParserT s st m String -uriScheme = oneOfStringsCI schemes +uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index ba2b20083..05c6c9a69 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -42,7 +42,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) ---import Network.URI (isURI) -- Not sure whether to use this function import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) import Data.Default diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index f8ad43b1e..55df147b6 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -42,7 +42,7 @@ import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L import Data.Char (isAlphaNum, isAscii, toLower) import Data.List (isPrefixOf) -import Network.URI (URI (..), escapeURIString, isURI, parseURI) +import Network.URI (URI (..), escapeURIString, parseURI) import System.FilePath (takeDirectory, takeExtension, (</>)) import Text.HTML.TagSoup import Text.Pandoc.Class (PandocMonad (..), fetchItem, report) @@ -50,7 +50,7 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (WriterOptions (..)) -import Text.Pandoc.Shared (renderTags', trim) +import Text.Pandoc.Shared (isURI, renderTags', trim) import Text.Pandoc.UTF8 (toString) import Text.Parsec (ParsecT, runParserT) import qualified Text.Parsec as P diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 3a61656e5..7a1e6f3e3 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -81,6 +81,9 @@ module Text.Pandoc.Shared ( openURL, collapseFilePath, filteredFilesFromArchive, + -- * URI handling + schemes, + isURI, -- * Error handling mapLeft, -- * for squashing blocks @@ -104,7 +107,7 @@ import Data.List ( find, stripPrefix, intercalate ) import Data.Maybe (mapMaybe) import Data.Version ( showVersion ) import qualified Data.Map as M -import Network.URI ( escapeURIString, unEscapeString ) +import Network.URI ( URI(uriScheme), escapeURIString, unEscapeString, parseURI ) import qualified Data.Set as Set import System.Directory import System.FilePath (splitDirectories, isPathSeparator) @@ -774,6 +777,70 @@ filteredFilesFromArchive zf f = fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) + +-- +-- IANA URIs +-- + +-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus +-- the unofficial schemes doi, javascript, isbn, pmid. +schemes :: Set.Set String +schemes = Set.fromList + -- Official IANA schemes + [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" + , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin" + , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension" + , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs" + , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle" + , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed" + , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg" + , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham" + , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon" + , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6" + , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs" + , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap" + , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market" + , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access" + , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel" + , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath" + , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint" + , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller" + , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode" + , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular" + , "ms-settings-cloudstorage", "ms-settings-connectabledevices" + , "ms-settings-displays-topology", "ms-settings-emailandaccounts" + , "ms-settings-language", "ms-settings-location", "ms-settings-lock" + , "ms-settings-nfctransactions", "ms-settings-notifications" + , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity" + , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace" + , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad" + , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word" + , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs" + , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd" + , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop" + , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis" + , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp" + , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn" + , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews" + , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam" + , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid" + , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn" + , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi" + , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid" + , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire" + , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r" + , "z39.50s" + -- Inofficial schemes + , "doi", "isbn", "javascript", "pmid" + ] + +-- | Check if the string is a valid URL with a IANA or frequently used but +-- unofficial scheme (see @schemes@). +isURI :: String -> Bool +isURI = maybe False hasKnownScheme . parseURI + where + hasKnownScheme = (`Set.member` schemes) . filter (/= ':') . uriScheme + --- --- Squash blocks into inlines --- diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index eef16d3da..2d4502153 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -33,7 +33,7 @@ import Control.Monad.State import Data.Char (ord) import Data.List (intercalate, intersperse) import Data.Maybe (catMaybes) -import Network.URI (isURI, unEscapeString) +import Network.URI (unEscapeString) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 81987dc44..1d02a9c40 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -44,13 +44,12 @@ import Control.Monad.Reader (ReaderT, ask, local, runReaderT) import Control.Monad.State (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (intercalate, intersect, isPrefixOf, transpose) -import Network.URI (isURI) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition import Text.Pandoc.ImageSize import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, linesToPara, +import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara, removeFormatting, substitute, trimr) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Shared (defField, metaToJSON) diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 0926cc331..d450513bc 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -46,7 +46,6 @@ import Data.Char (isAscii, isControl, isSpace, toLower) import Data.Either (lefts, rights) import Data.List (intercalate, intersperse, isPrefixOf, stripPrefix) import Network.HTTP (urlEncode) -import Network.URI (isURI) import Text.XML.Light import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC @@ -57,7 +56,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) -import Text.Pandoc.Shared (capitalize, isHeaderBlock, linesToPara, +import Text.Pandoc.Shared (capitalize, isHeaderBlock, isURI, linesToPara, orderedListMarkers) -- | Data to be written at the end of the document: diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index 812b46c30..cbbe5bdb4 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -36,7 +36,6 @@ module Text.Pandoc.Writers.Haddock (writeHaddock) where import Control.Monad.State import Data.Default import Data.List (intersperse, transpose) -import Network.URI (isURI) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Logging diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 2f7a4889f..f36a32015 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -21,7 +21,6 @@ import Control.Monad.State import Data.List (intersperse, isInfixOf, isPrefixOf, stripPrefix) import qualified Data.Set as Set import Data.Text as Text (breakOnAll, pack) -import Network.URI (isURI) import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class as P import Text.Pandoc.Definition @@ -29,7 +28,7 @@ import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty -import Text.Pandoc.Shared (linesToPara, splitBy) +import Text.Pandoc.Shared (isURI, linesToPara, splitBy) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.Writers.Shared diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 31c70e99d..2b3d7c878 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -43,7 +43,7 @@ import Data.List (foldl', intercalate, intersperse, isInfixOf, nub, nubBy, stripPrefix, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust) import qualified Data.Text as T -import Network.URI (isURI, unEscapeString) +import Network.URI (unEscapeString) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index b70716181..e858bc43f 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -49,7 +49,6 @@ import qualified Data.Text as T import qualified Data.Vector as V import Data.Yaml (Value (Array, Bool, Number, Object, String)) import Network.HTTP (urlEncode) -import Network.URI (isURI) import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index 439bbb2f9..aa5c3bc4f 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -34,7 +34,6 @@ import Control.Monad.Reader import Control.Monad.State import Data.List (intercalate) import qualified Data.Set as Set -import Network.URI (isURI) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 5dc2ba31a..b88fc2245 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -35,7 +35,6 @@ import Control.Monad.State import Data.Char (isSpace, toLower) import Data.List (isPrefixOf, stripPrefix) import Data.Maybe (fromMaybe) -import Network.URI (isURI) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 9926daea1..710e1dea0 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -37,7 +37,7 @@ import Data.Char (chr, ord) import Data.List (maximumBy, transpose) import Data.Ord (comparing) import qualified Data.Set as Set -import Network.URI (isURI, unEscapeString) +import Network.URI (unEscapeString) import System.FilePath import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index bc2cf8f3c..4ab8bde30 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -38,14 +38,13 @@ import Data.Default (Default (..)) import Data.List (intercalate, isInfixOf, isPrefixOf, transpose) import qualified Data.Map as Map import Data.Text (breakOnAll, pack) -import Network.URI (isURI) import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Logging import Text.Pandoc.Definition import Text.Pandoc.ImageSize import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (escapeURI, linesToPara, removeFormatting, substitute, - trimr) +import Text.Pandoc.Shared (isURI, escapeURI, linesToPara, removeFormatting, + substitute, trimr) import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Shared (defField, metaToJSON) |