diff options
author | schrieveslaach <schrieveslaach@online.de> | 2017-06-12 15:52:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-12 15:52:29 +0200 |
commit | 635f299b441e238ccd34e3ad61c5e36f0ca30067 (patch) | |
tree | 11cfc34402975bad208f9a48d075fe2ace959e70 /src/Text/Pandoc/Shared.hs | |
parent | 181c56d4003aa83abed23b95a452c4890aa3797c (diff) | |
parent | 23f3c2d7b4796d1af742a74999ce67924bf2abb3 (diff) | |
download | pandoc-635f299b441e238ccd34e3ad61c5e36f0ca30067.tar.gz |
Merge branch 'master' into textcolor-support
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 146 |
1 files changed, 100 insertions, 46 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 8256d14c0..745e809d0 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -2,7 +2,7 @@ FlexibleContexts, ScopedTypeVariables, PatternGuards, ViewPatterns #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -70,6 +70,7 @@ module Text.Pandoc.Shared ( isTightList, addMetaField, makeMeta, + eastAsianLineBreakFilter, -- * TagSoup HTML handling renderTags', -- * File handling @@ -81,6 +82,9 @@ module Text.Pandoc.Shared ( openURL, collapseFilePath, filteredFilesFromArchive, + -- * URI handling + schemes, + isURI, -- * Error handling mapLeft, -- * for squashing blocks @@ -104,7 +108,7 @@ import Data.List ( find, stripPrefix, intercalate ) import Data.Maybe (mapMaybe) import Data.Version ( showVersion ) import qualified Data.Map as M -import Network.URI ( escapeURIString, unEscapeString ) +import Network.URI ( URI(uriScheme), escapeURIString, unEscapeString, parseURI ) import qualified Data.Set as Set import System.Directory import System.FilePath (splitDirectories, isPathSeparator) @@ -117,6 +121,7 @@ import qualified Control.Monad.State as S import qualified Control.Exception as E import Control.Monad (msum, unless, MonadPlus(..)) import Text.Pandoc.Pretty (charWidth) +import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Compat.Time import Data.Time.Clock.POSIX import System.IO.Error @@ -128,7 +133,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) -import qualified Data.Text as T (toUpper, pack, unpack) +import qualified Data.Text as T import Data.ByteString.Lazy (toChunks, fromChunks) import qualified Data.ByteString.Lazy as BL import Paths_pandoc (version) @@ -140,9 +145,9 @@ import Text.Pandoc.Data (dataFiles) #else import Paths_pandoc (getDataFileName) #endif -#ifdef HTTP_CLIENT import Network.HTTP.Client (httpLbs, responseBody, responseHeaders, - Request(port,host,requestHeaders)) + Request(port,host,requestHeaders), + HttpException) import Network.HTTP.Client (parseRequest) import Network.HTTP.Client (newManager) import Network.HTTP.Client.Internal (addProxy) @@ -150,12 +155,6 @@ import Network.HTTP.Client.TLS (tlsManagerSettings) import System.Environment (getEnv) import Network.HTTP.Types.Header ( hContentType, hUserAgent) import Network (withSocketsDo) -#else -import Network.URI (parseURI) -import Network.HTTP (findHeader, rspBody, - RequestMethod(..), HeaderName(..), mkRequest) -import Network.Browser (browse, setAllowRedirects, setOutHandler, request) -#endif -- | Version number of pandoc library. pandocVersion :: String @@ -280,26 +279,20 @@ escapeURI = escapeURIString (not . needsEscaping) where needsEscaping c = isSpace c || c `elem` ['<','>','|','"','{','}','[',']','^', '`'] - -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop - -> String -- ^ Input - -> String -tabFilter tabStop = - let go _ [] = "" - go _ ('\n':xs) = '\n' : go tabStop xs - go _ ('\r':'\n':xs) = '\n' : go tabStop xs - go _ ('\r':xs) = '\n' : go tabStop xs - go spsToNextStop ('\t':xs) = - if tabStop == 0 - then '\t' : go tabStop xs - else replicate spsToNextStop ' ' ++ go tabStop xs - go 1 (x:xs) = - x : go tabStop xs - go spsToNextStop (x:xs) = - x : go (spsToNextStop - 1) xs - in go tabStop + -> T.Text -- ^ Input + -> T.Text +tabFilter tabStop = T.filter (/= '\r') . T.unlines . + (if tabStop == 0 then id else map go) . T.lines + where go s = + let (s1, s2) = T.break (== '\t') s + in if T.null s2 + then s1 + else s1 <> T.replicate + (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") + <> go (T.drop 1 s2) -- -- Date/time @@ -581,6 +574,16 @@ makeMeta title authors date = $ addMetaField "date" (B.fromList date) $ nullMeta +-- | Remove soft breaks between East Asian characters. +eastAsianLineBreakFilter :: Pandoc -> Pandoc +eastAsianLineBreakFilter = bottomUp go + where go (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), (c:_)) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs + go xs = xs + -- -- TagSoup HTML handling -- @@ -709,14 +712,13 @@ readDataFileUTF8 userDir fname = UTF8.toString `fmap` readDataFile userDir fname -- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (BS.ByteString, Maybe MimeType) +openURL :: String -> IO (Either HttpException (BS.ByteString, Maybe MimeType)) openURL u | Just u'' <- stripPrefix "data:" u = let mime = takeWhile (/=',') u'' contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u'' - in return (decodeLenient contents, Just mime) -#ifdef HTTP_CLIENT - | otherwise = withSocketsDo $ do + in return $ Right (decodeLenient contents, Just mime) + | otherwise = E.try $ withSocketsDo $ do let parseReq = parseRequest (proxy :: Either IOError String) <- tryIOError $ getEnv "http_proxy" @@ -738,19 +740,6 @@ openURL u resp <- newManager tlsManagerSettings >>= httpLbs req'' return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) -#else - | otherwise = getBodyAndMimeType `fmap` browse - (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." - setOutHandler $ const (return ()) - setAllowRedirects True - request (getRequest' u')) - where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r) - getRequest' uriString = case parseURI uriString of - Nothing -> error ("Not a valid URL: " ++ - uriString) - Just v -> mkRequest GET v - u' = escapeURIString (/= '|') u -- pipes are rejected by Network.URI -#endif -- -- Error reporting @@ -794,6 +783,71 @@ filteredFilesFromArchive zf f = fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) + +-- +-- IANA URIs +-- + +-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus +-- the unofficial schemes doi, javascript, isbn, pmid. +schemes :: Set.Set String +schemes = Set.fromList + -- Official IANA schemes + [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" + , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin" + , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension" + , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs" + , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle" + , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed" + , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg" + , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham" + , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon" + , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6" + , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs" + , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap" + , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market" + , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access" + , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel" + , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath" + , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint" + , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller" + , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode" + , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular" + , "ms-settings-cloudstorage", "ms-settings-connectabledevices" + , "ms-settings-displays-topology", "ms-settings-emailandaccounts" + , "ms-settings-language", "ms-settings-location", "ms-settings-lock" + , "ms-settings-nfctransactions", "ms-settings-notifications" + , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity" + , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace" + , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad" + , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word" + , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs" + , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd" + , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop" + , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis" + , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp" + , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn" + , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews" + , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam" + , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid" + , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn" + , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi" + , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid" + , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire" + , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r" + , "z39.50s" + -- Inofficial schemes + , "doi", "isbn", "javascript", "pmid" + ] + +-- | Check if the string is a valid URL with a IANA or frequently used but +-- unofficial scheme (see @schemes@). +isURI :: String -> Bool +isURI = maybe False hasKnownScheme . parseURI + where + hasKnownScheme = (`Set.member` schemes) . map toLower . + filter (/= ':') . uriScheme + --- --- Squash blocks into inlines --- |