diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/MIME.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Pretty.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 70 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 33 |
7 files changed, 104 insertions, 41 deletions
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 3b3b3b5b3..75b4ff0d2 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -477,6 +477,7 @@ mimeTypesList = -- List borrowed from happstack-server. ,("vrml","model/vrml") ,("vs","text/plain") ,("vsd","application/vnd.visio") + ,("vtt","text/vtt") ,("wad","application/x-doom") ,("wav","audio/x-wav") ,("wax","audio/x-ms-wax") diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d1fba1e21..e0f5f65bb 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -472,7 +472,12 @@ mathInlineWith op cl = try $ do string op notFollowedBy space words' <- many1Till (count 1 (noneOf " \t\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> (char '\\' >> + -- This next clause is needed because \text{..} can + -- contain $, \(\), etc. + (try (string "text" >> + (("\\text" ++) <$> inBalancedBraces 0 "")) + <|> (\c -> ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> (oneOf " \t" <* skipMany (oneOf " \t")) notFollowedBy (char '$') @@ -480,6 +485,23 @@ mathInlineWith op cl = try $ do ) (try $ string cl) notFollowedBy digit -- to prevent capture of $5 return $ concat words' + where + inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces 0 "" = do + c <- anyChar + if c == '{' + then inBalancedBraces 1 "{" + else mzero + inBalancedBraces 0 s = return $ reverse s + inBalancedBraces numOpen ('\\':xs) = do + c <- anyChar + inBalancedBraces numOpen (c:'\\':xs) + inBalancedBraces numOpen xs = do + c <- anyChar + case c of + '}' -> inBalancedBraces (numOpen - 1) (c:xs) + '{' -> inBalancedBraces (numOpen + 1) (c:xs) + _ -> inBalancedBraces numOpen (c:xs) mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String mathDisplayWith op cl = try $ do diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 1e72c2040..9ee7fe94a 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -286,6 +286,9 @@ renderList (BlankLines num : xs) = do | otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n") renderList xs +renderList (CarriageReturn : BlankLines m : xs) = + renderList (BlankLines m : xs) + renderList (CarriageReturn : xs) = do st <- get if newlines st > 0 || null xs diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 02a787670..7a3be8291 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -735,9 +735,9 @@ anyOrderedListStart = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number res <- do guardDisabled Ext_fancy_lists - many1 digit + start <- many1 digit >>= safeRead char '.' - return (1, DefaultStyle, DefaultDelim) + return (start, DefaultStyle, DefaultDelim) <|> do (num, style, delim) <- anyOrderedListMarker -- if it could be an abbreviated first name, -- insist on more than one space diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5c00a1b27..a6ebf65dc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -69,7 +69,32 @@ parseOrg = do blocks' <- parseBlocks st <- getState let meta = runF (orgStateMeta' st) st - return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st) + let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) + return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) + +-- | Drop COMMENT headers and the document tree below those headers. +dropCommentTrees :: [Block] -> [Block] +dropCommentTrees [] = [] +dropCommentTrees blks@(b:bs) = + maybe blks (flip dropUntilHeaderAboveLevel bs) $ commentHeaderLevel b + +-- | Return the level of a header starting a comment tree and Nothing +-- otherwise. +commentHeaderLevel :: Block -> Maybe Int +commentHeaderLevel blk = + case blk of + (Header level _ ((Str "COMMENT"):_)) -> Just level + _ -> Nothing + +-- | Drop blocks until a header on or above the given level is seen +dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block] +dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n) + +isHeaderLevelLowerEq :: Int -> Block -> Bool +isHeaderLevelLowerEq n blk = + case blk of + (Header level _ _) -> n >= level + _ -> False -- -- Parser State for Org @@ -828,12 +853,14 @@ list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" definitionList :: OrgParser (F Blocks) -definitionList = fmap B.definitionList . fmap compactify'DL . sequence - <$> many1 (definitionListItem bulletListStart) +definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) + fmap B.definitionList . fmap compactify'DL . sequence + <$> many1 (definitionListItem $ bulletListStart' (Just n)) bulletList :: OrgParser (F Blocks) -bulletList = fmap B.bulletList . fmap compactify' . sequence - <$> many1 (listItem bulletListStart) +bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) + fmap B.bulletList . fmap compactify' . sequence + <$> many1 (listItem (bulletListStart' $ Just n)) orderedList :: OrgParser (F Blocks) orderedList = fmap B.orderedList . fmap compactify' . sequence @@ -845,10 +872,27 @@ genericListStart listMarker = try $ (+) <$> (length <$> many spaceChar) <*> (length <$> listMarker <* many1 spaceChar) --- parses bullet list start and returns its length (excl. following whitespace) +-- parses bullet list marker. maybe we know the indent level bulletListStart :: OrgParser Int -bulletListStart = genericListStart bulletListMarker - where bulletListMarker = pure <$> oneOf "*-+" +bulletListStart = bulletListStart' Nothing + +bulletListStart' :: Maybe Int -> OrgParser Int +-- returns length of bulletList prefix, inclusive of marker +bulletListStart' Nothing = do ind <- many spaceChar + oneOf bullets + many1 spaceChar + return $ length ind + 1 + -- Unindented lists are legal, but they can't use '*' bullets + -- We return n to maintain compatibility with the generic listItem +bulletListStart' (Just n) = do count (n-1) spaceChar + oneOf validBullets + many1 spaceChar + return n + where validBullets = if n == 1 then noAsterisks else bullets + noAsterisks = filter (/= '*') bullets + +bullets :: String +bullets = "*+-" orderedListStart :: OrgParser Int orderedListStart = genericListStart orderedListMarker @@ -927,7 +971,7 @@ parseInlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" +specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" whitespace :: OrgParser (F Inlines) @@ -1205,10 +1249,10 @@ displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" ] symbol :: OrgParser (F Inlines) symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - where updatePositions c - | c `elem` emphasisPreChars = c <$ updateLastPreCharPos - | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos - | otherwise = return c + where updatePositions c = do + when (c `elem` emphasisPreChars) updateLastPreCharPos + when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos + return c emphasisBetween :: Char -> OrgParser (F Inlines) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6e1f84335..9aa70e6f2 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -735,12 +735,10 @@ renderTags' = renderTagsOptions -- | Perform an IO action in a directory, returning to starting directory. inDirectory :: FilePath -> IO a -> IO a -inDirectory path action = do - oldDir <- getCurrentDirectory - setCurrentDirectory path - result <- action - setCurrentDirectory oldDir - return result +inDirectory path action = E.bracket + getCurrentDirectory + setCurrentDirectory + (const $ setCurrentDirectory path >> action) readDefaultDataFile :: FilePath -> IO BS.ByteString readDefaultDataFile fname = diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 32256cb42..53574711f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -35,7 +35,7 @@ import Data.Maybe ( fromMaybe ) import Data.List ( isPrefixOf, isInfixOf, intercalate ) import System.Environment ( getEnv ) import Text.Printf (printf) -import System.FilePath ( (</>), takeExtension, takeFileName ) +import System.FilePath ( takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 import qualified Text.Pandoc.UTF8 as UTF8 @@ -64,7 +64,6 @@ import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) -import Network.URI ( unEscapeString ) import Text.Pandoc.MIME (MimeType, getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) @@ -765,23 +764,20 @@ metadataElement version md currentTime = showDateTimeISO8601 :: UTCTime -> String showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" -transformTag :: WriterOptions - -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media +transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Tag String -> IO (Tag String) -transformTag opts mediaRef tag@(TagOpen name attr) +transformTag mediaRef tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - let oldsrc = maybe src (</> src) $ writerSourceURL opts - let oldposter = maybe poster (</> poster) $ writerSourceURL opts - newsrc <- modifyMediaRef mediaRef oldsrc - newposter <- modifyMediaRef mediaRef oldposter + newsrc <- modifyMediaRef mediaRef src + newposter <- modifyMediaRef mediaRef poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ [("src", newsrc) | not (null newsrc)] ++ [("poster", newposter) | not (null newposter)] return $ TagOpen name attr' -transformTag _ _ tag = return tag +transformTag _ tag = return tag modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath modifyMediaRef _ "" = return "" @@ -791,7 +787,7 @@ modifyMediaRef mediaRef oldsrc = do Just n -> return n Nothing -> do let new = "media/file" ++ show (length media) ++ - takeExtension oldsrc + takeExtension (takeWhile (/='?') oldsrc) -- remove query modifyIORef mediaRef ( (oldsrc, new): ) return new @@ -799,10 +795,10 @@ transformBlock :: WriterOptions -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Block -> IO Block -transformBlock opts mediaRef (RawBlock fmt raw) +transformBlock _ mediaRef (RawBlock fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag mediaRef) tags return $ RawBlock fmt (renderTags' tags') transformBlock _ _ b = return b @@ -810,19 +806,17 @@ transformInline :: WriterOptions -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media -> Inline -> IO Inline -transformInline opts mediaRef (Image lab (src,tit)) = do - let src' = unEscapeString src - let oldsrc = maybe src' (</> src) $ writerSourceURL opts - newsrc <- modifyMediaRef mediaRef oldsrc +transformInline _ mediaRef (Image lab (src,tit)) = do + newsrc <- modifyMediaRef mediaRef src return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do raw <- makeSelfContained opts $ writeHtmlInline opts x return $ RawInline (Format "html") raw -transformInline opts mediaRef (RawInline fmt raw) +transformInline _ mediaRef (RawInline fmt raw) | fmt == Format "html" = do let tags = parseTags raw - tags' <- mapM (transformTag opts mediaRef) tags + tags' <- mapM (transformTag mediaRef) tags return $ RawInline fmt (renderTags' tags') transformInline _ _ x = return x @@ -1204,3 +1198,4 @@ docTitle' meta = fromMaybe [] $ go <$> lookupMeta "title" meta _ -> [] go (MetaList xs) = concatMap go xs go _ = [] + |