aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/MIME.hs1
-rw-r--r--src/Text/Pandoc/Parsing.hs24
-rw-r--r--src/Text/Pandoc/Pretty.hs3
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org.hs70
-rw-r--r--src/Text/Pandoc/Shared.hs10
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs33
7 files changed, 104 insertions, 41 deletions
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 3b3b3b5b3..75b4ff0d2 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -477,6 +477,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("vrml","model/vrml")
,("vs","text/plain")
,("vsd","application/vnd.visio")
+ ,("vtt","text/vtt")
,("wad","application/x-doom")
,("wav","audio/x-wav")
,("wax","audio/x-ms-wax")
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1fba1e21..e0f5f65bb 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -472,7 +472,12 @@ mathInlineWith op cl = try $ do
string op
notFollowedBy space
words' <- many1Till (count 1 (noneOf " \t\n\\")
- <|> (char '\\' >> anyChar >>= \c -> return ['\\',c])
+ <|> (char '\\' >>
+ -- This next clause is needed because \text{..} can
+ -- contain $, \(\), etc.
+ (try (string "text" >>
+ (("\\text" ++) <$> inBalancedBraces 0 ""))
+ <|> (\c -> ['\\',c]) <$> anyChar))
<|> do (blankline <* notFollowedBy' blankline) <|>
(oneOf " \t" <* skipMany (oneOf " \t"))
notFollowedBy (char '$')
@@ -480,6 +485,23 @@ mathInlineWith op cl = try $ do
) (try $ string cl)
notFollowedBy digit -- to prevent capture of $5
return $ concat words'
+ where
+ inBalancedBraces :: Stream s m Char => Int -> String -> ParserT s st m String
+ inBalancedBraces 0 "" = do
+ c <- anyChar
+ if c == '{'
+ then inBalancedBraces 1 "{"
+ else mzero
+ inBalancedBraces 0 s = return $ reverse s
+ inBalancedBraces numOpen ('\\':xs) = do
+ c <- anyChar
+ inBalancedBraces numOpen (c:'\\':xs)
+ inBalancedBraces numOpen xs = do
+ c <- anyChar
+ case c of
+ '}' -> inBalancedBraces (numOpen - 1) (c:xs)
+ '{' -> inBalancedBraces (numOpen + 1) (c:xs)
+ _ -> inBalancedBraces numOpen (c:xs)
mathDisplayWith :: Stream s m Char => String -> String -> ParserT s st m String
mathDisplayWith op cl = try $ do
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 1e72c2040..9ee7fe94a 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -286,6 +286,9 @@ renderList (BlankLines num : xs) = do
| otherwise -> replicateM_ (1 + num - newlines st) (outp (-1) "\n")
renderList xs
+renderList (CarriageReturn : BlankLines m : xs) =
+ renderList (BlankLines m : xs)
+
renderList (CarriageReturn : xs) = do
st <- get
if newlines st > 0 || null xs
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 02a787670..7a3be8291 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -735,9 +735,9 @@ anyOrderedListStart = try $ do
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
res <- do guardDisabled Ext_fancy_lists
- many1 digit
+ start <- many1 digit >>= safeRead
char '.'
- return (1, DefaultStyle, DefaultDelim)
+ return (start, DefaultStyle, DefaultDelim)
<|> do (num, style, delim) <- anyOrderedListMarker
-- if it could be an abbreviated first name,
-- insist on more than one space
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5c00a1b27..a6ebf65dc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -69,7 +69,32 @@ parseOrg = do
blocks' <- parseBlocks
st <- getState
let meta = runF (orgStateMeta' st) st
- return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
+ let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
+ return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
+
+-- | Drop COMMENT headers and the document tree below those headers.
+dropCommentTrees :: [Block] -> [Block]
+dropCommentTrees [] = []
+dropCommentTrees blks@(b:bs) =
+ maybe blks (flip dropUntilHeaderAboveLevel bs) $ commentHeaderLevel b
+
+-- | Return the level of a header starting a comment tree and Nothing
+-- otherwise.
+commentHeaderLevel :: Block -> Maybe Int
+commentHeaderLevel blk =
+ case blk of
+ (Header level _ ((Str "COMMENT"):_)) -> Just level
+ _ -> Nothing
+
+-- | Drop blocks until a header on or above the given level is seen
+dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
+dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
+
+isHeaderLevelLowerEq :: Int -> Block -> Bool
+isHeaderLevelLowerEq n blk =
+ case blk of
+ (Header level _ _) -> n >= level
+ _ -> False
--
-- Parser State for Org
@@ -828,12 +853,14 @@ list :: OrgParser (F Blocks)
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
definitionList :: OrgParser (F Blocks)
-definitionList = fmap B.definitionList . fmap compactify'DL . sequence
- <$> many1 (definitionListItem bulletListStart)
+definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.definitionList . fmap compactify'DL . sequence
+ <$> many1 (definitionListItem $ bulletListStart' (Just n))
bulletList :: OrgParser (F Blocks)
-bulletList = fmap B.bulletList . fmap compactify' . sequence
- <$> many1 (listItem bulletListStart)
+bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.bulletList . fmap compactify' . sequence
+ <$> many1 (listItem (bulletListStart' $ Just n))
orderedList :: OrgParser (F Blocks)
orderedList = fmap B.orderedList . fmap compactify' . sequence
@@ -845,10 +872,27 @@ genericListStart listMarker = try $
(+) <$> (length <$> many spaceChar)
<*> (length <$> listMarker <* many1 spaceChar)
--- parses bullet list start and returns its length (excl. following whitespace)
+-- parses bullet list marker. maybe we know the indent level
bulletListStart :: OrgParser Int
-bulletListStart = genericListStart bulletListMarker
- where bulletListMarker = pure <$> oneOf "*-+"
+bulletListStart = bulletListStart' Nothing
+
+bulletListStart' :: Maybe Int -> OrgParser Int
+-- returns length of bulletList prefix, inclusive of marker
+bulletListStart' Nothing = do ind <- many spaceChar
+ oneOf bullets
+ many1 spaceChar
+ return $ length ind + 1
+ -- Unindented lists are legal, but they can't use '*' bullets
+ -- We return n to maintain compatibility with the generic listItem
+bulletListStart' (Just n) = do count (n-1) spaceChar
+ oneOf validBullets
+ many1 spaceChar
+ return n
+ where validBullets = if n == 1 then noAsterisks else bullets
+ noAsterisks = filter (/= '*') bullets
+
+bullets :: String
+bullets = "*+-"
orderedListStart :: OrgParser Int
orderedListStart = genericListStart orderedListMarker
@@ -927,7 +971,7 @@ parseInlines = trimInlinesF . mconcat <$> many1 inline
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
+specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
whitespace :: OrgParser (F Inlines)
@@ -1205,10 +1249,10 @@ displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
]
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
- where updatePositions c
- | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
- | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
- | otherwise = return c
+ where updatePositions c = do
+ when (c `elem` emphasisPreChars) updateLastPreCharPos
+ when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+ return c
emphasisBetween :: Char
-> OrgParser (F Inlines)
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 6e1f84335..9aa70e6f2 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -735,12 +735,10 @@ renderTags' = renderTagsOptions
-- | Perform an IO action in a directory, returning to starting directory.
inDirectory :: FilePath -> IO a -> IO a
-inDirectory path action = do
- oldDir <- getCurrentDirectory
- setCurrentDirectory path
- result <- action
- setCurrentDirectory oldDir
- return result
+inDirectory path action = E.bracket
+ getCurrentDirectory
+ setCurrentDirectory
+ (const $ setCurrentDirectory path >> action)
readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile fname =
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 32256cb42..53574711f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -35,7 +35,7 @@ import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf, isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
-import System.FilePath ( (</>), takeExtension, takeFileName )
+import System.FilePath ( takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import qualified Text.Pandoc.UTF8 as UTF8
@@ -64,7 +64,6 @@ import Text.XML.Light ( unode, Element(..), unqual, Attr(..), add_attrs
import Text.Pandoc.UUID (getRandomUUID)
import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
import Data.Char ( toLower, isDigit, isAlphaNum )
-import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (MimeType, getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
@@ -765,23 +764,20 @@ metadataElement version md currentTime =
showDateTimeISO8601 :: UTCTime -> String
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
-transformTag :: WriterOptions
- -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
+transformTag :: IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
-> Tag String
-> IO (Tag String)
-transformTag opts mediaRef tag@(TagOpen name attr)
+transformTag mediaRef tag@(TagOpen name attr)
| name `elem` ["video", "source", "img", "audio"] = do
let src = fromAttrib "src" tag
let poster = fromAttrib "poster" tag
- let oldsrc = maybe src (</> src) $ writerSourceURL opts
- let oldposter = maybe poster (</> poster) $ writerSourceURL opts
- newsrc <- modifyMediaRef mediaRef oldsrc
- newposter <- modifyMediaRef mediaRef oldposter
+ newsrc <- modifyMediaRef mediaRef src
+ newposter <- modifyMediaRef mediaRef poster
let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
[("src", newsrc) | not (null newsrc)] ++
[("poster", newposter) | not (null newposter)]
return $ TagOpen name attr'
-transformTag _ _ tag = return tag
+transformTag _ tag = return tag
modifyMediaRef :: IORef [(FilePath, FilePath)] -> FilePath -> IO FilePath
modifyMediaRef _ "" = return ""
@@ -791,7 +787,7 @@ modifyMediaRef mediaRef oldsrc = do
Just n -> return n
Nothing -> do
let new = "media/file" ++ show (length media) ++
- takeExtension oldsrc
+ takeExtension (takeWhile (/='?') oldsrc) -- remove query
modifyIORef mediaRef ( (oldsrc, new): )
return new
@@ -799,10 +795,10 @@ transformBlock :: WriterOptions
-> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
-> Block
-> IO Block
-transformBlock opts mediaRef (RawBlock fmt raw)
+transformBlock _ mediaRef (RawBlock fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag opts mediaRef) tags
+ tags' <- mapM (transformTag mediaRef) tags
return $ RawBlock fmt (renderTags' tags')
transformBlock _ _ b = return b
@@ -810,19 +806,17 @@ transformInline :: WriterOptions
-> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) media
-> Inline
-> IO Inline
-transformInline opts mediaRef (Image lab (src,tit)) = do
- let src' = unEscapeString src
- let oldsrc = maybe src' (</> src) $ writerSourceURL opts
- newsrc <- modifyMediaRef mediaRef oldsrc
+transformInline _ mediaRef (Image lab (src,tit)) = do
+ newsrc <- modifyMediaRef mediaRef src
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
raw <- makeSelfContained opts $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
-transformInline opts mediaRef (RawInline fmt raw)
+transformInline _ mediaRef (RawInline fmt raw)
| fmt == Format "html" = do
let tags = parseTags raw
- tags' <- mapM (transformTag opts mediaRef) tags
+ tags' <- mapM (transformTag mediaRef) tags
return $ RawInline fmt (renderTags' tags')
transformInline _ _ x = return x
@@ -1204,3 +1198,4 @@ docTitle' meta = fromMaybe [] $ go <$> lookupMeta "title" meta
_ -> []
go (MetaList xs) = concatMap go xs
go _ = []
+