diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 43 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 36 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 16 |
5 files changed, 61 insertions, 64 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 905e55b22..d27afc543 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,7 +50,6 @@ import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) import Data.Monoid -import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl) isSpace :: Char -> Bool isSpace ' ' = True @@ -239,30 +238,26 @@ pTable = try $ do caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol - head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th") + head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") skipMany pBlank rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank TagClose _ <- pSatisfy (~== TagClose "table") - let isSinglePlain [] = True - isSinglePlain [Plain _] = True - isSinglePlain _ = False - let lHead = B.toList head' - let lRows = map B.toList rows - let isSimple = all isSinglePlain (lHead:lRows) - let cols = length $ if null lHead - then head lRows - else lHead + let isSinglePlain x = case B.toList x of + [Plain _] -> True + _ -> False + let isSimple = all isSinglePlain $ concat (head':rows) + let cols = length $ if null head' then head rows else head' -- fail if there are colspans or rowspans - guard $ all (\r -> length r == cols) lRows - let aligns = replicate cols AlignLeft + guard $ all (\r -> length r == cols) rows + let aligns = replicate cols AlignDefault let widths = if null widths' then if isSimple then replicate cols 0 else replicate cols (1.0 / fromIntegral cols) else widths' - return $ B.table caption (zip aligns widths) [head'] [rows] + return $ B.table caption (zip aligns widths) head' rows pCol :: TagParser Double pCol = try $ do @@ -280,12 +275,12 @@ pColgroup = try $ do skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -pCell :: String -> TagParser Blocks +pCell :: String -> TagParser [Blocks] pCell celltype = try $ do skipMany pBlank res <- pInTags celltype block skipMany pBlank - return res + return [res] pBlockQuote :: TagParser Blocks pBlockQuote = do @@ -369,9 +364,9 @@ pQ = do then InSingleQuote else InDoubleQuote let constructor = case quoteType of - SingleQuote -> B.singleQuoted + SingleQuote -> B.singleQuoted DoubleQuote -> B.doubleQuoted - withQuoteContext innerQuoteContext $ + withQuoteContext innerQuoteContext $ pInlinesInTags "q" constructor pEmph :: TagParser Inlines @@ -406,7 +401,7 @@ pLink = try $ do let url = fromAttrib "href" tag let title = fromAttrib "title" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ B.link (escapeURI url) title lab + return $ B.link (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -439,15 +434,7 @@ pRawHtmlInline = do pInlinesInTags :: String -> (Inlines -> Inlines) -> TagParser Inlines -pInlinesInTags tagtype f = do - contents <- B.unMany <$> pInTags tagtype inline - let left = case viewl contents of - (Space :< _) -> B.space - _ -> mempty - let right = case viewr contents of - (_ :> Space) -> B.space - _ -> mempty - return (left <> f (trimInlines . B.Many $ contents) <> right) +pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline pInTags :: (Monoid a) => String -> TagParser a -> TagParser a diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6b5958920..3c4d4ee52 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -397,18 +397,18 @@ isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands inlineCommands :: M.Map String (LP Inlines) inlineCommands = M.fromList $ - [ ("emph", emph <$> tok) - , ("textit", emph <$> tok) - , ("textsl", emph <$> tok) - , ("textsc", smallcaps <$> tok) - , ("sout", strikeout <$> tok) - , ("textsuperscript", superscript <$> tok) - , ("textsubscript", subscript <$> tok) + [ ("emph", extractSpaces emph <$> tok) + , ("textit", extractSpaces emph <$> tok) + , ("textsl", extractSpaces emph <$> tok) + , ("textsc", extractSpaces smallcaps <$> tok) + , ("sout", extractSpaces strikeout <$> tok) + , ("textsuperscript", extractSpaces superscript <$> tok) + , ("textsubscript", extractSpaces subscript <$> tok) , ("textbackslash", lit "\\") , ("backslash", lit "\\") , ("slash", lit "/") - , ("textbf", strong <$> tok) - , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) + , ("textbf", extractSpaces strong <$> tok) + , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) , ("ldots", lit "…") , ("dots", lit "…") , ("mdots", lit "…") @@ -428,15 +428,15 @@ inlineCommands = M.fromList $ , ("{", lit "{") , ("}", lit "}") -- old TeX commands - , ("em", emph <$> inlines) - , ("it", emph <$> inlines) - , ("sl", emph <$> inlines) - , ("bf", strong <$> inlines) + , ("em", extractSpaces emph <$> inlines) + , ("it", extractSpaces emph <$> inlines) + , ("sl", extractSpaces emph <$> inlines) + , ("bf", extractSpaces strong <$> inlines) , ("rm", inlines) - , ("itshape", emph <$> inlines) - , ("slshape", emph <$> inlines) - , ("scshape", smallcaps <$> inlines) - , ("bfseries", strong <$> inlines) + , ("itshape", extractSpaces emph <$> inlines) + , ("slshape", extractSpaces emph <$> inlines) + , ("scshape", extractSpaces smallcaps <$> inlines) + , ("bfseries", extractSpaces strong <$> inlines) , ("/", pure mempty) -- italic correction , ("aa", lit "å") , ("AA", lit "Å") @@ -1134,7 +1134,7 @@ paragraph = do preamble :: LP Blocks preamble = mempty <$> manyTill preambleBlock beginDoc - where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}" + where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}" preambleBlock = (void comment) <|> (void sp) <|> (void blanklines) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index caa938ed6..a6720beba 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1117,13 +1117,11 @@ multilineTable headless = multilineTableHeader :: Bool -- ^ Headerless table -> MarkdownParser (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do - if headless - then return '\n' - else tableSep >>~ notFollowedBy blankline + unless headless $ + tableSep >> notFollowedBy blankline rawContent <- if headless then return $ repeat "" - else many1 - (notFollowedBy tableSep >> many1Till anyChar newline) + else many1 $ notFollowedBy tableSep >> anyLine initSp <- nonindentSpaces dashes <- many1 (dashedLine '-') newline diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c3ea8d7c2..7a35e2ca0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -38,10 +38,9 @@ import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString - , updateLastStrPos ) + ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) -import Text.Parsec.Pos (updatePosString) import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure @@ -148,10 +147,6 @@ resetBlockAttributes :: OrgParser () resetBlockAttributes = updateState $ \s -> s{ orgStateBlockAttributes = orgStateBlockAttributes def } -updateLastStrPos :: OrgParser () -updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastStrPos = Just p } - updateLastForbiddenCharPos :: OrgParser () updateLastForbiddenCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} @@ -1153,11 +1148,11 @@ strikeout = fmap B.strikeout <$> emphasisBetween '+' underline :: OrgParser (F Inlines) underline = fmap B.strong <$> emphasisBetween '_' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '=' - verbatim :: OrgParser (F Inlines) -verbatim = return . B.rawInline "" <$> verbatimBetween '~' +verbatim = return . B.code <$> verbatimBetween '=' + +code :: OrgParser (F Inlines) +code = return . B.code <$> verbatimBetween '~' subscript :: OrgParser (F Inlines) subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) @@ -1376,8 +1371,9 @@ maybeRight = either (const Nothing) Just inlineLaTeXCommand :: OrgParser String inlineLaTeXCommand = try $ do rest <- getInput - pos <- getPosition case runParser rawLaTeXInline def "source" rest of - Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) - <* (setPosition $ updatePosString pos cs) + Right (RawInline _ cs) -> do + let len = length cs + count len anyChar + return cs _ -> mzero diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index b0adf55f5..5b0d9b6b4 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -53,6 +53,7 @@ module Text.Pandoc.Shared ( -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, + extractSpaces, normalize, stringify, compactify, @@ -113,6 +114,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) +import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -331,6 +333,20 @@ isSpaceOrEmpty Space = True isSpaceOrEmpty (Str "") = True isSpaceOrEmpty _ = False +-- | Extract the leading and trailing spaces from inside an inline element +-- and place them outside the element. + +extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines +extractSpaces f is = + let contents = B.unMany is + left = case viewl contents of + (Space :< _) -> B.space + _ -> mempty + right = case viewr contents of + (_ :> Space) -> B.space + _ -> mempty in + (left <> f (B.trimInlines . B.Many $ contents) <> right) + -- | Normalize @Pandoc@ document, consolidating doubled 'Space's, -- combining adjacent 'Str's and 'Emph's, remove 'Null's and -- empty elements, etc. |