aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--changelog2
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs43
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs36
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs8
-rw-r--r--src/Text/Pandoc/Readers/Org.hs22
-rw-r--r--src/Text/Pandoc/Shared.hs16
-rw-r--r--tests/Tests/Readers/Org.hs6
-rw-r--r--tests/html-reader.html20
-rw-r--r--tests/html-reader.native13
9 files changed, 96 insertions, 70 deletions
diff --git a/changelog b/changelog
index e012dabbf..fe3b1845b 100644
--- a/changelog
+++ b/changelog
@@ -17,7 +17,7 @@ pandoc (1.12.4.2)
+ Support code block headers (`#+BEGIN_SRC ...`) (Albert Krewinkel).
+ Fix parsing of blank lines within blocks (Albert Krewinkel).
+ Support pandoc citation extension (Albert Krewinkel). This can
- be turned off by specifying `org-citation` as the input format.
+ be turned off by specifying `org-citations` as the input format.
* Markdown reader:
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 905e55b22..d27afc543 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -50,7 +50,6 @@ import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero )
import Control.Applicative ( (<$>), (<$), (<*) )
import Data.Monoid
-import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl)
isSpace :: Char -> Bool
isSpace ' ' = True
@@ -239,30 +238,26 @@ pTable = try $ do
caption <- option mempty $ pInTags "caption" inline >>~ skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
- head' <- option mempty $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+ head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
skipMany pBlank
rows <- pOptInTag "tbody"
$ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
skipMany pBlank
TagClose _ <- pSatisfy (~== TagClose "table")
- let isSinglePlain [] = True
- isSinglePlain [Plain _] = True
- isSinglePlain _ = False
- let lHead = B.toList head'
- let lRows = map B.toList rows
- let isSimple = all isSinglePlain (lHead:lRows)
- let cols = length $ if null lHead
- then head lRows
- else lHead
+ let isSinglePlain x = case B.toList x of
+ [Plain _] -> True
+ _ -> False
+ let isSimple = all isSinglePlain $ concat (head':rows)
+ let cols = length $ if null head' then head rows else head'
-- fail if there are colspans or rowspans
- guard $ all (\r -> length r == cols) lRows
- let aligns = replicate cols AlignLeft
+ guard $ all (\r -> length r == cols) rows
+ let aligns = replicate cols AlignDefault
let widths = if null widths'
then if isSimple
then replicate cols 0
else replicate cols (1.0 / fromIntegral cols)
else widths'
- return $ B.table caption (zip aligns widths) [head'] [rows]
+ return $ B.table caption (zip aligns widths) head' rows
pCol :: TagParser Double
pCol = try $ do
@@ -280,12 +275,12 @@ pColgroup = try $ do
skipMany pBlank
manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
-pCell :: String -> TagParser Blocks
+pCell :: String -> TagParser [Blocks]
pCell celltype = try $ do
skipMany pBlank
res <- pInTags celltype block
skipMany pBlank
- return res
+ return [res]
pBlockQuote :: TagParser Blocks
pBlockQuote = do
@@ -369,9 +364,9 @@ pQ = do
then InSingleQuote
else InDoubleQuote
let constructor = case quoteType of
- SingleQuote -> B.singleQuoted
+ SingleQuote -> B.singleQuoted
DoubleQuote -> B.doubleQuoted
- withQuoteContext innerQuoteContext $
+ withQuoteContext innerQuoteContext $
pInlinesInTags "q" constructor
pEmph :: TagParser Inlines
@@ -406,7 +401,7 @@ pLink = try $ do
let url = fromAttrib "href" tag
let title = fromAttrib "title" tag
lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a")
- return $ B.link (escapeURI url) title lab
+ return $ B.link (escapeURI url) title lab
pImage :: TagParser Inlines
pImage = do
@@ -439,15 +434,7 @@ pRawHtmlInline = do
pInlinesInTags :: String -> (Inlines -> Inlines)
-> TagParser Inlines
-pInlinesInTags tagtype f = do
- contents <- B.unMany <$> pInTags tagtype inline
- let left = case viewl contents of
- (Space :< _) -> B.space
- _ -> mempty
- let right = case viewr contents of
- (_ :> Space) -> B.space
- _ -> mempty
- return (left <> f (trimInlines . B.Many $ contents) <> right)
+pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
pInTags :: (Monoid a) => String -> TagParser a
-> TagParser a
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 6b5958920..3c4d4ee52 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -397,18 +397,18 @@ isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
inlineCommands :: M.Map String (LP Inlines)
inlineCommands = M.fromList $
- [ ("emph", emph <$> tok)
- , ("textit", emph <$> tok)
- , ("textsl", emph <$> tok)
- , ("textsc", smallcaps <$> tok)
- , ("sout", strikeout <$> tok)
- , ("textsuperscript", superscript <$> tok)
- , ("textsubscript", subscript <$> tok)
+ [ ("emph", extractSpaces emph <$> tok)
+ , ("textit", extractSpaces emph <$> tok)
+ , ("textsl", extractSpaces emph <$> tok)
+ , ("textsc", extractSpaces smallcaps <$> tok)
+ , ("sout", extractSpaces strikeout <$> tok)
+ , ("textsuperscript", extractSpaces superscript <$> tok)
+ , ("textsubscript", extractSpaces subscript <$> tok)
, ("textbackslash", lit "\\")
, ("backslash", lit "\\")
, ("slash", lit "/")
- , ("textbf", strong <$> tok)
- , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+ , ("textbf", extractSpaces strong <$> tok)
+ , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
, ("ldots", lit "…")
, ("dots", lit "…")
, ("mdots", lit "…")
@@ -428,15 +428,15 @@ inlineCommands = M.fromList $
, ("{", lit "{")
, ("}", lit "}")
-- old TeX commands
- , ("em", emph <$> inlines)
- , ("it", emph <$> inlines)
- , ("sl", emph <$> inlines)
- , ("bf", strong <$> inlines)
+ , ("em", extractSpaces emph <$> inlines)
+ , ("it", extractSpaces emph <$> inlines)
+ , ("sl", extractSpaces emph <$> inlines)
+ , ("bf", extractSpaces strong <$> inlines)
, ("rm", inlines)
- , ("itshape", emph <$> inlines)
- , ("slshape", emph <$> inlines)
- , ("scshape", smallcaps <$> inlines)
- , ("bfseries", strong <$> inlines)
+ , ("itshape", extractSpaces emph <$> inlines)
+ , ("slshape", extractSpaces emph <$> inlines)
+ , ("scshape", extractSpaces smallcaps <$> inlines)
+ , ("bfseries", extractSpaces strong <$> inlines)
, ("/", pure mempty) -- italic correction
, ("aa", lit "å")
, ("AA", lit "Å")
@@ -1134,7 +1134,7 @@ paragraph = do
preamble :: LP Blocks
preamble = mempty <$> manyTill preambleBlock beginDoc
- where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
+ where beginDoc = lookAhead $ try $ controlSeq "begin" *> string "{document}"
preambleBlock = (void comment)
<|> (void sp)
<|> (void blanklines)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index caa938ed6..a6720beba 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1117,13 +1117,11 @@ multilineTable headless =
multilineTableHeader :: Bool -- ^ Headerless table
-> MarkdownParser (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
- if headless
- then return '\n'
- else tableSep >>~ notFollowedBy blankline
+ unless headless $
+ tableSep >> notFollowedBy blankline
rawContent <- if headless
then return $ repeat ""
- else many1
- (notFollowedBy tableSep >> many1Till anyChar newline)
+ else many1 $ notFollowedBy tableSep >> anyLine
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c3ea8d7c2..7a35e2ca0 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -38,10 +38,9 @@ import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
, newline, orderedListMarker
, parseFromString
- , updateLastStrPos )
+ )
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.Pandoc.Shared (compactify', compactify'DL)
-import Text.Parsec.Pos (updatePosString)
import Text.TeXMath (texMathToPandoc, DisplayType(..))
import Control.Applicative ( Applicative, pure
@@ -148,10 +147,6 @@ resetBlockAttributes :: OrgParser ()
resetBlockAttributes = updateState $ \s ->
s{ orgStateBlockAttributes = orgStateBlockAttributes def }
-updateLastStrPos :: OrgParser ()
-updateLastStrPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastStrPos = Just p }
-
updateLastForbiddenCharPos :: OrgParser ()
updateLastForbiddenCharPos = getPosition >>= \p ->
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
@@ -1153,11 +1148,11 @@ strikeout = fmap B.strikeout <$> emphasisBetween '+'
underline :: OrgParser (F Inlines)
underline = fmap B.strong <$> emphasisBetween '_'
-code :: OrgParser (F Inlines)
-code = return . B.code <$> verbatimBetween '='
-
verbatim :: OrgParser (F Inlines)
-verbatim = return . B.rawInline "" <$> verbatimBetween '~'
+verbatim = return . B.code <$> verbatimBetween '='
+
+code :: OrgParser (F Inlines)
+code = return . B.code <$> verbatimBetween '~'
subscript :: OrgParser (F Inlines)
subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr)
@@ -1376,8 +1371,9 @@ maybeRight = either (const Nothing) Just
inlineLaTeXCommand :: OrgParser String
inlineLaTeXCommand = try $ do
rest <- getInput
- pos <- getPosition
case runParser rawLaTeXInline def "source" rest of
- Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest)
- <* (setPosition $ updatePosString pos cs)
+ Right (RawInline _ cs) -> do
+ let len = length cs
+ count len anyChar
+ return cs
_ -> mzero
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index b0adf55f5..5b0d9b6b4 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -53,6 +53,7 @@ module Text.Pandoc.Shared (
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
+ extractSpaces,
normalize,
stringify,
compactify,
@@ -113,6 +114,7 @@ import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
+import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
@@ -331,6 +333,20 @@ isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False
+-- | Extract the leading and trailing spaces from inside an inline element
+-- and place them outside the element.
+
+extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
+extractSpaces f is =
+ let contents = B.unMany is
+ left = case viewl contents of
+ (Space :< _) -> B.space
+ _ -> mempty
+ right = case viewr contents of
+ (_ :> Space) -> B.space
+ _ -> mempty in
+ (left <> f (B.trimInlines . B.Many $ contents) <> right)
+
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
-- empty elements, etc.
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 4ed77887f..f8240ca3d 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -50,13 +50,13 @@ tests =
"+Kill Bill+" =?>
para (strikeout . spcSep $ [ "Kill", "Bill" ])
- , "Code" =:
+ , "Verbatim" =:
"=Robot.rock()=" =?>
para (code "Robot.rock()")
- , "Verbatim" =:
+ , "Code" =:
"~word for word~" =?>
- para (rawInline "" "word for word")
+ para (code "word for word")
, "Math $..$" =:
"$E=mc^2$" =?>
diff --git a/tests/html-reader.html b/tests/html-reader.html
index 1e104b00f..d059d7b4b 100644
--- a/tests/html-reader.html
+++ b/tests/html-reader.html
@@ -431,6 +431,24 @@ An e-mail address: nobody [at] nowhere.net<blockquote>
<p><em>Trailing space </em>text</p>
<p>text<em> Leading spaces</em></p>
<p><em>Trailing spaces </em>text</p>
-
+<h1>Tables</h1>
+<table>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+</table>
+</body>
</body>
</html>
diff --git a/tests/html-reader.native b/tests/html-reader.native
index 8fbecf34f..c6ed36910 100644
--- a/tests/html-reader.native
+++ b/tests/html-reader.native
@@ -308,4 +308,15 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,Para [Str "text",Space,Emph [Str "Leading",Space,Str "space"]]
,Para [Emph [Str "Trailing",Space,Str "space"],Space,Str "text"]
,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]]
-,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]]
+,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]
+,Header 1 ("",[],[]) [Str "Tables"]
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]]