aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Org.hs')
-rw-r--r--src/Text/Pandoc/Readers/Org.hs161
1 files changed, 92 insertions, 69 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7dd611be3..5a50a8f34 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
trimInlines )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
+import Text.Pandoc.Error
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@@ -49,7 +50,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
-import Data.Char (isAlphaNum, toLower)
+import Data.Char (isAlphaNum, isSpace, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
@@ -57,8 +58,6 @@ import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isJust)
import Network.HTTP (urlEncode)
-import Text.Pandoc.Error
-
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
@@ -391,6 +390,9 @@ lookupBlockAttribute key =
type BlockProperties = (Int, String) -- (Indentation, Block-Type)
+updateIndent :: BlockProperties -> Int -> BlockProperties
+updateIndent (_, blkType) indent = (indent, blkType)
+
orgBlock :: OrgParser (F Blocks)
orgBlock = try $ do
blockProp@(_, blkType) <- blockHeaderStart
@@ -407,11 +409,23 @@ orgBlock = try $ do
_ -> withParsed (fmap $ divWithClass blkType)
blockHeaderStart :: OrgParser (Int, String)
-blockHeaderStart = try $ (,) <$> indent <*> blockType
+blockHeaderStart = try $ (,) <$> indentation <*> blockType
where
- indent = length <$> many spaceChar
blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
+indentation :: OrgParser Int
+indentation = try $ do
+ tabStop <- getOption readerTabStop
+ s <- many spaceChar
+ return $ spaceLength tabStop s
+
+spaceLength :: Int -> String -> Int
+spaceLength tabStop s = (sum . map charLen) s
+ where
+ charLen ' ' = 1
+ charLen '\t' = tabStop
+ charLen _ = 0
+
withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
@@ -450,7 +464,8 @@ codeBlock blkProp = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
id' <- fromMaybe "" <$> lookupBlockAttribute "name"
- content <- rawBlockContent blkProp
+ leadingIndent <- lookAhead indentation
+ content <- rawBlockContent (updateIndent blkProp leadingIndent)
resultsContent <- followingResultsBlock
let includeCode = exportsCode kv
let includeResults = exportsResults kv
@@ -472,7 +487,7 @@ rawBlockContent (indent, blockType) = try $
unlines . map commaEscaped <$> manyTill indentedLine blockEnder
where
indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
- blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+ blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType)
parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
parsedBlockContent blkProps = try $ do
@@ -758,9 +773,13 @@ data OrgTableRow = OrgContentRow (F [Blocks])
| OrgAlignRow [Alignment]
| OrgHlineRow
+-- OrgTable is strongly related to the pandoc table ADT. Using the same
+-- (i.e. pandoc-global) ADT would mean that the reader would break if the
+-- global structure was to be changed, which would be bad. The final table
+-- should be generated using a builder function. Column widths aren't
+-- implemented yet, so they are not tracked here.
data OrgTable = OrgTable
- { orgTableColumns :: Int
- , orgTableAlignments :: [Alignment]
+ { orgTableAlignments :: [Alignment]
, orgTableHeader :: [Blocks]
, orgTableRows :: [[Blocks]]
}
@@ -776,7 +795,7 @@ table = try $ do
orgToPandocTable :: OrgTable
-> Inlines
-> Blocks
-orgToPandocTable (OrgTable _ aligns heads lns) caption =
+orgToPandocTable (OrgTable aligns heads lns) caption =
B.table caption (zip aligns $ repeat 0) heads lns
tableStart :: OrgParser Char
@@ -787,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: OrgParser OrgTableRow
tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
+ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
tableContentCell :: OrgParser (F Blocks)
tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
-
-endOfCell :: OrgParser Char
-endOfCell = try $ char '|' <|> lookAhead newline
+ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
tableAlignRow :: OrgParser OrgTableRow
-tableAlignRow = try $
- OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
+tableAlignRow = try $ do
+ tableStart
+ cells <- many1Till tableAlignCell newline
+ -- Empty rows are regular (i.e. content) rows, not alignment rows.
+ guard $ any (/= AlignDefault) cells
+ return $ OrgAlignRow cells
tableAlignCell :: OrgParser Alignment
tableAlignCell =
@@ -813,65 +833,61 @@ tableAlignCell =
where emptyCell = try $ skipSpaces *> endOfCell
tableAlignFromChar :: OrgParser Alignment
-tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
- , char 'c' *> return AlignCenter
- , char 'r' *> return AlignRight
- ]
+tableAlignFromChar = try $
+ choice [ char 'l' *> return AlignLeft
+ , char 'c' *> return AlignCenter
+ , char 'r' *> return AlignRight
+ ]
tableHline :: OrgParser OrgTableRow
tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+endOfCell :: OrgParser Char
+endOfCell = try $ char '|' <|> lookAhead newline
+
rowsToTable :: [OrgTableRow]
-> F OrgTable
-rowsToTable = foldM (flip rowToContent) zeroTable
- where zeroTable = OrgTable 0 mempty mempty mempty
-
-normalizeTable :: OrgTable
- -> OrgTable
-normalizeTable (OrgTable cols aligns heads lns) =
- let aligns' = fillColumns aligns AlignDefault
- heads' = if heads == mempty
- then mempty
- else fillColumns heads (B.plain mempty)
- lns' = map (`fillColumns` B.plain mempty) lns
- fillColumns base padding = take cols $ base ++ repeat padding
- in OrgTable cols aligns' heads' lns'
+rowsToTable = foldM rowToContent emptyTable
+ where emptyTable = OrgTable mempty mempty mempty
+normalizeTable :: OrgTable -> OrgTable
+normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows
+ where
+ refRow = if heads /= mempty
+ then heads
+ else if rows == mempty then mempty else head rows
+ cols = length refRow
+ fillColumns base padding = take cols $ base ++ repeat padding
+ aligns' = fillColumns aligns AlignDefault
-- One or more horizontal rules after the first content line mark the previous
-- line as a header. All other horizontal lines are discarded.
-rowToContent :: OrgTableRow
- -> OrgTable
- -> F OrgTable
-rowToContent OrgHlineRow t = maybeBodyToHeader t
-rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t
-rowToContent (OrgContentRow rf) t = do
- rs <- rf
- setLongestRow rs =<< appendToBody rs t
-
-setLongestRow :: [a]
- -> OrgTable
- -> F OrgTable
-setLongestRow rs t =
- return t{ orgTableColumns = max (length rs) (orgTableColumns t) }
-
-maybeBodyToHeader :: OrgTable
- -> F OrgTable
-maybeBodyToHeader t = case t of
- OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
- return t{ orgTableHeader = b , orgTableRows = [] }
- _ -> return t
-
-appendToBody :: [Blocks]
- -> OrgTable
+rowToContent :: OrgTable
+ -> OrgTableRow
-> F OrgTable
-appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }
+rowToContent orgTable row =
+ case row of
+ OrgHlineRow -> return singleRowPromotedToHeader
+ OrgAlignRow as -> return . setAligns $ as
+ OrgContentRow cs -> appendToBody cs
+ where
+ singleRowPromotedToHeader :: OrgTable
+ singleRowPromotedToHeader = case orgTable of
+ OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+ orgTable{ orgTableHeader = b , orgTableRows = [] }
+ _ -> orgTable
-setAligns :: [Alignment]
- -> OrgTable
- -> F OrgTable
-setAligns aligns t = return $ t{ orgTableAlignments = aligns }
+ setAligns :: [Alignment] -> OrgTable
+ setAligns aligns = orgTable{ orgTableAlignments = aligns }
+
+ appendToBody :: F [Blocks] -> F OrgTable
+ appendToBody frow = do
+ newRow <- frow
+ let oldRows = orgTableRows orgTable
+ -- NOTE: This is an inefficient O(n) operation. This should be changed
+ -- if performance ever becomes a problem.
+ return orgTable{ orgTableRows = oldRows ++ [newRow] }
--
@@ -1565,14 +1581,14 @@ inlineLaTeX = try $ do
parseAsMathMLSym :: String -> Maybe Inlines
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
- -- dropWhileEnd would be nice here, but it's not available before base 4.5
- where clean = reverse . dropWhile (`elem` ("{}" :: String)) . reverse . drop 1
+ -- drop initial backslash and any trailing "{}"
+ where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
state :: ParserState
state = def{ stateOptions = def{ readerParseRaw = True }}
- texMathToPandoc inp = (maybeRight $ readTeX inp) >>=
- writePandoc DisplayInline
+ texMathToPandoc :: String -> Maybe [Inline]
+ texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
@@ -1582,11 +1598,18 @@ inlineLaTeXCommand = try $ do
rest <- getInput
case runParser rawLaTeXInline def "source" rest of
Right (RawInline _ cs) -> do
- let len = length cs
+ -- drop any trailing whitespace, those are not be part of the command as
+ -- far as org mode is concerned.
+ let cmdNoSpc = dropWhileEnd isSpace cs
+ let len = length cmdNoSpc
count len anyChar
- return cs
+ return cmdNoSpc
_ -> mzero
+-- Taken from Data.OldList.
+dropWhileEnd :: (a -> Bool) -> [a] -> [a]
+dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
+
smart :: OrgParser (F Inlines)
smart = do
getOption readerSmart >>= guard