From 24b2ac43b0a8596f7baea10579c95ee75b6e584f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 4 Mar 2014 00:33:25 +0100 Subject: Add a simple Emacs Org-mode reader The basic structure of org-mode documents is recognized; however, org-mode features like todo markers, tags etc. are not supported yet. --- src/Text/Pandoc/Readers/Org.hs | 552 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 552 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Org.hs (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs new file mode 100644 index 000000000..5dc250f04 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org.hs @@ -0,0 +1,552 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2014 Albert Krewinkel + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org + Copyright : Copyright (C) 2014 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + +Conversion of Org-Mode to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Org ( readOrg ) where + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (orderedListMarker) +import Text.Pandoc.Shared (compactify') + +import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Monad (guard, mzero) +import Data.Char (toLower) +import Data.List (foldl') +import Data.Maybe (listToMaybe, fromMaybe) +import Data.Monoid (mconcat, mempty, mappend) + +-- | Parse org-mode string and return a Pandoc document. +readOrg :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n") + +type OrgParser = Parser [Char] ParserState + +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = stateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- parsing blocks +-- + +parseBlocks :: OrgParser Blocks +parseBlocks = mconcat <$> manyTill block eof + +block :: OrgParser Blocks +block = choice [ mempty <$ blanklines + , orgBlock + , example + , drawer + , specialLine + , header + , hline + , list + , table + , paraOrPlain + ] "block" + +-- +-- Org Blocks (#+BEGIN_... / #+END_...) +-- + +orgBlock :: OrgParser Blocks +orgBlock = try $ do + (indent, blockType, args) <- blockHeader + blockStr <- rawBlockContent indent blockType + let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] + case blockType of + "comment" -> return mempty + "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr + _ -> B.divWith ("", [blockType], []) + <$> (parseFromString parseBlocks blockStr) + +blockHeader :: OrgParser (Int, String, [String]) +blockHeader = (,,) <$> blockIndent + <*> blockType + <*> (skipSpaces *> blockArgs) + where blockIndent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) + blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline + +rawBlockContent :: Int -> String -> OrgParser String +rawBlockContent indent blockType = + unlines . map commaEscaped <$> manyTill indentedLine blockEnder + where + indentedLine = try $ choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +-- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> OrgParser String +indentWith num = do + tabStop <- getOption readerTabStop + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> count (num - tabStop) (char ' ')) ] + +translateLang :: String -> String +translateLang "sh" = "bash" +translateLang cs = cs + +commaEscaped :: String -> String +commaEscaped (',':cs@('*':_)) = cs +commaEscaped (',':cs@('#':'+':_)) = cs +commaEscaped cs = cs + +example :: OrgParser Blocks +example = try $ + B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine + +exampleLine :: OrgParser String +exampleLine = try $ string ": " *> anyLine + +-- Drawers for properties or a logbook +drawer :: OrgParser Blocks +drawer = try $ do + drawerStart + manyTill drawerLine (try drawerEnd) + return mempty + +drawerStart :: OrgParser String +drawerStart = try $ + skipSpaces *> drawerName <* skipSpaces <* newline + where drawerName = try $ char ':' *> validDrawerName <* char ':' + validDrawerName = stringAnyCase "PROPERTIES" + <|> stringAnyCase "LOGBOOK" + +drawerLine :: OrgParser String +drawerLine = try $ anyLine + +drawerEnd :: OrgParser String +drawerEnd = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + + +-- Comments, Options and Metadata +specialLine :: OrgParser Blocks +specialLine = try $ metaLine <|> commentLine + +metaLine :: OrgParser Blocks +metaLine = try $ metaLineStart *> declarationLine + +commentLine :: OrgParser Blocks +commentLine = try $ commentLineStart *> anyLine *> pure mempty + +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLineStart :: OrgParser String +metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" + +commentLineStart :: OrgParser String +commentLineStart = try $ mappend <$> many spaceChar <*> string "# " + +declarationLine :: OrgParser Blocks +declarationLine = try $ do + meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta + updateState $ \st -> st { stateMeta = stateMeta st <> meta' } + return mempty + +metaValue :: OrgParser MetaValue +metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine + +metaKey :: OrgParser [Char] +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +-- | Headers +header :: OrgParser Blocks +header = try $ + B.header <$> headerStart + <*> (trimInlines <$> restOfLine) + +headerStart :: OrgParser Int +headerStart = try $ + (length <$> many1 (char '*')) <* many1 (char ' ') + +-- Horizontal Line (five dashes or more) +hline :: OrgParser Blocks +hline = try $ do + skipSpaces + string "-----" + many (char '-') + skipSpaces + newline + return B.horizontalRule + +-- +-- Tables +-- + +data OrgTableRow = OrgContentRow [Blocks] + | OrgAlignRow [Alignment] + | OrgHlineRow + deriving (Eq, Show) + +type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]]) + +table :: OrgParser Blocks +table = try $ do + lookAhead tableStart + (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows + return $ B.table "" (zip aligns widths) heads lns + +tableStart :: OrgParser Char +tableStart = try $ skipSpaces *> char '|' + +tableRows :: OrgParser [OrgTableRow] +tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) + +tableContentRow :: OrgParser OrgTableRow +tableContentRow = try $ + OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + +tableContentCell :: OrgParser Blocks +tableContentCell = try $ + B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell) + +endOfCell :: OrgParser Char +-- endOfCell = char '|' <|> newline +endOfCell = try $ char '|' <|> lookAhead newline + +tableAlignRow :: OrgParser OrgTableRow +tableAlignRow = try $ + OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) + +tableAlignCell :: OrgParser Alignment +tableAlignCell = + choice [ try $ emptyCell *> return (AlignDefault) + , try $ skipSpaces + *> char '<' + *> tableAlignFromChar + <* many digit + <* char '>' + <* emptyCell + ] "alignment info" + where emptyCell = try $ skipSpaces *> endOfCell + +tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] + +tableHline :: OrgParser OrgTableRow +tableHline = try $ + OrgHlineRow <$ (tableStart *> char '-' *> anyLine) + +tableContent :: [OrgTableRow] + -> OrgTableContent +tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty) + +normalizeTable :: OrgTableContent + -> OrgTableContent +normalizeTable (cols, aligns, widths, heads, lns) = + let aligns' = fillColumns aligns AlignDefault + widths' = fillColumns widths 0.0 + heads' = if heads == mempty + then heads + else fillColumns heads (B.plain mempty) + lns' = map (flip fillColumns (B.plain mempty)) lns + fillColumns base padding = take cols $ base ++ repeat padding + in (cols, aligns', widths', heads', lns') + + +-- One or more horizontal rules after the first content line mark the previous +-- line as a header. All other horizontal lines are discarded. +rowToContent :: OrgTableRow + -> OrgTableContent + -> OrgTableContent +rowToContent OrgHlineRow = maybeBodyToHeader +rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs +rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as + +setLongestRow :: [a] + -> OrgTableContent + -> OrgTableContent +setLongestRow r (cols, aligns, widths, heads, lns) = + (max cols (length r), aligns, widths, heads, lns) + +maybeBodyToHeader :: OrgTableContent + -> OrgTableContent +maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, []) +maybeBodyToHeader content = content + +appendToBody :: [Blocks] + -> OrgTableContent + -> OrgTableContent +appendToBody r (cols, aligns, widths, heads, lns) = + (cols, aligns, widths, heads, lns ++ [r]) + +setAligns :: [Alignment] + -> OrgTableContent + -> OrgTableContent +setAligns aligns (cols, _, widths, heads, lns) = + (cols, aligns, widths, heads, lns) + +-- Paragraphs or Plain text +paraOrPlain :: OrgParser Blocks +paraOrPlain = try $ + trimInlines . mconcat + <$> many1 inline + <**> option B.plain + (try $ newline *> pure B.para) + +restOfLine :: OrgParser Inlines +restOfLine = mconcat <$> manyTill inline newline + + +-- +-- list blocks +-- + +list :: OrgParser Blocks +list = choice [ bulletList, orderedList ] "list" + +bulletList :: OrgParser Blocks +bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) + +orderedList :: OrgParser Blocks +orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) + +genericListStart :: OrgParser String + -> OrgParser Int +genericListStart listMarker = try $ + (+) <$> (length <$> many spaceChar) + <*> (length <$> listMarker <* many1 spaceChar) + +-- parses bullet list start and returns its length (excl. following whitespace) +bulletListStart :: OrgParser Int +bulletListStart = genericListStart bulletListMarker + where bulletListMarker = pure <$> oneOf "*-+" + +orderedListStart :: OrgParser Int +orderedListStart = genericListStart orderedListMarker + -- Ordered list markers allowed in org-mode + where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + +listItem :: OrgParser Int + -> OrgParser Blocks +listItem start = try $ do + (markerLength, first) <- try (start >>= rawListItem) + rest <- many (listContinuation markerLength) + parseFromString parseBlocks $ concat (first:rest) + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: Int + -> OrgParser (Int, String) +rawListItem markerLength = try $ do + firstLine <- anyLine + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + +-- continuation of a list item - indented and separated by blankline or endline. +-- Note: nested lists are parsed as continuations. +listContinuation :: Int + -> OrgParser String +listContinuation markerLength = try $ + mappend <$> many blankline + <*> (concat <$> many1 (listLine markerLength)) + +-- parse a line of a list item +listLine :: Int + -> OrgParser String +listLine markerLength = try $ + indentWith markerLength *> anyLine + <**> pure (++ "\n") + + +-- +-- inline +-- + +inline :: OrgParser Inlines +inline = choice inlineParsers "inline" + where inlineParsers = [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , verbatim + , subscript + , superscript + , symbol + ] + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" + + +whitespace :: OrgParser Inlines +whitespace = B.space <$ skipMany1 spaceChar "whitespace" + +str :: OrgParser Inlines +str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + <* updateLastStrPos + +-- an endline character that can be treated as a space, not a structural break +endline :: OrgParser Inlines +endline = try $ do + newline + notFollowedBy blankline + notFollowedBy' exampleLine + notFollowedBy' hline + notFollowedBy' tableStart + notFollowedBy' drawerStart + notFollowedBy' headerStart + notFollowedBy' metaLineStart + notFollowedBy' commentLineStart + notFollowedBy' bulletListStart + notFollowedBy' orderedListStart + return B.space + +link :: OrgParser Inlines +link = explicitLink <|> selfLink "link" + +explicitLink :: OrgParser Inlines +explicitLink = try $ do + char '[' + src <- enclosedRaw (char '[') (char ']') + title <- enclosedInlines (char '[') (char ']') + char ']' + return $ B.link src "" title + +selfLink :: OrgParser Inlines +selfLink = try $ do + src <- enclosedRaw (string "[[") (string "]]") + return $ B.link src "" (B.str src) + +emph :: OrgParser Inlines +emph = B.emph <$> inlinesEnclosedBy '/' + +strong :: OrgParser Inlines +strong = B.strong <$> inlinesEnclosedBy '*' + +strikeout :: OrgParser Inlines +strikeout = B.strikeout <$> inlinesEnclosedBy '+' + +-- There is no underline, so we use strong instead. +underline :: OrgParser Inlines +underline = B.strong <$> inlinesEnclosedBy '_' + +code :: OrgParser Inlines +code = B.code <$> rawEnclosedBy '=' + +verbatim :: OrgParser Inlines +verbatim = B.rawInline "" <$> rawEnclosedBy '~' + +subscript :: OrgParser Inlines +subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) + +superscript :: OrgParser Inlines +superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces) + +maybeGroupedByBraces :: OrgParser Inlines +maybeGroupedByBraces = try $ + choice [ try $ enclosedInlines (char '{') (char '}') + , B.str . (:"") <$> anyChar + ] + +symbol :: OrgParser Inlines +symbol = B.str . (: "") <$> oneOf specialChars + +enclosedInlines :: OrgParser a + -> OrgParser b + -> OrgParser Inlines +enclosedInlines start end = try $ + trimInlines . mconcat <$> enclosed start end inline + +-- FIXME: This is a hack +inlinesEnclosedBy :: Char + -> OrgParser Inlines +inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + +enclosedRaw :: OrgParser a + -> OrgParser b + -> OrgParser String +enclosedRaw start end = try $ + start *> (onSingleLine <|> spanningTwoLines) + where onSingleLine = try $ many1Till (noneOf "\n\r") end + spanningTwoLines = try $ + anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine + +rawEnclosedBy :: Char + -> OrgParser String +rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) + +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: OrgParser a -> OrgParser a +atStart p = do + pos <- getPosition + st <- getState + guard $ stateLastStrPos st /= Just pos + p + +-- | succeeds only if we're at the end of a word +atEnd :: OrgParser a -> OrgParser a +atEnd p = try $ p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars + +postWordChars :: [Char] +postWordChars = "\t\n\r !\"'),-.:?}" + +-- FIXME: These functions are hacks and should be replaced +endsOnThisOrNextLine :: Char + -> OrgParser () +endsOnThisOrNextLine c = do + inp <- getInput + let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) + endsOnThisLine inp c doOtherwise + +endsOnThisLine :: [Char] + -> Char + -> ([Char] -> OrgParser ()) + -> OrgParser () +endsOnThisLine input c doOnOtherLines = do + case break (`elem` c:"\n") input of + (_,'\n':rest) -> doOnOtherLines rest + (_,_:rest@(n:_)) -> if n `elem` postWordChars + then return () + else endsOnThisLine rest c doOnOtherLines + _ -> mzero + -- cgit v1.2.3 From 7cf7e45e4cbb99b320a92b4bd31e433f535d3ef7 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 4 Apr 2014 14:17:43 +0200 Subject: Org reader: Slight cleaning of table parsing code --- src/Text/Pandoc/Readers/Org.hs | 68 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 33 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5dc250f04..8b155194b 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -217,13 +217,18 @@ data OrgTableRow = OrgContentRow [Blocks] | OrgHlineRow deriving (Eq, Show) -type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]]) +data OrgTable = OrgTable + { orgTableColumns :: Int + , orgTableAlignments :: [Alignment] + , orgTableHeader :: [Blocks] + , orgTableRows :: [[Blocks]] + } deriving (Eq, Show) table :: OrgParser Blocks table = try $ do lookAhead tableStart - (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows - return $ B.table "" (zip aligns widths) heads lns + OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows + return $ B.table "" (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' @@ -237,10 +242,9 @@ tableContentRow = try $ tableContentCell :: OrgParser Blocks tableContentCell = try $ - B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell) + B.plain . trimInlines . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char --- endOfCell = char '|' <|> newline endOfCell = try $ char '|' <|> lookAhead newline tableAlignRow :: OrgParser OrgTableRow @@ -269,54 +273,53 @@ tableHline :: OrgParser OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) -tableContent :: [OrgTableRow] - -> OrgTableContent -tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty) +rowsToTable :: [OrgTableRow] + -> OrgTable +rowsToTable = foldl' (flip rowToContent) zeroTable + where zeroTable = OrgTable 0 mempty mempty mempty -normalizeTable :: OrgTableContent - -> OrgTableContent -normalizeTable (cols, aligns, widths, heads, lns) = +normalizeTable :: OrgTable + -> OrgTable +normalizeTable (OrgTable cols aligns heads lns) = let aligns' = fillColumns aligns AlignDefault - widths' = fillColumns widths 0.0 heads' = if heads == mempty - then heads + then mempty else fillColumns heads (B.plain mempty) lns' = map (flip fillColumns (B.plain mempty)) lns fillColumns base padding = take cols $ base ++ repeat padding - in (cols, aligns', widths', heads', lns') + in OrgTable cols aligns' heads' lns' -- One or more horizontal rules after the first content line mark the previous -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow - -> OrgTableContent - -> OrgTableContent + -> OrgTable + -> OrgTable rowToContent OrgHlineRow = maybeBodyToHeader rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as setLongestRow :: [a] - -> OrgTableContent - -> OrgTableContent -setLongestRow r (cols, aligns, widths, heads, lns) = - (max cols (length r), aligns, widths, heads, lns) + -> OrgTable + -> OrgTable +setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) } -maybeBodyToHeader :: OrgTableContent - -> OrgTableContent -maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, []) -maybeBodyToHeader content = content +maybeBodyToHeader :: OrgTable + -> OrgTable +maybeBodyToHeader t = case t of + OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> + t{ orgTableHeader = b , orgTableRows = [] } + _ -> t appendToBody :: [Blocks] - -> OrgTableContent - -> OrgTableContent -appendToBody r (cols, aligns, widths, heads, lns) = - (cols, aligns, widths, heads, lns ++ [r]) + -> OrgTable + -> OrgTable +appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] - -> OrgTableContent - -> OrgTableContent -setAligns aligns (cols, _, widths, heads, lns) = - (cols, aligns, widths, heads, lns) + -> OrgTable + -> OrgTable +setAligns aligns t = t{ orgTableAlignments = aligns } -- Paragraphs or Plain text paraOrPlain :: OrgParser Blocks @@ -549,4 +552,3 @@ endsOnThisLine input c doOnOtherLines = do then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero - -- cgit v1.2.3 From d43c3e81017734170fb25460c4b9ab9cccb1e0db Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 4 Apr 2014 17:20:36 +0200 Subject: Org reader: Use specialized org parser state The default pandoc ParserState is replaced with `OrgParserState`. This is done to simplify the introduction of new state fields required for efficient Org parsing. --- src/Text/Pandoc/Readers/Org.hs | 48 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 8b155194b..0ae4d231c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -29,15 +29,16 @@ Conversion of Org-Mode to 'Pandoc' document. module Text.Pandoc.Readers.Org ( readOrg ) where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (orderedListMarker) +import Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos) import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) import Control.Monad (guard, mzero) import Data.Char (toLower) +import Data.Default import Data.List (foldl') import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) @@ -46,15 +47,48 @@ import Data.Monoid (mconcat, mempty, mappend) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n") +readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n") + +type OrgParser = Parser [Char] OrgParserState + +-- | Org-mode parser state +data OrgParserState = OrgParserState + { orgOptions :: ReaderOptions + , orgInlineCharStack :: [Char] + , orgLastStrPos :: Maybe SourcePos + , orgMeta :: Meta + } deriving (Show) + +instance HasReaderOptions OrgParserState where + extractReaderOptions = orgOptions + +instance HasMeta OrgParserState where + setMeta field val st = + st{ orgMeta = setMeta field val $ orgMeta st } + deleteMeta field st = + st{ orgMeta = deleteMeta field $ orgMeta st } + +instance Default OrgParserState where + def = defaultOrgParserState + +defaultOrgParserState :: OrgParserState +defaultOrgParserState = OrgParserState + { orgOptions = def + , orgInlineCharStack = [] + , orgLastStrPos = Nothing + , orgMeta = nullMeta + } + +updateLastStrPos :: OrgParser () +updateLastStrPos = getPosition >>= \p -> + updateState $ \s -> s{ orgLastStrPos = Just p } -type OrgParser = Parser [Char] ParserState parseOrg:: OrgParser Pandoc parseOrg = do blocks' <- B.toList <$> parseBlocks st <- getState - let meta = stateMeta st + let meta = orgMeta st return $ Pandoc meta $ filter (/= Null) blocks' -- @@ -177,7 +211,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser Blocks declarationLine = try $ do meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta - updateState $ \st -> st { stateMeta = stateMeta st <> meta' } + updateState $ \st -> st { orgMeta = orgMeta st <> meta' } return mempty metaValue :: OrgParser MetaValue @@ -522,7 +556,7 @@ atStart :: OrgParser a -> OrgParser a atStart p = do pos <- getPosition st <- getState - guard $ stateLastStrPos st /= Just pos + guard $ orgLastStrPos st /= Just pos p -- | succeeds only if we're at the end of a word -- cgit v1.2.3 From fd98532784e43ad73072f37a31af5ff40fdc1c56 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 5 Apr 2014 09:37:46 +0200 Subject: Org reader: Fix parsing of nested inlines Text such as /*this*/ was not correctly parsed as a strong, emphasised word. This was due to the end-of-word recognition being to strict as it did not accept markup chars as part of a word. The fix involves an additional parser state field, listing the markup chars which might be parsed as part of a word. --- src/Text/Pandoc/Readers/Org.hs | 27 ++++++++++++++++++++------- tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0ae4d231c..ad66caab9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -535,8 +535,15 @@ enclosedInlines start end = try $ -- FIXME: This is a hack inlinesEnclosedBy :: Char -> OrgParser Inlines -inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) - (atEnd $ char c) +inlinesEnclosedBy c = try $ do + updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } + res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) + (atEnd $ char c) + updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st } + return res + where shift xs + | null xs = [] + | otherwise = tail xs enclosedRaw :: OrgParser a -> OrgParser b @@ -561,11 +568,16 @@ atStart p = do -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a -atEnd p = try $ p <* lookingAtEndOfWord - where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars +atEnd p = try $ do + p <* lookingAtEndOfWord + where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars -postWordChars :: [Char] -postWordChars = "\t\n\r !\"'),-.:?}" +postWordChars :: OrgParser [Char] +postWordChars = do + st <- getState + return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st) + where safeSecond (_:x2:_) = [x2] + safeSecond _ = [] -- FIXME: These functions are hacks and should be replaced endsOnThisOrNextLine :: Char @@ -580,9 +592,10 @@ endsOnThisLine :: [Char] -> ([Char] -> OrgParser ()) -> OrgParser () endsOnThisLine input c doOnOtherLines = do + postWordChars' <- postWordChars case break (`elem` c:"\n") input of (_,'\n':rest) -> doOnOtherLines rest - (_,_:rest@(n:_)) -> if n `elem` postWordChars + (_,_:rest@(n:_)) -> if n `elem` postWordChars' then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 8c5982302..9091d9c74 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -42,6 +42,10 @@ tests = "*Cider*" =?> para (strong "Cider") + , "Strong Emphasis" =: + "/*strength*/" =?> + para (emph . strong $ "strength") + , "Strikeout" =: "+Kill Bill+" =?> para (strikeout . spcSep $ [ "Kill", "Bill" ]) -- cgit v1.2.3 From d76d2b707b2b5cebb38122e117527a70996c2c4f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 5 Apr 2014 09:09:44 +0200 Subject: Org reader: Provide more language identifier translations Org-mode and Pandoc use different language identifiers, marking source code as being written in a certain programming language. This adds more translations from identifiers as used in Org to identifiers used in Pandoc. The full list of identifiers used in Org and Pandoc is available through http://orgmode.org/manual/Languages.html and `pandoc -v`, respectively. --- src/Text/Pandoc/Readers/Org.hs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ad66caab9..62088a04d 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -153,7 +153,14 @@ indentWith num = do , try (char '\t' >> count (num - tabStop) (char ' ')) ] translateLang :: String -> String -translateLang "sh" = "bash" +translateLang "C" = "c" +translateLang "C++" = "cpp" +translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported +translateLang "js" = "javascript" +translateLang "lisp" = "commonlisp" +translateLang "R" = "r" +translateLang "sh" = "bash" +translateLang "sqlite" = "sql" translateLang cs = cs commaEscaped :: String -> String -- cgit v1.2.3 From 652c781e375f3678a0ec821663240d4958f324de Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 5 Apr 2014 16:10:52 +0200 Subject: Org reader: Support inline images --- src/Text/Pandoc/Readers/Org.hs | 34 ++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 12 ++++++++++-- 2 files changed, 34 insertions(+), 12 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 62088a04d..8b1b4fa23 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,7 +39,7 @@ import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<** import Control.Monad (guard, mzero) import Data.Char (toLower) import Data.Default -import Data.List (foldl') +import Data.List (foldl', isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) @@ -484,20 +484,26 @@ endline = try $ do return B.space link :: OrgParser Inlines -link = explicitLink <|> selfLink "link" +link = explicitOrImageLink <|> selflinkOrImage "link" -explicitLink :: OrgParser Inlines -explicitLink = try $ do +explicitOrImageLink :: OrgParser Inlines +explicitOrImageLink = try $ do char '[' - src <- enclosedRaw (char '[') (char ']') - title <- enclosedInlines (char '[') (char ']') + src <- enclosedRaw (char '[') (char ']') + title <- enclosedRaw (char '[') (char ']') + title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n") char ']' - return $ B.link src "" title + return $ if (isImage src) && (isImage title) + then B.link src "" (B.image title "" "") + else B.link src "" title' + where butLast = reverse . tail . reverse -selfLink :: OrgParser Inlines -selfLink = try $ do +selflinkOrImage :: OrgParser Inlines +selflinkOrImage = try $ do src <- enclosedRaw (string "[[") (string "]]") - return $ B.link src "" (B.str src) + return $ if isImage src + then B.image src "" "" + else B.link src "" (B.str src) emph :: OrgParser Inlines emph = B.emph <$> inlinesEnclosedBy '/' @@ -606,3 +612,11 @@ endsOnThisLine input c doOnOtherLines = do then return () else endsOnThisLine rest c doOnOtherLines _ -> mzero + +isImage filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9091d9c74..1088d6611 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -94,14 +94,22 @@ tests = , (strong ("is" <> space <> "not")) , "emph/" ]) + , "Image" =: + "[[./sunset.jpg]]" =?> + (para $ image "./sunset.jpg" "" "") + , "Explicit link" =: - "[[http://zeitlens.com/][pseudo-random nonsense]]" =?> + "[[http://zeitlens.com/][pseudo-random /nonsense/]]" =?> (para $ link "http://zeitlens.com/" "" - ("pseudo-random" <> space <> "nonsense")) + ("pseudo-random" <> space <> emph "nonsense")) , "Self-link" =: "[[http://zeitlens.com/]]" =?> (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/") + + , "Image link" =: + "[[sunset.png][dusk.svg]]" =?> + (para $ link "sunset.png" "" (image "dusk.svg" "" "")) ] , testGroup "Meta Information" $ -- cgit v1.2.3 From f2deb9d86d864c79c5ca18205b4ca565b7199413 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 5 Apr 2014 15:50:46 -0700 Subject: Org reader: Added type signature. --- src/Text/Pandoc/Readers/Org.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 8b1b4fa23..5ad2531ac 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -613,6 +613,7 @@ endsOnThisLine input c doOnOtherLines = do else endsOnThisLine rest c doOnOtherLines _ -> mzero +isImage :: String -> Bool isImage filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && any (\x -> (x++":") `isPrefixOf` filename) protocols || -- cgit v1.2.3 From 4ebf6f6ebf7d679252ade08203ec13e3e92c2db5 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 19:09:33 +0200 Subject: Org reader: Minor code clean-up --- src/Text/Pandoc/Readers/Org.hs | 51 +++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5ad2531ac..6652925aa 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -268,8 +268,12 @@ data OrgTable = OrgTable table :: OrgParser Blocks table = try $ do lookAhead tableStart - OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows - return $ B.table "" (zip aligns $ repeat 0) heads lns + orgToPandocTable . normalizeTable . rowsToTable <$> tableRows + +orgToPandocTable :: OrgTable + -> Blocks +orgToPandocTable (OrgTable _ aligns heads lns) = + B.table "" (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' @@ -403,20 +407,14 @@ orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") +-- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser Blocks listItem start = try $ do - (markerLength, first) <- try (start >>= rawListItem) - rest <- many (listContinuation markerLength) - parseFromString parseBlocks $ concat (first:rest) - --- parse raw text for one list item, excluding start marker and continuations -rawListItem :: Int - -> OrgParser (Int, String) -rawListItem markerLength = try $ do - firstLine <- anyLine - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + markerLength <- try start + firstLine <- anyLineNewline + rest <- concat <$> many (listContinuation markerLength) + parseFromString parseBlocks $ firstLine ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. @@ -424,14 +422,11 @@ listContinuation :: Int -> OrgParser String listContinuation markerLength = try $ mappend <$> many blankline - <*> (concat <$> many1 (listLine markerLength)) + <*> (concat <$> many1 listLine) + where listLine = try $ indentWith markerLength *> anyLineNewline --- parse a line of a list item -listLine :: Int - -> OrgParser String -listLine markerLength = try $ - indentWith markerLength *> anyLine - <**> pure (++ "\n") +anyLineNewline :: OrgParser String +anyLineNewline = (++ "\n") <$> anyLine -- @@ -491,12 +486,11 @@ explicitOrImageLink = try $ do char '[' src <- enclosedRaw (char '[') (char ']') title <- enclosedRaw (char '[') (char ']') - title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n") + title' <- parseFromString (mconcat <$> many inline) title char ']' return $ if (isImage src) && (isImage title) then B.link src "" (B.image title "" "") else B.link src "" title' - where butLast = reverse . tail . reverse selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do @@ -552,11 +546,8 @@ inlinesEnclosedBy c = try $ do updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) (atEnd $ char c) - updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st } + updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st } return res - where shift xs - | null xs = [] - | otherwise = tail xs enclosedRaw :: OrgParser a -> OrgParser b @@ -583,14 +574,13 @@ atStart p = do atEnd :: OrgParser a -> OrgParser a atEnd p = try $ do p <* lookingAtEndOfWord - where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars + where lookingAtEndOfWord = + eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars postWordChars :: OrgParser [Char] postWordChars = do st <- getState - return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st) - where safeSecond (_:x2:_) = [x2] - safeSecond _ = [] + return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st) -- FIXME: These functions are hacks and should be replaced endsOnThisOrNextLine :: Char @@ -608,6 +598,7 @@ endsOnThisLine input c doOnOtherLines = do postWordChars' <- postWordChars case break (`elem` c:"\n") input of (_,'\n':rest) -> doOnOtherLines rest + (_,_:[]) -> return () (_,_:rest@(n:_)) -> if n `elem` postWordChars' then return () else endsOnThisLine rest c doOnOtherLines -- cgit v1.2.3 From 480b33b7100048ef3fad51754ae76c21daa8b86f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 14:49:57 +0200 Subject: Org reader: Add support for definition lists --- src/Text/Pandoc/Readers/Org.hs | 17 ++++++++++++++++- tests/Tests/Readers/Org.hs | 26 +++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 6652925aa..20bca3e28 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -383,7 +383,10 @@ restOfLine = mconcat <$> manyTill inline newline -- list :: OrgParser Blocks -list = choice [ bulletList, orderedList ] "list" +list = choice [ definitionList, bulletList, orderedList ] "list" + +definitionList :: OrgParser Blocks +definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart) bulletList :: OrgParser Blocks bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) @@ -407,6 +410,18 @@ orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") +definitionListItem :: OrgParser Int + -> OrgParser (Inlines, [Blocks]) +definitionListItem parseMarkerGetLength = try $ do + markerLength <- parseMarkerGetLength + term <- manyTill (noneOf "\n\r") (try $ string "::") + first <- anyLineNewline + cont <- concat <$> many (listContinuation markerLength) + term' <- parseFromString inline term + contents' <- parseFromString parseBlocks $ first ++ cont + return (term', [contents']) + + -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser Blocks diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 1088d6611..eb9f4d741 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -43,8 +43,8 @@ tests = para (strong "Cider") , "Strong Emphasis" =: - "/*strength*/" =?> - para (emph . strong $ "strength") + "/*strength*/" =?> + para (emph . strong $ "strength") , "Strikeout" =: "+Kill Bill+" =?> @@ -428,7 +428,27 @@ tests = , "Bullet List in Ordered List" =: ("1. GNU\n" ++ " - Freedom\n") =?> - orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + + , "Definition List" =: + unlines [ "- PLL :: phase-locked loop" + , "- TTL ::" + , " transistor-transistor logic" + , "- PSK::phase-shift keying" + , "" + , " a digital modulation scheme" + ] =?> + definitionList [ ("PLL", [ plain $ "phase-locked" <> space <> "loop" ]) + , ("TTL", [ plain $ "transistor-transistor" <> space <> + "logic" ]) + , ("PSK", [ mconcat + [ para $ "phase-shift" <> space <> "keying" + , plain $ spcSep [ "a", "digital" + , "modulation", "scheme" ] + ] + ] + ) + ] ] , testGroup "Tables" -- cgit v1.2.3 From c47bd8404fda0a782719848ef190b56eb0fdb9dc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 7 Apr 2014 11:00:30 +0200 Subject: Org reader: Support inline math (like $E=mc^2$) Closes #1223. --- src/Text/Pandoc/Readers/Org.hs | 22 ++++++++++++++++------ tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 20bca3e28..2bb6ee122 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -459,6 +459,7 @@ inline = choice inlineParsers "inline" , strikeout , underline , code + , math , verbatim , subscript , superscript @@ -530,10 +531,13 @@ underline = B.strong <$> inlinesEnclosedBy '_' code :: OrgParser Inlines code = B.code <$> rawEnclosedBy '=' -verbatim :: OrgParser Inlines +math :: OrgParser Inlines +math = B.math <$> rawEnclosedBy '$' + +verbatim :: OrgParser Inlines verbatim = B.rawInline "" <$> rawEnclosedBy '~' -subscript :: OrgParser Inlines +subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) superscript :: OrgParser Inlines @@ -580,18 +584,24 @@ rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) -- succeeds only if we're not right after a str (ie. in middle of word) atStart :: OrgParser a -> OrgParser a atStart p = do - pos <- getPosition - st <- getState - guard $ orgLastStrPos st /= Just pos + guard =<< not <$> isRightAfterString p -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a atEnd p = try $ do - p <* lookingAtEndOfWord + p <* lookingAtEndOfWord where lookingAtEndOfWord = eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars +isRightAfterString :: OrgParser Bool +isRightAfterString = do + pos <- getPosition + st <- getState + -- the position `Nothing` isn't after a String, either, hence the double + -- negation + return $ not $ orgLastStrPos st /= Just pos + postWordChars :: OrgParser [Char] postWordChars = do st <- getState diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index eb9f4d741..77b9d9327 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -54,6 +54,10 @@ tests = "=Robot.rock()=" =?> para (code "Robot.rock()") + , "Math" =: + "$E=mc^2$" =?> + para (math "E=mc^2") + , "Verbatim" =: "~word for word~" =?> para (rawInline "" "word for word") -- cgit v1.2.3 From 030020236c85c736892a6f8e0dcefca1681e5ce0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 8 Apr 2014 22:39:25 +0200 Subject: Org reader: Precise rules for the recognition of markup The inline parsers have been rewritten using the org source code as a reference. This fixes a couple of bugs related to erroneous markup recognition. --- src/Text/Pandoc/Readers/Org.hs | 380 ++++++++++++++++++++++++++++------------- tests/Tests/Readers/Org.hs | 25 ++- 2 files changed, 283 insertions(+), 122 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2bb6ee122..392b17bbc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -32,11 +32,12 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos) +import qualified Text.Pandoc.Parsing as P +import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos) import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) -import Control.Monad (guard, mzero) +import Control.Monad (guard, when) import Data.Char (toLower) import Data.Default import Data.List (foldl', isPrefixOf, isSuffixOf) @@ -47,49 +48,100 @@ import Data.Monoid (mconcat, mempty, mappend) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n") +readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState +parseOrg:: OrgParser Pandoc +parseOrg = do + blocks' <- B.toList <$> parseBlocks + st <- getState + let meta = orgStateMeta st + return $ Pandoc meta $ filter (/= Null) blocks' + +-- +-- Parser State for Org +-- + -- | Org-mode parser state data OrgParserState = OrgParserState - { orgOptions :: ReaderOptions - , orgInlineCharStack :: [Char] - , orgLastStrPos :: Maybe SourcePos - , orgMeta :: Meta + { orgStateOptions :: ReaderOptions + , orgStateEmphasisCharStack :: [Char] + , orgStateEmphasisNewlines :: Maybe Int + , orgStateLastForbiddenCharPos :: Maybe SourcePos + , orgStateLastPreCharPos :: Maybe SourcePos + , orgStateLastStrPos :: Maybe SourcePos + , orgStateMeta :: Meta } deriving (Show) instance HasReaderOptions OrgParserState where - extractReaderOptions = orgOptions + extractReaderOptions = orgStateOptions instance HasMeta OrgParserState where setMeta field val st = - st{ orgMeta = setMeta field val $ orgMeta st } + st{ orgStateMeta = setMeta field val $ orgStateMeta st } deleteMeta field st = - st{ orgMeta = deleteMeta field $ orgMeta st } + st{ orgStateMeta = deleteMeta field $ orgStateMeta st } instance Default OrgParserState where def = defaultOrgParserState defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState - { orgOptions = def - , orgInlineCharStack = [] - , orgLastStrPos = Nothing - , orgMeta = nullMeta + { orgStateOptions = def + , orgStateEmphasisCharStack = [] + , orgStateEmphasisNewlines = Nothing + , orgStateLastForbiddenCharPos = Nothing + , orgStateLastPreCharPos = Nothing + , orgStateLastStrPos = Nothing + , orgStateMeta = nullMeta } updateLastStrPos :: OrgParser () updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ orgLastStrPos = Just p } + updateState $ \s -> s{ orgStateLastStrPos = Just p } +updateLastForbiddenCharPos :: OrgParser () +updateLastForbiddenCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} -parseOrg:: OrgParser Pandoc -parseOrg = do - blocks' <- B.toList <$> parseBlocks +updateLastPreCharPos :: OrgParser () +updateLastPreCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastPreCharPos = Just p} + +pushToInlineCharStack :: Char -> OrgParser () +pushToInlineCharStack c = updateState $ \st -> + st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) } + +popInlineCharStack :: OrgParser () +popInlineCharStack = updateState $ \st -> + st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st } + +surroundingEmphasisChar :: OrgParser [Char] +surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState + +startEmphasisNewlinesCounting :: Int -> OrgParser () +startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> + s { orgStateEmphasisNewlines = Just maxNewlines } + +decEmphasisNewlinesCount :: OrgParser () +decEmphasisNewlinesCount = updateState $ \s -> + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + +newlinesCountWithinLimits :: OrgParser Bool +newlinesCountWithinLimits = do st <- getState - let meta = orgMeta st - return $ Pandoc meta $ filter (/= Null) blocks' + return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True + +resetEmphasisNewlines :: OrgParser () +resetEmphasisNewlines = updateState $ \s -> + s{ orgStateEmphasisNewlines = Nothing } + +newline :: OrgParser Char +newline = + P.newline + <* updateLastPreCharPos + <* updateLastForbiddenCharPos -- -- parsing blocks @@ -218,7 +270,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser Blocks declarationLine = try $ do meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta - updateState $ \st -> st { orgMeta = orgMeta st <> meta' } + updateState $ \st -> st { orgStateMeta = orgStateMeta st <> meta' } return mempty metaValue :: OrgParser MetaValue @@ -449,22 +501,24 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline :: OrgParser Inlines -inline = choice inlineParsers "inline" - where inlineParsers = [ whitespace - , link - , str - , endline - , emph - , strong - , strikeout - , underline - , code - , math - , verbatim - , subscript - , superscript - , symbol - ] +inline = + choice [ whitespace + , link + , str + , endline + , emph + , strong + , strikeout + , underline + , code + , math + , verbatim + , subscript + , superscript + , symbol + ] <* (guard =<< newlinesCountWithinLimits) + "inline" + -- treat these as potentially non-text when parsing inline: specialChars :: [Char] @@ -472,7 +526,10 @@ specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" whitespace :: OrgParser Inlines -whitespace = B.space <$ skipMany1 spaceChar "whitespace" +whitespace = B.space <$ skipMany1 spaceChar + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + "whitespace" str :: OrgParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") @@ -492,6 +549,9 @@ endline = try $ do notFollowedBy' commentLineStart notFollowedBy' bulletListStart notFollowedBy' orderedListStart + decEmphasisNewlinesCount + guard =<< newlinesCountWithinLimits + updateLastPreCharPos return B.space link :: OrgParser Inlines @@ -500,42 +560,54 @@ link = explicitOrImageLink <|> selflinkOrImage "link" explicitOrImageLink :: OrgParser Inlines explicitOrImageLink = try $ do char '[' - src <- enclosedRaw (char '[') (char ']') + src <- linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if (isImage src) && (isImage title) + return $ if (isImageFilename src) && (isImageFilename title) then B.link src "" (B.image title "" "") else B.link src "" title' selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do - src <- enclosedRaw (string "[[") (string "]]") - return $ if isImage src + src <- (char '[') *> linkTarget <* char ']' + return $ if isImageFilename src then B.image src "" "" else B.link src "" (B.str src) +linkTarget :: OrgParser String +linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") + +isImageFilename :: String -> Bool +isImageFilename filename = + any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && + any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename + where + imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + protocols = [ "file", "http", "https" ] + emph :: OrgParser Inlines -emph = B.emph <$> inlinesEnclosedBy '/' +emph = B.emph <$> emphasisBetween '/' strong :: OrgParser Inlines -strong = B.strong <$> inlinesEnclosedBy '*' +strong = B.strong <$> emphasisBetween '*' strikeout :: OrgParser Inlines -strikeout = B.strikeout <$> inlinesEnclosedBy '+' +strikeout = B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. underline :: OrgParser Inlines -underline = B.strong <$> inlinesEnclosedBy '_' +underline = B.strong <$> emphasisBetween '_' code :: OrgParser Inlines -code = B.code <$> rawEnclosedBy '=' - -math :: OrgParser Inlines -math = B.math <$> rawEnclosedBy '$' +code = B.code <$> verbatimBetween '=' verbatim :: OrgParser Inlines -verbatim = B.rawInline "" <$> rawEnclosedBy '~' +verbatim = B.rawInline "" <$> verbatimBetween '~' + +math :: OrgParser Inlines +math = B.math <$> mathStringBetween '$' subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) @@ -550,7 +622,72 @@ maybeGroupedByBraces = try $ ] symbol :: OrgParser Inlines -symbol = B.str . (: "") <$> oneOf specialChars +symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) + where updatePositions c + | c `elem` emphasisPreChars = c <$ updateLastPreCharPos + | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos + | otherwise = return c + +emphasisBetween :: Char + -> OrgParser Inlines +emphasisBetween c = try $ do + startEmphasisNewlinesCounting emphasisAllowedNewlines + res <- enclosedInlines (emphasisStart c) (emphasisEnd c) + isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState + when isTopLevelEmphasis + resetEmphasisNewlines + return res + +verbatimBetween :: Char + -> OrgParser String +verbatimBetween c = try $ + emphasisStart c *> + many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c) + +-- | Parses a raw string delimited by @c@ using Org's math rules +mathStringBetween :: Char + -> OrgParser String +mathStringBetween c = try $ do + mathStart c + body <- many1TillNOrLessNewlines mathAllowedNewlines + (noneOf (c:"\n\r")) + (lookAhead $ mathEnd c) + final <- mathEnd c + return $ body ++ [final] + +-- | Parses the start (opening character) of emphasis +emphasisStart :: Char -> OrgParser Char +emphasisStart c = try $ do + guard =<< afterEmphasisPreChar + guard =<< notAfterString + char c + lookAhead (noneOf emphasisForbiddenBorderChars) + pushToInlineCharStack c + return c + +-- | Parses the closing character of emphasis +emphasisEnd :: Char -> OrgParser Char +emphasisEnd c = try $ do + guard =<< notAfterForbiddenBorderChar + char c + eof <|> lookAhead (surroundingEmphasisChar >>= \x -> + oneOf (x ++ emphasisPostChars)) + *> return () + updateLastStrPos + popInlineCharStack + return c + +mathStart :: Char -> OrgParser Char +mathStart c = try $ do + char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) + +mathEnd :: Char -> OrgParser Char +mathEnd c = try $ do + res <- noneOf (c:mathForbiddenBorderChars) + char c + eof <|> (lookAhead $ oneOf mathPostChars *> pure ()) + return res + enclosedInlines :: OrgParser a -> OrgParser b @@ -558,16 +695,6 @@ enclosedInlines :: OrgParser a enclosedInlines start end = try $ trimInlines . mconcat <$> enclosed start end inline --- FIXME: This is a hack -inlinesEnclosedBy :: Char - -> OrgParser Inlines -inlinesEnclosedBy c = try $ do - updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } - res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) - (atEnd $ char c) - updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st } - return res - enclosedRaw :: OrgParser a -> OrgParser b -> OrgParser String @@ -577,63 +704,76 @@ enclosedRaw start end = try $ spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine -rawEnclosedBy :: Char - -> OrgParser String -rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) - --- succeeds only if we're not right after a str (ie. in middle of word) -atStart :: OrgParser a -> OrgParser a -atStart p = do - guard =<< not <$> isRightAfterString - p - --- | succeeds only if we're at the end of a word -atEnd :: OrgParser a -> OrgParser a -atEnd p = try $ do - p <* lookingAtEndOfWord - where lookingAtEndOfWord = - eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars - -isRightAfterString :: OrgParser Bool -isRightAfterString = do +-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume +-- newlines. +many1TillNOrLessNewlines :: Int + -> OrgParser Char + -> OrgParser a + -> OrgParser String +many1TillNOrLessNewlines n p end = try $ + nMoreLines (Just n) mempty >>= oneOrMore + where + nMoreLines Nothing cs = return cs + nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine + nMoreLines k cs = try $ (final k cs <|> rest k cs) + >>= uncurry nMoreLines + final _ cs = (\x -> (Nothing, cs ++ x)) <$> (try finalLine) + rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline) + finalLine = try $ manyTill p end + minus1 k = k - 1 + oneOrMore cs = guard (not $ null cs) *> return cs + +-- Org allows customization of the way it reads emphasis. We use the defaults +-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` +-- for details). + +-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too) +emphasisPreChars :: [Char] +emphasisPreChars = "\t \"'({" + +-- | Chars allowed at after emphasis +emphasisPostChars :: [Char] +emphasisPostChars = "\t\n !\"'),-.:;?\\}" + +-- | Chars not allowed at the (inner) border of emphasis +emphasisForbiddenBorderChars :: [Char] +emphasisForbiddenBorderChars = "\t\n\r \"'," + +-- | The maximum number of newlines within +emphasisAllowedNewlines :: Int +emphasisAllowedNewlines = 1 + +-- LaTeX-style math: see `org-latex-regexps` for details + +-- | Chars allowed after an inline ($...$) math statement +mathPostChars :: [Char] +mathPostChars = "\t\n \"',-.:;?" + +-- | Chars not allowed at the (inner) border of math +mathForbiddenBorderChars :: [Char] +mathForbiddenBorderChars = "\t\n\r ,;.$" + +-- | Maximum number of newlines in an inline math statement +mathAllowedNewlines :: Int +mathAllowedNewlines = 2 + +-- | Whether we are right behind a char allowed before emphasis +afterEmphasisPreChar :: OrgParser Bool +afterEmphasisPreChar = do pos <- getPosition - st <- getState - -- the position `Nothing` isn't after a String, either, hence the double - -- negation - return $ not $ orgLastStrPos st /= Just pos + lastPrePos <- orgStateLastPreCharPos <$> getState + return $ lastPrePos == Nothing || lastPrePos == Just pos -postWordChars :: OrgParser [Char] -postWordChars = do - st <- getState - return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st) - --- FIXME: These functions are hacks and should be replaced -endsOnThisOrNextLine :: Char - -> OrgParser () -endsOnThisOrNextLine c = do - inp <- getInput - let doOtherwise = \rest -> endsOnThisLine rest c (const mzero) - endsOnThisLine inp c doOtherwise - -endsOnThisLine :: [Char] - -> Char - -> ([Char] -> OrgParser ()) - -> OrgParser () -endsOnThisLine input c doOnOtherLines = do - postWordChars' <- postWordChars - case break (`elem` c:"\n") input of - (_,'\n':rest) -> doOnOtherLines rest - (_,_:[]) -> return () - (_,_:rest@(n:_)) -> if n `elem` postWordChars' - then return () - else endsOnThisLine rest c doOnOtherLines - _ -> mzero - -isImage :: String -> Bool -isImage filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - any (\x -> (x++":") `isPrefixOf` filename) protocols || - ':' `notElem` filename - where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] - protocols = [ "file", "http", "https" ] +-- | Whether we are right after the end of a string +notAfterString :: OrgParser Bool +notAfterString = do + pos <- getPosition + lastStrPos <- orgStateLastStrPos <$> getState + return $ lastStrPos /= Just pos + +-- | Whether the parser is right after a forbidden border char +notAfterForbiddenBorderChar :: OrgParser Bool +notAfterForbiddenBorderChar = do + pos <- getPosition + lastFBCPos <- orgStateLastForbiddenCharPos <$> getState + return $ lastFBCPos /= Just pos diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 77b9d9327..efd8fe977 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -86,16 +86,37 @@ tests = unlines [ "this+that+ +so+on" , "seven*eight* nine*" , "+not+funny+" + , "this == self" ] =?> para (spcSep [ "this+that+", "+so+on" , "seven*eight*", "nine*" , strikeout "not+funny" + , "this" <> space <> "==" <> space <> "self" ]) + , "Adherence to Org's rules for markup borders" =: + "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> + para (spcSep [ emph $ "t/&" <> space <> "a" + , "/" + , "./r/" + , "(" <> (strong "l") <> ")" + , (emph "e") <> "!" + , (emph "b") <> "." + ]) + + , "Inline math must stay within three lines" =: + unlines [ "$a", "b", "c$", "$d", "e", "f", "g$" ] =?> + para ((math "a\nb\nc") <> space <> + spcSep [ "$d", "e", "f", "g$" ]) + , "Markup may not span more than two lines" =: - unlines [ "/this *is", "not*", "emph/" ] =?> + unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?> para (spcSep [ "/this" - , (strong ("is" <> space <> "not")) + , (strong (spcSep + [ "is" + , (strikeout ("totally" <> space <> "nice")) + , "not" + ])) , "emph/" ]) , "Image" =: -- cgit v1.2.3 From 1715d7cee0b9388ac77b8b2a31fcbb00ead80adf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 10 Apr 2014 15:11:03 +0200 Subject: Org reader: Support more inline/display math variants Support all of the following variants as valid ways to define inline or display math inlines: - `\[..\]` (display) - `$$..$$` (display) - `\(..\)` (inline) - `$..$` (inline) This closes #1223. Again. --- src/Text/Pandoc/Readers/Org.hs | 28 ++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 8 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 392b17bbc..1d0400d96 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -512,6 +512,7 @@ inline = , underline , code , math + , displayMath , verbatim , subscript , superscript @@ -607,7 +608,15 @@ verbatim :: OrgParser Inlines verbatim = B.rawInline "" <$> verbatimBetween '~' math :: OrgParser Inlines -math = B.math <$> mathStringBetween '$' +math = B.math <$> choice [ math1CharBetween '$' + , mathStringBetween '$' + , rawMathBetween "\\(" "\\)" + ] + +displayMath :: OrgParser Inlines +displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) @@ -655,6 +664,21 @@ mathStringBetween c = try $ do final <- mathEnd c return $ body ++ [final] +-- | Parse a single character between @c@ using math rules +math1CharBetween :: Char + -> OrgParser String +math1CharBetween c = try $ do + char c + res <- noneOf $ c:mathForbiddenBorderChars + char c + eof <|> lookAhead (oneOf mathPostChars) *> return () + return [res] + +rawMathBetween :: String + -> String + -> OrgParser String +rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + -- | Parses the start (opening character) of emphasis emphasisStart :: Char -> OrgParser Char emphasisStart c = try $ do @@ -747,7 +771,7 @@ emphasisAllowedNewlines = 1 -- | Chars allowed after an inline ($...$) math statement mathPostChars :: [Char] -mathPostChars = "\t\n \"',-.:;?" +mathPostChars = "\t\n \"'),-.:;?" -- | Chars not allowed at the (inner) border of math mathForbiddenBorderChars :: [Char] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index efd8fe977..9e9482e45 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -54,14 +54,26 @@ tests = "=Robot.rock()=" =?> para (code "Robot.rock()") - , "Math" =: - "$E=mc^2$" =?> - para (math "E=mc^2") - , "Verbatim" =: "~word for word~" =?> para (rawInline "" "word for word") + , "Math $..$" =: + "$E=mc^2$" =?> + para (math "E=mc^2") + + , "Math $$..$$" =: + "$$E=mc^2$$" =?> + para (displayMath "E=mc^2") + + , "Math \\[..\\]" =: + "\\[E=ℎν\\]" =?> + para (displayMath "E=ℎν") + + , "Math \\(..\\)" =: + "\\(σ_x σ_p ≥ \\frac{ℏ}{2}\\)" =?> + para (math "σ_x σ_p ≥ \\frac{ℏ}{2}") + , "Symbol" =: "A * symbol" =?> para (str "A" <> space <> str "*" <> space <> "symbol") @@ -86,14 +98,19 @@ tests = unlines [ "this+that+ +so+on" , "seven*eight* nine*" , "+not+funny+" - , "this == self" ] =?> para (spcSep [ "this+that+", "+so+on" , "seven*eight*", "nine*" , strikeout "not+funny" - , "this" <> space <> "==" <> space <> "self" ]) + , "No empty markup" =: + -- FIXME: __ is erroneously parsed as subscript "_" + -- "// ** __ ++ == ~~ $$" =?> + -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) + "// ** ++ == ~~ $$" =?> + para (spcSep [ "//", "**", "++", "==", "~~", "$$" ]) + , "Adherence to Org's rules for markup borders" =: "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> para (spcSep [ emph $ "t/&" <> space <> "a" @@ -109,6 +126,13 @@ tests = para ((math "a\nb\nc") <> space <> spcSep [ "$d", "e", "f", "g$" ]) + , "Single-character math" =: + "$a$ $b$! $c$?" =?> + para (spcSep [ math "a" + , "$b$!" + , (math "c") <> "?" + ]) + , "Markup may not span more than two lines" =: unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?> para (spcSep [ "/this" -- cgit v1.2.3 From ace8837cd691b17e994b41dcb797de6ca1940136 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 10 Apr 2014 17:25:05 +0200 Subject: Org reader: Improve code by following HLint recommendations HLint's recommendations for better code are applied to the Org-mode reader code. --- src/Text/Pandoc/Readers/Org.hs | 44 +++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1d0400d96..29611e8cc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -44,11 +44,14 @@ import Data.List (foldl', isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) +-- Ignore HLint warnings to use String instead of [Char] +{-# ANN module ("HLint: ignore Use String" :: String) #-} + -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n") +readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState @@ -111,7 +114,7 @@ updateLastPreCharPos = getPosition >>= \p -> pushToInlineCharStack :: Char -> OrgParser () pushToInlineCharStack c = updateState $ \st -> - st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) } + st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st } popInlineCharStack :: OrgParser () popInlineCharStack = updateState $ \st -> @@ -176,7 +179,7 @@ orgBlock = try $ do "comment" -> return mempty "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr _ -> B.divWith ("", [blockType], []) - <$> (parseFromString parseBlocks blockStr) + <$> parseFromString parseBlocks blockStr blockHeader :: OrgParser (Int, String, [String]) blockHeader = (,,) <$> blockIndent @@ -199,7 +202,7 @@ rawBlockContent indent blockType = indentWith :: Int -> OrgParser String indentWith num = do tabStop <- getOption readerTabStop - if (num < tabStop) + if num < tabStop then count num (char ' ') else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] @@ -242,7 +245,7 @@ drawerStart = try $ <|> stringAnyCase "LOGBOOK" drawerLine :: OrgParser String -drawerLine = try $ anyLine +drawerLine = try anyLine drawerEnd :: OrgParser String drawerEnd = try $ @@ -276,7 +279,7 @@ declarationLine = try $ do metaValue :: OrgParser MetaValue metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine -metaKey :: OrgParser [Char] +metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces @@ -350,7 +353,7 @@ tableAlignRow = try $ tableAlignCell :: OrgParser Alignment tableAlignCell = - choice [ try $ emptyCell *> return (AlignDefault) + choice [ try $ emptyCell *> return AlignDefault , try $ skipSpaces *> char '<' *> tableAlignFromChar @@ -381,8 +384,8 @@ normalizeTable (OrgTable cols aligns heads lns) = let aligns' = fillColumns aligns AlignDefault heads' = if heads == mempty then mempty - else fillColumns heads (B.plain mempty) - lns' = map (flip fillColumns (B.plain mempty)) lns + else fillColumns heads (B.plain mempty) + lns' = map (`fillColumns` B.plain mempty) lns fillColumns base padding = take cols $ base ++ repeat padding in OrgTable cols aligns' heads' lns' @@ -565,13 +568,14 @@ explicitOrImageLink = try $ do title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if (isImageFilename src) && (isImageFilename title) - then B.link src "" (B.image title "" "") - else B.link src "" title' + return . B.link src "" + $ if isImageFilename src && isImageFilename title + then B.image title "" "" + else title' selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do - src <- (char '[') *> linkTarget <* char ']' + src <- char '[' *> linkTarget <* char ']' return $ if isImageFilename src then B.image src "" "" else B.link src "" (B.str src) @@ -619,10 +623,10 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" ] subscript :: OrgParser Inlines -subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) +subscript = B.subscript <$> try (char '_' *> maybeGroupedByBraces) superscript :: OrgParser Inlines -superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces) +superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces) maybeGroupedByBraces :: OrgParser Inlines maybeGroupedByBraces = try $ @@ -702,14 +706,14 @@ emphasisEnd c = try $ do return c mathStart :: Char -> OrgParser Char -mathStart c = try $ do +mathStart c = try $ char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) mathEnd :: Char -> OrgParser Char mathEnd c = try $ do res <- noneOf (c:mathForbiddenBorderChars) char c - eof <|> (lookAhead $ oneOf mathPostChars *> pure ()) + eof <|> lookAhead (oneOf mathPostChars *> pure ()) return res @@ -741,8 +745,8 @@ many1TillNOrLessNewlines n p end = try $ nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine nMoreLines k cs = try $ (final k cs <|> rest k cs) >>= uncurry nMoreLines - final _ cs = (\x -> (Nothing, cs ++ x)) <$> (try finalLine) - rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline) + final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine + rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline) finalLine = try $ manyTill p end minus1 k = k - 1 oneOrMore cs = guard (not $ null cs) *> return cs @@ -786,7 +790,7 @@ afterEmphasisPreChar :: OrgParser Bool afterEmphasisPreChar = do pos <- getPosition lastPrePos <- orgStateLastPreCharPos <$> getState - return $ lastPrePos == Nothing || lastPrePos == Just pos + return . fromMaybe True $ (== pos) <$> lastPrePos -- | Whether we are right after the end of a string notAfterString :: OrgParser Bool -- cgit v1.2.3 From 6f19be7d40f583ee4e10fa2b0f20bd4f1fa80c43 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 11 Apr 2014 11:05:42 +0200 Subject: Org reader: Fix parsing of sub-/superscript expressions This fixes the org-reader's handling of sub- and superscript expressions. Simple expressions (like `2^+10`), expressions in parentheses (`a_(n+1)`) and nested sexp (like `a_(nested()parens)`) are now read correctly. --- src/Text/Pandoc/Readers/Org.hs | 47 +++++++++++++++++++++++++++++++++--------- tests/Tests/Readers/Org.hs | 39 +++++++++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 19 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 29611e8cc..ceac69367 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -622,17 +622,11 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] -subscript :: OrgParser Inlines -subscript = B.subscript <$> try (char '_' *> maybeGroupedByBraces) +subscript :: OrgParser Inlines +subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) -superscript :: OrgParser Inlines -superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces) - -maybeGroupedByBraces :: OrgParser Inlines -maybeGroupedByBraces = try $ - choice [ try $ enclosedInlines (char '{') (char '}') - , B.str . (:"") <$> anyChar - ] +superscript :: OrgParser Inlines +superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) symbol :: OrgParser Inlines symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) @@ -805,3 +799,36 @@ notAfterForbiddenBorderChar = do pos <- getPosition lastFBCPos <- orgStateLastForbiddenCharPos <$> getState return $ lastFBCPos /= Just pos + +-- | Read a sub- or superscript expression +subOrSuperExpr :: OrgParser Inlines +subOrSuperExpr = try $ do + choice [ balancedSexp '{' '}' + , balancedSexp '(' ')' >>= return . enclosing ('(', ')') + , simpleSubOrSuperString + ] >>= parseFromString (mconcat <$> many inline) + +-- | Read a balanced sexp +balancedSexp :: Char + -> Char + -> OrgParser String +balancedSexp l r = try $ do + char l + res <- concat <$> many ( many1 (noneOf ([l, r] ++ "\n\r")) + <|> try (string [l, r]) + <|> enclosing (l, r) <$> balancedSexp l r + ) + char r + return res + +simpleSubOrSuperString :: OrgParser String +simpleSubOrSuperString = try $ + choice [ string "*" + , mappend <$> option [] ((:[]) <$> oneOf "+-") + <*> many1 alphaNum + ] + +enclosing :: (a, a) + -> [a] + -> [a] +enclosing (left, right) s = left : s ++ [right] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9e9482e45..49130f0ab 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -78,15 +78,15 @@ tests = "A * symbol" =?> para (str "A" <> space <> str "*" <> space <> "symbol") - , "Superscript single char" =: - "2^n" =?> - para (str "2" <> superscript "n") + , "Superscript simple expression" =: + "2^-λ" =?> + para (str "2" <> superscript "-λ") , "Superscript multi char" =: "2^{n-1}" =?> para (str "2" <> superscript "n-1") - , "Subscript single char" =: + , "Subscript simple expression" =: "a_n" =?> para (str "a" <> subscript "n") @@ -105,11 +105,8 @@ tests = ]) , "No empty markup" =: - -- FIXME: __ is erroneously parsed as subscript "_" - -- "// ** __ ++ == ~~ $$" =?> - -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) - "// ** ++ == ~~ $$" =?> - para (spcSep [ "//", "**", "++", "==", "~~", "$$" ]) + "// ** __ ++ == ~~ $$" =?> + para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ]) , "Adherence to Org's rules for markup borders" =: "/t/& a/ / ./r/ (*l*) /e/! /b/." =?> @@ -143,6 +140,30 @@ tests = ])) , "emph/" ]) + , "Sub- and superscript expressions" =: + unlines [ "a_(a(b)(c)d)" + , "e^(f(g)h)" + , "i_(jk)l)" + , "m^()n" + , "o_{p{q{}r}}" + , "s^{t{u}v}" + , "w_{xy}z}" + , "1^{}2" + , "3_{{}}" + , "4^(a(*b(c*)d))" + ] =?> + para (spcSep [ "a" <> subscript "(a(b)(c)d)" + , "e" <> superscript "(f(g)h)" + , "i" <> (subscript "(jk)") <> "l)" + , "m" <> (superscript "()") <> "n" + , "o" <> subscript "p{q{}r}" + , "s" <> superscript "t{u}v" + , "w" <> (subscript "xy") <> "z}" + , "1" <> (superscript "") <> "2" + , "3" <> subscript "{}" + , "4" <> superscript ("(a(" <> strong "b(c" <> ")d))") + ]) + , "Image" =: "[[./sunset.jpg]]" =?> (para $ image "./sunset.jpg" "" "") -- cgit v1.2.3 From ae4280fba528efe68c5955cb3ca0779e6910f43b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 12 Apr 2014 00:17:46 +0200 Subject: Org reader: Add support for figures Support for figures (images with name and caption) is added. --- src/Text/Pandoc/Readers/Org.hs | 57 ++++++++++++++++++++++++++++++++++++------ tests/Tests/Readers/Org.hs | 21 ++++++++++++++++ 2 files changed, 70 insertions(+), 8 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..8f0ce61e0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateL import Text.Pandoc.Shared (compactify') import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Arrow ((***)) import Control.Monad (guard, when) import Data.Char (toLower) import Data.Default @@ -158,6 +159,7 @@ block = choice [ mempty <$ blanklines , orgBlock , example , drawer + , figure , specialLine , header , hline @@ -252,6 +254,43 @@ drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline +-- +-- Figures +-- + +-- Figures (Image on a line by itself, preceded by name and/or caption) +figure :: OrgParser Blocks +figure = try $ do + (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) + <$> nameAndOrCaption + src <- skipSpaces *> selfTarget <* skipSpaces <* newline + guard (isImageFilename src) + return . B.para $ B.image src tit cap + where withFigPrefix cs = if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs + +nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines) +nameAndOrCaption = try $ nameFirst <|> captionFirst + where + nameFirst = try $ do + n <- name + c <- optionMaybe caption + return (Just n, c) + captionFirst = try $ do + c <- caption + n <- optionMaybe name + return (n, Just c) + +caption :: OrgParser Inlines +caption = try $ annotation "CAPTION" *> inlinesTillNewline + +name :: OrgParser String +name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline + +annotation :: String -> OrgParser String +annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' + -- Comments, Options and Metadata specialLine :: OrgParser Blocks specialLine = try $ metaLine <|> commentLine @@ -277,7 +316,7 @@ declarationLine = try $ do return mempty metaValue :: OrgParser MetaValue -metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine +metaValue = MetaInlines . B.toList <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -288,7 +327,7 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") header :: OrgParser Blocks header = try $ B.header <$> headerStart - <*> (trimInlines <$> restOfLine) + <*> inlinesTillNewline headerStart :: OrgParser Int headerStart = try $ @@ -424,13 +463,10 @@ setAligns aligns t = t{ orgTableAlignments = aligns } -- Paragraphs or Plain text paraOrPlain :: OrgParser Blocks paraOrPlain = try $ - trimInlines . mconcat - <$> many1 inline - <**> option B.plain - (try $ newline *> pure B.para) + parseInlines <**> option B.plain (try $ newline *> pure B.para) -restOfLine :: OrgParser Inlines -restOfLine = mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser Inlines +inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline -- @@ -523,6 +559,8 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) "inline" +parseInlines :: OrgParser Inlines +parseInlines = trimInlines . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] @@ -580,6 +618,9 @@ selflinkOrImage = try $ do then B.image src "" "" else B.link src "" (B.str src) +selfTarget :: OrgParser String +selfTarget = try $ char '[' *> linkTarget <* char ']' + linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 49130f0ab..99dadc0ac 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -377,6 +377,27 @@ tests = code' = "main = putStrLn greeting\n" ++ " where greeting = \"moin\"\n" in codeBlockWith attr' code' + + , "Figure" =: + unlines [ "#+caption: A very courageous man." + , "#+name: goodguy" + , "[[edward.jpg]]" + ] =?> + para (image "edward.jpg" "fig:goodguy" "A very courageous man.") + + , "Unnamed figure" =: + unlines [ "#+caption: A great whistleblower." + , "[[snowden.png]]" + ] =?> + para (image "snowden.png" "" "A great whistleblower.") + + , "Figure with `fig:` prefix in name" =: + unlines [ "#+caption: Used as a metapher in evolutionary biology." + , "#+name: fig:redqueen" + , "[[the-red-queen.jpg]]" + ] =?> + para (image "the-red-queen.jpg" "fig:redqueen" + "Used as a metapher in evolutionary biology.") ] , testGroup "Lists" $ -- cgit v1.2.3 From 82d4160bdcc149df020d1f95f4a7d893a9ecb42a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 12 Apr 2014 11:07:38 +0200 Subject: Org reader: Read linebreaks Linebreaks are marked by the string `\\` at the end of a line. --- src/Text/Pandoc/Readers/Org.hs | 4 ++++ tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..19dd03c6b 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -506,6 +506,7 @@ anyLineNewline = (++ "\n") <$> anyLine inline :: OrgParser Inlines inline = choice [ whitespace + , linebreak , link , str , endline @@ -535,6 +536,9 @@ whitespace = B.space <$ skipMany1 spaceChar <* updateLastForbiddenCharPos "whitespace" +linebreak :: OrgParser Inlines +linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline + str :: OrgParser Inlines str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 49130f0ab..567cc4c41 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -94,6 +94,10 @@ tests = "a_{n+1}" =?> para (str "a" <> subscript "n+1") + , "Linebreak" =: + "line \\\\ \nbreak" =?> + para ("line" <> linebreak <> "break") + , "Markup-chars not occuring on word break are symbols" =: unlines [ "this+that+ +so+on" , "seven*eight* nine*" -- cgit v1.2.3 From d4c1cd456c16298069b05e1da328e70dc87ea547 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 12 Apr 2014 21:44:54 -0700 Subject: Org reader: Removed ANN pragma. This relies on Template Haskell, which causes problems in Windows due to libraries with C dependencies. We need to avoid using TH in pandoc code. --- src/Text/Pandoc/Readers/Org.hs | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ceac69367..36b1ad287 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -44,9 +44,6 @@ import Data.List (foldl', isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (mconcat, mempty, mappend) --- Ignore HLint warnings to use String instead of [Char] -{-# ANN module ("HLint: ignore Use String" :: String) #-} - -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -- cgit v1.2.3 From 346bcea713f933a6cf48829d948e14b9c28b4798 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 16 Apr 2014 13:22:48 +0200 Subject: Org reader: Better module description, minor style changes Use module description analogous to the markdown reader's. Use (<$) where it makes sense. --- src/Text/Pandoc/Readers/Org.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index bda0b0262..c4ea64ba7 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Maintainer : Albert Krewinkel -Conversion of Org-Mode to 'Pandoc' document. +Conversion of org-mode formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Org ( readOrg ) where @@ -711,7 +711,7 @@ math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c - eof <|> lookAhead (oneOf mathPostChars) *> return () + eof <|> () <$ lookAhead (oneOf mathPostChars) return [res] rawMathBetween :: String @@ -734,12 +734,12 @@ emphasisEnd :: Char -> OrgParser Char emphasisEnd c = try $ do guard =<< notAfterForbiddenBorderChar char c - eof <|> lookAhead (surroundingEmphasisChar >>= \x -> - oneOf (x ++ emphasisPostChars)) - *> return () + eof <|> () <$ lookAhead acceptablePostChars updateLastStrPos popInlineCharStack return c + where acceptablePostChars = + surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) mathStart :: Char -> OrgParser Char mathStart c = try $ @@ -749,7 +749,7 @@ mathEnd :: Char -> OrgParser Char mathEnd c = try $ do res <- noneOf (c:mathForbiddenBorderChars) char c - eof <|> lookAhead (oneOf mathPostChars *> pure ()) + eof <|> () <$ lookAhead (oneOf mathPostChars) return res -- cgit v1.2.3 From 5fc252270c8332908e3ad9ec12d16c08c49de4a2 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 14 Apr 2014 15:04:32 +0200 Subject: Org reader: Fix code for subexpression parsing --- src/Text/Pandoc/Readers/Org.hs | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c4ea64ba7..ec0436f4c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -844,24 +844,12 @@ notAfterForbiddenBorderChar = do -- | Read a sub- or superscript expression subOrSuperExpr :: OrgParser Inlines -subOrSuperExpr = try $ do - choice [ balancedSexp '{' '}' - , balancedSexp '(' ')' >>= return . enclosing ('(', ')') +subOrSuperExpr = try $ + choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") + , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") , simpleSubOrSuperString ] >>= parseFromString (mconcat <$> many inline) - --- | Read a balanced sexp -balancedSexp :: Char - -> Char - -> OrgParser String -balancedSexp l r = try $ do - char l - res <- concat <$> many ( many1 (noneOf ([l, r] ++ "\n\r")) - <|> try (string [l, r]) - <|> enclosing (l, r) <$> balancedSexp l r - ) - char r - return res + where enclosing (left, right) s = left : s ++ [right] simpleSubOrSuperString :: OrgParser String simpleSubOrSuperString = try $ @@ -869,8 +857,3 @@ simpleSubOrSuperString = try $ , mappend <$> option [] ((:[]) <$> oneOf "+-") <*> many1 alphaNum ] - -enclosing :: (a, a) - -> [a] - -> [a] -enclosing (left, right) s = left : s ++ [right] -- cgit v1.2.3 From 92582c6272a3a171c406699e46e88afc4835d85c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 16 Apr 2014 11:58:16 +0200 Subject: Org reader: introduce Reader environment around Blocks/Inlines This introduces a Reader environment in the style of Text.Pandoc.Parsing.F, but adapted to the Org reader parser. --- src/Text/Pandoc/Readers/Org.hs | 306 ++++++++++++++++++++++++----------------- 1 file changed, 176 insertions(+), 130 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index ec0436f4c..bdff4869c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2014 Albert Krewinkel @@ -29,21 +30,26 @@ Conversion of org-mode formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Org ( readOrg ) where import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..)) +import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>) + , trimInlines ) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos) +import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF + , newline, orderedListMarker + , updateLastStrPos ) import Text.Pandoc.Shared (compactify') -import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>)) +import Control.Applicative ( Applicative, pure + , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow ((***)) -import Control.Monad (guard, when) +import Control.Monad (foldM, guard, liftM, liftM2, when) +import Control.Monad.Reader (Reader, runReader) import Data.Char (toLower) import Data.Default -import Data.List (foldl', isPrefixOf, isSuffixOf) +import Data.List (isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) -import Data.Monoid (mconcat, mempty, mappend) +import Data.Monoid (Monoid, mconcat, mempty, mappend) -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options @@ -55,10 +61,10 @@ type OrgParser = Parser [Char] OrgParserState parseOrg:: OrgParser Pandoc parseOrg = do - blocks' <- B.toList <$> parseBlocks + blocks' <- parseBlocks st <- getState - let meta = orgStateMeta st - return $ Pandoc meta $ filter (/= Null) blocks' + let meta = runF (orgStateMeta' st) st + return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st) -- -- Parser State for Org @@ -73,7 +79,8 @@ data OrgParserState = OrgParserState , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateMeta :: Meta - } deriving (Show) + , orgStateMeta' :: F Meta + } instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -96,6 +103,7 @@ defaultOrgParserState = OrgParserState , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing , orgStateMeta = nullMeta + , orgStateMeta' = return nullMeta } updateLastStrPos :: OrgParser () @@ -138,6 +146,27 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } + +-- +-- Adaptions and specializations of parsing utilities +-- + +newtype F a = F { unF :: Reader OrgParserState a + } deriving (Monad, Applicative, Functor) + +runF :: F a -> OrgParserState -> a +runF = runReader . unF + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = fmap mconcat . sequence + +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines + + +-- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char newline = P.newline @@ -148,10 +177,10 @@ newline = -- parsing blocks -- -parseBlocks :: OrgParser Blocks +parseBlocks :: OrgParser (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: OrgParser Blocks +block :: OrgParser (F Blocks) block = choice [ mempty <$ blanklines , orgBlock , example @@ -159,7 +188,7 @@ block = choice [ mempty <$ blanklines , figure , specialLine , header - , hline + , return <$> hline , list , table , paraOrPlain @@ -169,15 +198,15 @@ block = choice [ mempty <$ blanklines -- Org Blocks (#+BEGIN_... / #+END_...) -- -orgBlock :: OrgParser Blocks +orgBlock :: OrgParser (F Blocks) orgBlock = try $ do (indent, blockType, args) <- blockHeader blockStr <- rawBlockContent indent blockType let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr - _ -> B.divWith ("", [blockType], []) + "src" -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr + _ -> fmap (B.divWith ("", [blockType], [])) <$> parseFromString parseBlocks blockStr blockHeader :: OrgParser (Int, String, [String]) @@ -222,15 +251,16 @@ commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs commaEscaped cs = cs -example :: OrgParser Blocks -example = try $ - B.codeBlockWith ("", ["example"], []) . unlines <$> many1 exampleLine +example :: OrgParser (F Blocks) +example = try $ do + body <- unlines <$> many1 exampleLine + return . return $ B.codeBlockWith ("", ["example"], []) body exampleLine :: OrgParser String exampleLine = try $ string ": " *> anyLine -- Drawers for properties or a logbook -drawer :: OrgParser Blocks +drawer :: OrgParser (F Blocks) drawer = try $ do drawerStart manyTill drawerLine (try drawerEnd) @@ -256,18 +286,20 @@ drawerEnd = try $ -- -- Figures (Image on a line by itself, preceded by name and/or caption) -figure :: OrgParser Blocks +figure :: OrgParser (F Blocks) figure = try $ do (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) <$> nameAndOrCaption src <- skipSpaces *> selfTarget <* skipSpaces <* newline guard (isImageFilename src) - return . B.para $ B.image src tit cap + return $ do + cap' <- cap + return $ B.para $ B.image src tit cap' where withFigPrefix cs = if "fig:" `isPrefixOf` cs then cs else "fig:" ++ cs -nameAndOrCaption :: OrgParser (Maybe String, Maybe Inlines) +nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines)) nameAndOrCaption = try $ nameFirst <|> captionFirst where nameFirst = try $ do @@ -279,7 +311,7 @@ nameAndOrCaption = try $ nameFirst <|> captionFirst n <- optionMaybe name return (n, Just c) -caption :: OrgParser Inlines +caption :: OrgParser (F Inlines) caption = try $ annotation "CAPTION" *> inlinesTillNewline name :: OrgParser String @@ -289,8 +321,8 @@ annotation :: String -> OrgParser String annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' -- Comments, Options and Metadata -specialLine :: OrgParser Blocks -specialLine = try $ metaLine <|> commentLine +specialLine :: OrgParser (F Blocks) +specialLine = fmap return . try $ metaLine <|> commentLine metaLine :: OrgParser Blocks metaLine = try $ metaLineStart *> declarationLine @@ -308,12 +340,15 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# " declarationLine :: OrgParser Blocks declarationLine = try $ do - meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta - updateState $ \st -> st { orgStateMeta = orgStateMeta st <> meta' } + key <- metaKey + inlinesF <- metaInlines + updateState $ \st -> + let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta + in st { orgStateMeta' = orgStateMeta' st <> meta' } return mempty -metaValue :: OrgParser MetaValue -metaValue = MetaInlines . B.toList <$> inlinesTillNewline +metaInlines :: OrgParser (F MetaValue) +metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaKey :: OrgParser String metaKey = map toLower <$> many1 (noneOf ": \n\r") @@ -321,16 +356,20 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* skipSpaces -- | Headers -header :: OrgParser Blocks -header = try $ - B.header <$> headerStart - <*> inlinesTillNewline +header :: OrgParser (F Blocks) +header = try $ do + level <- headerStart + title <- inlinesTillNewline + return $ B.header level <$> title headerStart :: OrgParser Int headerStart = try $ (length <$> many1 (char '*')) <* many1 (char ' ') --- Horizontal Line (five dashes or more) +-- Don't use (or need) the reader wrapper here, we want hline to be +-- @show@able. Otherwise we can't use it with @notFollowedBy'@. + +-- | Horizontal Line (five -- dashes or more) hline :: OrgParser Blocks hline = try $ do skipSpaces @@ -344,22 +383,23 @@ hline = try $ do -- Tables -- -data OrgTableRow = OrgContentRow [Blocks] +data OrgTableRow = OrgContentRow (F [Blocks]) | OrgAlignRow [Alignment] | OrgHlineRow - deriving (Eq, Show) data OrgTable = OrgTable { orgTableColumns :: Int , orgTableAlignments :: [Alignment] , orgTableHeader :: [Blocks] , orgTableRows :: [[Blocks]] - } deriving (Eq, Show) + } -table :: OrgParser Blocks +table :: OrgParser (F Blocks) table = try $ do lookAhead tableStart - orgToPandocTable . normalizeTable . rowsToTable <$> tableRows + do + rows <- tableRows + return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable -> Blocks @@ -374,11 +414,11 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) -tableContentCell :: OrgParser Blocks +tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - B.plain . trimInlines . mconcat <$> many1Till inline endOfCell + fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell endOfCell :: OrgParser Char endOfCell = try $ char '|' <|> lookAhead newline @@ -410,8 +450,8 @@ tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) rowsToTable :: [OrgTableRow] - -> OrgTable -rowsToTable = foldl' (flip rowToContent) zeroTable + -> F OrgTable +rowsToTable = foldM (flip rowToContent) zeroTable where zeroTable = OrgTable 0 mempty mempty mempty normalizeTable :: OrgTable @@ -430,57 +470,64 @@ normalizeTable (OrgTable cols aligns heads lns) = -- line as a header. All other horizontal lines are discarded. rowToContent :: OrgTableRow -> OrgTable - -> OrgTable -rowToContent OrgHlineRow = maybeBodyToHeader -rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs -rowToContent (OrgAlignRow as) = setLongestRow as . setAligns as + -> F OrgTable +rowToContent OrgHlineRow t = maybeBodyToHeader t +rowToContent (OrgAlignRow as) t = setLongestRow as =<< setAligns as t +rowToContent (OrgContentRow rf) t = do + rs <- rf + setLongestRow rs =<< appendToBody rs t setLongestRow :: [a] -> OrgTable - -> OrgTable -setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) } + -> F OrgTable +setLongestRow rs t = + return t{ orgTableColumns = max (length rs) (orgTableColumns t) } maybeBodyToHeader :: OrgTable - -> OrgTable + -> F OrgTable maybeBodyToHeader t = case t of OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - t{ orgTableHeader = b , orgTableRows = [] } - _ -> t + return t{ orgTableHeader = b , orgTableRows = [] } + _ -> return t appendToBody :: [Blocks] -> OrgTable - -> OrgTable -appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] } + -> F OrgTable +appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] } setAligns :: [Alignment] -> OrgTable - -> OrgTable -setAligns aligns t = t{ orgTableAlignments = aligns } + -> F OrgTable +setAligns aligns t = return $ t{ orgTableAlignments = aligns } -- Paragraphs or Plain text -paraOrPlain :: OrgParser Blocks +paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ - parseInlines <**> option B.plain (try $ newline *> pure B.para) + parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para)) -inlinesTillNewline :: OrgParser Inlines -inlinesTillNewline = trimInlines . mconcat <$> manyTill inline newline +inlinesTillNewline :: OrgParser (F Inlines) +inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline -- -- list blocks -- -list :: OrgParser Blocks +list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] "list" -definitionList :: OrgParser Blocks -definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart) +definitionList :: OrgParser (F Blocks) +definitionList = fmap B.definitionList . sequence + <$> many1 (definitionListItem bulletListStart) -bulletList :: OrgParser Blocks -bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) +bulletList :: OrgParser (F Blocks) +bulletList = fmap B.bulletList . fmap compactify' . sequence + <$> many1 (listItem bulletListStart) -orderedList :: OrgParser Blocks -orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) +orderedList :: OrgParser (F Blocks) +-- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) +orderedList = fmap B.orderedList . fmap compactify' . sequence + <$> many1 (listItem orderedListStart) genericListStart :: OrgParser String -> OrgParser Int @@ -499,7 +546,7 @@ orderedListStart = genericListStart orderedListMarker where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") definitionListItem :: OrgParser Int - -> OrgParser (Inlines, [Blocks]) + -> OrgParser (F (Inlines, [Blocks])) definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") @@ -507,12 +554,12 @@ definitionListItem parseMarkerGetLength = try $ do cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term contents' <- parseFromString parseBlocks $ first ++ cont - return (term', [contents']) + return $ (,) <$> term' <*> fmap (:[]) contents' -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int - -> OrgParser Blocks + -> OrgParser (F Blocks) listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline @@ -536,11 +583,11 @@ anyLineNewline = (++ "\n") <$> anyLine -- inline -- -inline :: OrgParser Inlines +inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak - , link + , linkOrImage , str , endline , emph @@ -557,29 +604,29 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) "inline" -parseInlines :: OrgParser Inlines -parseInlines = trimInlines . mconcat <$> many1 inline +parseInlines :: OrgParser (F Inlines) +parseInlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] specialChars = "\"$'()*+-./:<=>[\\]^_{|}~" -whitespace :: OrgParser Inlines -whitespace = B.space <$ skipMany1 spaceChar - <* updateLastPreCharPos - <* updateLastForbiddenCharPos +whitespace :: OrgParser (F Inlines) +whitespace = pure B.space <$ skipMany1 spaceChar + <* updateLastPreCharPos + <* updateLastForbiddenCharPos "whitespace" -linebreak :: OrgParser Inlines -linebreak = try $ B.linebreak <$ string "\\\\" <* skipSpaces <* newline +linebreak :: OrgParser (F Inlines) +linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline -str :: OrgParser Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") - <* updateLastStrPos +str :: OrgParser (F Inlines) +str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") + <* updateLastStrPos -- an endline character that can be treated as a space, not a structural break -endline :: OrgParser Inlines +endline :: OrgParser (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -595,29 +642,29 @@ endline = try $ do decEmphasisNewlinesCount guard =<< newlinesCountWithinLimits updateLastPreCharPos - return B.space + return . return $ B.space -link :: OrgParser Inlines -link = explicitOrImageLink <|> selflinkOrImage "link" +linkOrImage :: OrgParser (F Inlines) +linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" -explicitOrImageLink :: OrgParser Inlines +explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' src <- linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return . B.link src "" - $ if isImageFilename src && isImageFilename title - then B.image title "" "" - else title' + return $ B.link src "" <$> + if isImageFilename src && isImageFilename title + then return $ B.image title mempty mempty + else title' -selflinkOrImage :: OrgParser Inlines +selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + return . return $ if isImageFilename src + then B.image src "" "" + else B.link src "" (B.str src) selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' @@ -634,51 +681,50 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] -emph :: OrgParser Inlines -emph = B.emph <$> emphasisBetween '/' +emph :: OrgParser (F Inlines) +emph = fmap B.emph <$> emphasisBetween '/' -strong :: OrgParser Inlines -strong = B.strong <$> emphasisBetween '*' +strong :: OrgParser (F Inlines) +strong = fmap B.strong <$> emphasisBetween '*' -strikeout :: OrgParser Inlines -strikeout = B.strikeout <$> emphasisBetween '+' +strikeout :: OrgParser (F Inlines) +strikeout = fmap B.strikeout <$> emphasisBetween '+' -- There is no underline, so we use strong instead. -underline :: OrgParser Inlines -underline = B.strong <$> emphasisBetween '_' - -code :: OrgParser Inlines -code = B.code <$> verbatimBetween '=' +underline :: OrgParser (F Inlines) +underline = fmap B.strong <$> emphasisBetween '_' -verbatim :: OrgParser Inlines -verbatim = B.rawInline "" <$> verbatimBetween '~' +code :: OrgParser (F Inlines) +code = return . B.code <$> verbatimBetween '=' -math :: OrgParser Inlines -math = B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' - , rawMathBetween "\\(" "\\)" - ] +verbatim :: OrgParser (F Inlines) +verbatim = return . B.rawInline "" <$> verbatimBetween '~' -displayMath :: OrgParser Inlines -displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] +subscript :: OrgParser (F Inlines) +subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) -subscript :: OrgParser Inlines -subscript = B.subscript <$> try (char '_' *> subOrSuperExpr) +superscript :: OrgParser (F Inlines) +superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) -superscript :: OrgParser Inlines -superscript = B.superscript <$> try (char '^' *> subOrSuperExpr) +math :: OrgParser (F Inlines) +math = return . B.math <$> choice [ math1CharBetween '$' + , mathStringBetween '$' + , rawMathBetween "\\(" "\\)" + ] -symbol :: OrgParser Inlines -symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +displayMath :: OrgParser (F Inlines) +displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" + , rawMathBetween "$$" "$$" + ] +symbol :: OrgParser (F Inlines) +symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) where updatePositions c | c `elem` emphasisPreChars = c <$ updateLastPreCharPos | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos | otherwise = return c emphasisBetween :: Char - -> OrgParser Inlines + -> OrgParser (F Inlines) emphasisBetween c = try $ do startEmphasisNewlinesCounting emphasisAllowedNewlines res <- enclosedInlines (emphasisStart c) (emphasisEnd c) @@ -755,9 +801,9 @@ mathEnd c = try $ do enclosedInlines :: OrgParser a -> OrgParser b - -> OrgParser Inlines + -> OrgParser (F Inlines) enclosedInlines start end = try $ - trimInlines . mconcat <$> enclosed start end inline + trimInlinesF . mconcat <$> enclosed start end inline enclosedRaw :: OrgParser a -> OrgParser b @@ -843,7 +889,7 @@ notAfterForbiddenBorderChar = do return $ lastFBCPos /= Just pos -- | Read a sub- or superscript expression -subOrSuperExpr :: OrgParser Inlines +subOrSuperExpr :: OrgParser (F Inlines) subOrSuperExpr = try $ choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") -- cgit v1.2.3 From 0672f58a445c289c58e42cffbbf32a273e801e39 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 18:43:49 +0200 Subject: Org reader: Support footnotes --- src/Text/Pandoc/Readers/Org.hs | 66 ++++++++++++++++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 4 +++ 2 files changed, 68 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index bdff4869c..17f8a1c9e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -44,7 +44,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow ((***)) import Control.Monad (foldM, guard, liftM, liftM2, when) -import Control.Monad.Reader (Reader, runReader) +import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default import Data.List (isPrefixOf, isSuffixOf) @@ -59,7 +59,7 @@ readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") type OrgParser = Parser [Char] OrgParserState -parseOrg:: OrgParser Pandoc +parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks st <- getState @@ -70,6 +70,9 @@ parseOrg = do -- Parser State for Org -- +type OrgNoteRecord = (String, F Blocks) +type OrgNoteTable = [OrgNoteRecord] + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -80,6 +83,7 @@ data OrgParserState = OrgParserState , orgStateLastStrPos :: Maybe SourcePos , orgStateMeta :: Meta , orgStateMeta' :: F Meta + , orgStateNotes' :: OrgNoteTable } instance HasReaderOptions OrgParserState where @@ -104,6 +108,7 @@ defaultOrgParserState = OrgParserState , orgStateLastStrPos = Nothing , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta + , orgStateNotes' = [] } updateLastStrPos :: OrgParser () @@ -146,6 +151,11 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addToNotesTable :: OrgNoteRecord -> OrgParser () +addToNotesTable note = do + oldnotes <- orgStateNotes' <$> getState + updateState $ \s -> s{ orgStateNotes' = note:oldnotes } + -- -- Adaptions and specializations of parsing utilities @@ -157,6 +167,12 @@ newtype F a = F { unF :: Reader OrgParserState a runF :: F a -> OrgParserState -> a runF = runReader . unF +askF :: F OrgParserState +askF = F ask + +asksF :: (OrgParserState -> a) -> F a +asksF f = F $ asks f + instance Monoid a => Monoid (F a) where mempty = return mempty mappend = liftM2 mappend @@ -191,6 +207,7 @@ block = choice [ mempty <$ blanklines , return <$> hline , list , table + , noteBlock , paraOrPlain ] "block" @@ -500,6 +517,16 @@ setAligns :: [Alignment] -> F OrgTable setAligns aligns t = return $ t{ orgTableAlignments = aligns } +-- +-- Footnote defintions +-- +noteBlock :: OrgParser (F Blocks) +noteBlock = try $ do + ref <- noteMarker + content <- skipSpaces *> paraOrPlain + addToNotesTable (ref, content) + return mempty + -- Paragraphs or Plain text paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ @@ -587,6 +614,7 @@ inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak + , footnote , linkOrImage , str , endline @@ -632,6 +660,7 @@ endline = try $ do notFollowedBy blankline notFollowedBy' exampleLine notFollowedBy' hline + notFollowedBy' noteMarker notFollowedBy' tableStart notFollowedBy' drawerStart notFollowedBy' headerStart @@ -644,6 +673,39 @@ endline = try $ do updateLastPreCharPos return . return $ B.space +footnote :: OrgParser (F Inlines) +footnote = try $ inlineNote <|> referencedNote + +inlineNote :: OrgParser (F Inlines) +inlineNote = try $ do + string "[fn:" + ref <- many alphaNum + char ':' + note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') + when (not $ null ref) $ + addToNotesTable ("fn:" ++ ref, note) + return $ B.note <$> note + +referencedNote :: OrgParser (F Inlines) +referencedNote = try $ do + ref <- noteMarker + return $ do + notes <- asksF orgStateNotes' + case lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just contents -> do + st <- askF + let contents' = runF contents st{ orgStateNotes' = [] } + return $ B.note contents' + +noteMarker :: OrgParser String +noteMarker = try $ do + char '[' + choice [ many1Till digit (char ']') + , (++) <$> string "fn:" + <*> many1Till (noneOf "\n\r\t ") (char ']') + ] + linkOrImage :: OrgParser (F Inlines) linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f39bd7992..7f9c5f1d5 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -98,6 +98,10 @@ tests = "line \\\\ \nbreak" =?> para ("line" <> linebreak <> "break") + , "Inline note" =: + "[fn::Schreib mir eine E-Mail]" =?> + para (note $ para "Schreib mir eine E-Mail") + , "Markup-chars not occuring on word break are symbols" =: unlines [ "this+that+ +so+on" , "seven*eight* nine*" -- cgit v1.2.3 From 6d6724cf2c6ae6bcc0df312c476e45644c972a85 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 17 Apr 2014 18:09:27 +0200 Subject: Org reader: Support more types of '#+BEGIN_' blocks Support for standard org-blocks is improved. The parser now handles "HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a sensible fashion. --- src/Text/Pandoc/Readers/Org.hs | 41 +++++++++++++++--- tests/Tests/Readers/Org.hs | 97 ++++++++++++++++++++++++++++++++---------- 2 files changed, 108 insertions(+), 30 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 17f8a1c9e..88e81f5fc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker + , parseFromString , updateLastStrPos ) import Text.Pandoc.Shared (compactify') @@ -47,7 +48,7 @@ import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default -import Data.List (isPrefixOf, isSuffixOf) +import Data.List (intersperse, isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (Monoid, mconcat, mempty, mappend) @@ -156,6 +157,16 @@ addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState updateState $ \s -> s{ orgStateNotes' = note:oldnotes } +-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts +-- of the state saved and restored. +parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString parser str' = do + oldLastPreCharPos <- orgStateLastPreCharPos <$> getState + updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } + result <- P.parseFromString parser str' + updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } + return result + -- -- Adaptions and specializations of parsing utilities @@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines orgBlock :: OrgParser (F Blocks) orgBlock = try $ do (indent, blockType, args) <- blockHeader - blockStr <- rawBlockContent indent blockType + content <- rawBlockContent indent blockType + contentBlocks <- parseFromString parseBlocks (content ++ "\n") let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr - _ -> fmap (B.divWith ("", [blockType], [])) - <$> parseFromString parseBlocks blockStr + "src" -> returnF $ B.codeBlockWith ("", classArgs, []) content + "html" -> returnF $ B.rawBlock "html" content + "latex" -> returnF $ B.rawBlock "latex" content + "ascii" -> returnF $ B.rawBlock "ascii" content + "example" -> returnF $ exampleCode content + "quote" -> return $ B.blockQuote <$> contentBlocks + "verse" -> parseVerse content + _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + where + returnF :: a -> OrgParser (F a) + returnF = return . return + + parseVerse :: String -> OrgParser (F Blocks) + parseVerse cs = + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines cs) blockHeader :: OrgParser (Int, String, [String]) blockHeader = (,,) <$> blockIndent @@ -270,8 +295,10 @@ commaEscaped cs = cs example :: OrgParser (F Blocks) example = try $ do - body <- unlines <$> many1 exampleLine - return . return $ B.codeBlockWith ("", ["example"], []) body + return . return . exampleCode =<< unlines <$> many1 exampleLine + +exampleCode :: String -> Blocks +exampleCode = B.codeBlockWith ("", ["example"], []) exampleLine :: OrgParser String exampleLine = try $ string ": " *> anyLine diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 7f9c5f1d5..7d5bfe650 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -363,29 +363,6 @@ tests = , "#+END_COMMENT"] =?> (mempty::Blocks) - , "Source Block in Text" =: - unlines [ "Low German greeting" - , " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] - , codeBlockWith attr' code' - ] - - , "Source Block" =: - unlines [ " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in codeBlockWith attr' code' - , "Figure" =: unlines [ "#+caption: A very courageous man." , "#+name: goodguy" @@ -661,4 +638,78 @@ tests = , [ plain "2" , plain mempty , plain mempty ] ] ] + + , testGroup "Blocks" + [ "Source block" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + + , "Source block between paragraphs" =: + unlines [ "Low German greeting" + , " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"Moin!\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"Moin!\"\n" + in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] + , codeBlockWith attr' code' + ] + + , "Example block" =: + unlines [ "#+begin_example" + , "A chosen representation of" + , "a rule." + , "#+eND_exAMPle" + ] =?> + codeBlockWith ("", ["example"], []) + "A chosen representation of\na rule.\n" + + , "HTML block" =: + unlines [ "#+BEGIN_HTML" + , "" + , "#+END_HTML" + ] =?> + rawBlock "html" "\n" + + , "Quote block" =: + unlines [ "#+BEGIN_QUOTE" + , "/Niemand/ hat die Absicht, eine Mauer zu errichten!" + , "#+END_QUOTE" + ] =?> + blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht," + , "eine", "Mauer", "zu", "errichten!" + ])) + + , "Verse block" =: + unlines [ "The first lines of Goethe's /Faust/:" + , "#+begin_verse" + , "Habe nun, ach! Philosophie," + , "Juristerei und Medizin," + , "Und leider auch Theologie!" + , "Durchaus studiert, mit heißem Bemühn." + , "#+end_verse" + ] =?> + mconcat + [ para $ spcSep [ "The", "first", "lines", "of" + , "Goethe's", emph "Faust" <> ":"] + , para $ mconcat + [ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ] + , linebreak + , spcSep [ "Juristerei", "und", "Medizin," ] + , linebreak + , spcSep [ "Und", "leider", "auch", "Theologie!" ] + , linebreak + , spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ] + ] + ] + + ] ] -- cgit v1.2.3 From f19d7233d8d3e47912b760fc62a253e5baf8275a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 08:33:25 +0200 Subject: Org reader: Fix parsing of loose lists Loose lists (i.e. lists with blankline separated items), were parsed as multiple lists, each containing a single item. This patch fixes this issue. --- src/Text/Pandoc/Readers/Org.hs | 11 +++++++---- tests/Tests/Readers/Org.hs | 21 ++++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 88e81f5fc..1fa8d4d5e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -605,9 +605,10 @@ definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") first <- anyLineNewline + blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term - contents' <- parseFromString parseBlocks $ first ++ cont + contents' <- parseFromString parseBlocks $ first ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' @@ -617,16 +618,18 @@ listItem :: OrgParser Int listItem start = try $ do markerLength <- try start firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) - parseFromString parseBlocks $ firstLine ++ rest + parseFromString parseBlocks $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Int -> OrgParser String listContinuation markerLength = try $ - mappend <$> many blankline - <*> (concat <$> many1 listLine) + notFollowedBy' blankline + *> (mappend <$> (concat <$> many1 listLine) + <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline anyLineNewline :: OrgParser String diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 7d5bfe650..572fc501f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -518,13 +518,24 @@ tests = , ("TTL", [ plain $ "transistor-transistor" <> space <> "logic" ]) , ("PSK", [ mconcat - [ para $ "phase-shift" <> space <> "keying" - , plain $ spcSep [ "a", "digital" - , "modulation", "scheme" ] + [ para $ "phase-shift" <> space <> "keying" + , para $ spcSep [ "a", "digital" + , "modulation", "scheme" ] ] - ] - ) + ]) ] + + , "Loose bullet list" =: + unlines [ "- apple" + , "" + , "- orange" + , "" + , "- peach" + ] =?> + bulletList [ para "apple" + , para "orange" + , para "peach" + ] ] , testGroup "Tables" -- cgit v1.2.3 From 09441b65a83f372410394a88af7808f494c3aa57 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 10:15:58 +0200 Subject: Org reader: Add support for plain LaTeX fragments This adds support for LaTeX fragments like the following: ``` \begin{equation} \int fg \mathrm{d}x \end{equation} ``` --- src/Text/Pandoc/Readers/Org.hs | 41 ++++++++++++++++++++++++++++++++++++++++- tests/Tests/Readers/Org.hs | 20 +++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 1fa8d4d5e..66cfe720e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -218,6 +218,7 @@ block = choice [ mempty <$ blanklines , return <$> hline , list , table + , latexFragment , noteBlock , paraOrPlain ] "block" @@ -544,6 +545,41 @@ setAligns :: [Alignment] -> F OrgTable setAligns aligns t = return $ t{ orgTableAlignments = aligns } + +-- +-- LaTeX fragments +-- +latexFragment :: OrgParser (F Blocks) +latexFragment = try $ do + envName <- latexEnvStart + content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) + return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + where + c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" + , c + , "\\end{", e, "}\n" + ] + +latexEnvStart :: OrgParser String +latexEnvStart = try $ do + skipSpaces *> string "\\begin{" + *> latexEnvName + <* string "}" + <* blankline + +latexEnd :: String -> OrgParser () +latexEnd envName = try $ + () <$ skipSpaces + <* string ("\\end{" ++ envName ++ "}") + <* blankline + +-- | Parses a LaTeX environment name. +latexEnvName :: OrgParser String +latexEnvName = try $ do + mappend <$> many1 alphaNum + <*> option "" (string "*") + + -- -- Footnote defintions -- @@ -683,7 +719,9 @@ str :: OrgParser (F Inlines) str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos --- an endline character that can be treated as a space, not a structural break +-- | An endline character that can be treated as a space, not a structural +-- break. This should reflect the values of the Emacs variable +-- @org-element-pagaraph-separate@. endline :: OrgParser (F Inlines) endline = try $ do newline @@ -695,6 +733,7 @@ endline = try $ do notFollowedBy' drawerStart notFollowedBy' headerStart notFollowedBy' metaLineStart + notFollowedBy' latexEnvStart notFollowedBy' commentLineStart notFollowedBy' bulletListStart notFollowedBy' orderedListStart diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 572fc501f..1ac2c1fd8 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -650,7 +650,7 @@ tests = ] ] - , testGroup "Blocks" + , testGroup "Blocks and fragments" [ "Source block" =: unlines [ " #+BEGIN_SRC haskell" , " main = putStrLn greeting" @@ -722,5 +722,23 @@ tests = ] ] + , "LaTeX fragment" =: + unlines [ "\\begin{equation}" + , "X_i = \\begin{cases}" + , " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) = \\alpha(i)\\\\" + , " C_{\\alpha(i)} & \\text{otherwise}" + , " \\end{cases}" + , "\\end{equation}" + ] =?> + rawBlock "latex" + (unlines [ "\\begin{equation}" + , "X_i = \\begin{cases}" + , " G_{\\alpha(i)} & \\text{if }\\alpha(i-1) =" ++ + " \\alpha(i)\\\\" + , " C_{\\alpha(i)} & \\text{otherwise}" + , " \\end{cases}" + , "\\end{equation}" + ]) + ] ] -- cgit v1.2.3 From 6ded3d41d94c1e90d1d30a1f99ddad62e62d9ce6 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 18 Apr 2014 20:47:50 +0200 Subject: Org reader: Apply captions to code blocks and tables The `Table` blocktype already takes the caption as an argument, while code blocks are wrapped in a `Div` block together with a labelling `Span`. --- src/Text/Pandoc/Readers/Org.hs | 134 +++++++++++++++++++++++++++++------------ tests/Tests/Readers/Org.hs | 31 +++++++++- 2 files changed, 124 insertions(+), 41 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 66cfe720e..025158060 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,13 +43,13 @@ import Text.Pandoc.Shared (compactify') import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Arrow ((***)) import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) -import Data.Maybe (listToMaybe, fromMaybe) +import qualified Data.Map as M +import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) -- | Parse org-mode string and return a Pandoc document. @@ -74,9 +74,12 @@ parseOrg = do type OrgNoteRecord = (String, F Blocks) type OrgNoteTable = [OrgNoteRecord] +type OrgBlockAttributes = M.Map String String + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int , orgStateLastForbiddenCharPos :: Maybe SourcePos @@ -102,6 +105,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing , orgStateLastForbiddenCharPos = Nothing @@ -112,6 +116,19 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +addBlockAttribute :: String -> String -> OrgParser () +addBlockAttribute key val = updateState $ \s -> + let attrs = orgStateBlockAttributes s + in s{ orgStateBlockAttributes = M.insert key val attrs } + +lookupBlockAttribute :: String -> OrgParser (Maybe String) +lookupBlockAttribute key = + M.lookup key . orgStateBlockAttributes <$> getState + +resetBlockAttributes :: OrgParser () +resetBlockAttributes = updateState $ \s -> + s{ orgStateBlockAttributes = orgStateBlockAttributes def } + updateLastStrPos :: OrgParser () updateLastStrPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastStrPos = Just p } @@ -125,19 +142,19 @@ updateLastPreCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastPreCharPos = Just p} pushToInlineCharStack :: Char -> OrgParser () -pushToInlineCharStack c = updateState $ \st -> - st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st } +pushToInlineCharStack c = updateState $ \s -> + s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } popInlineCharStack :: OrgParser () -popInlineCharStack = updateState $ \st -> - st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st } +popInlineCharStack = updateState $ \s -> + s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> - s { orgStateEmphasisNewlines = Just maxNewlines } + s{ orgStateEmphasisNewlines = Just maxNewlines } decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> @@ -209,20 +226,50 @@ parseBlocks = mconcat <$> manyTill block eof block :: OrgParser (F Blocks) block = choice [ mempty <$ blanklines - , orgBlock + , optionalAttributes $ choice + [ orgBlock + , figure + , table + ] , example , drawer - , figure , specialLine , header , return <$> hline , list - , table , latexFragment , noteBlock , paraOrPlain ] "block" +optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks) +optionalAttributes parser = try $ + resetBlockAttributes *> parseBlockAttributes *> parser + +parseBlockAttributes :: OrgParser () +parseBlockAttributes = do + attrs <- many attribute + () <$ mapM (uncurry parseAndAddAttribute) attrs + where + attribute :: OrgParser (String, String) + attribute = try $ do + key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + val <- skipSpaces *> anyLine + return (map toLower key, val) + +parseAndAddAttribute :: String -> String -> OrgParser () +parseAndAddAttribute key value = do + let key' = map toLower key + () <$ addBlockAttribute key' value + +lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines)) +lookupInlinesAttr attr = try $ do + val <- lookupBlockAttribute attr + maybe (return Nothing) + (fmap Just . parseFromString parseInlines) + val + + -- -- Org Blocks (#+BEGIN_... / #+END_...) -- @@ -235,13 +282,13 @@ orgBlock = try $ do let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> returnF $ B.codeBlockWith ("", classArgs, []) content "html" -> returnF $ B.rawBlock "html" content "latex" -> returnF $ B.rawBlock "latex" content "ascii" -> returnF $ B.rawBlock "ascii" content "example" -> returnF $ exampleCode content "quote" -> return $ B.blockQuote <$> contentBlocks "verse" -> parseVerse content + "src" -> codeBlockWithAttr classArgs content _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks where returnF :: a -> OrgParser (F a) @@ -260,6 +307,18 @@ blockHeader = (,,) <$> blockIndent blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline +codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) +codeBlockWithAttr classArgs content = do + identifier <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + where + labelDiv blk value = + B.divWith nullAttr <$> (mappend <$> labelledBlock value + <*> pure blk) + labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + rawBlockContent :: Int -> String -> OrgParser String rawBlockContent indent blockType = unlines . map commaEscaped <$> manyTill indentedLine blockEnder @@ -333,38 +392,26 @@ drawerEnd = try $ -- Figures (Image on a line by itself, preceded by name and/or caption) figure :: OrgParser (F Blocks) figure = try $ do - (tit, cap) <- (maybe mempty withFigPrefix *** fromMaybe mempty) - <$> nameAndOrCaption + (cap, nam) <- nameAndCaption src <- skipSpaces *> selfTarget <* skipSpaces <* newline guard (isImageFilename src) return $ do cap' <- cap - return $ B.para $ B.image src tit cap' - where withFigPrefix cs = if "fig:" `isPrefixOf` cs - then cs - else "fig:" ++ cs - -nameAndOrCaption :: OrgParser (Maybe String, Maybe (F Inlines)) -nameAndOrCaption = try $ nameFirst <|> captionFirst + return $ B.para $ B.image src nam cap' where - nameFirst = try $ do - n <- name - c <- optionMaybe caption - return (Just n, c) - captionFirst = try $ do - c <- caption - n <- optionMaybe name - return (n, Just c) - -caption :: OrgParser (F Inlines) -caption = try $ annotation "CAPTION" *> inlinesTillNewline - -name :: OrgParser String -name = try $ annotation "NAME" *> skipSpaces *> manyTill anyChar newline - -annotation :: String -> OrgParser String -annotation ann = try $ metaLineStart *> stringAnyCase ann <* char ':' + nameAndCaption = + do + maybeCap <- lookupInlinesAttr "caption" + maybeNam <- lookupBlockAttribute "name" + guard $ isJust maybeCap || isJust maybeNam + return ( fromMaybe mempty maybeCap + , maybe mempty withFigPrefix maybeNam ) + withFigPrefix cs = + if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs +-- -- Comments, Options and Metadata specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine @@ -400,6 +447,10 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +-- +-- Headers +-- + -- | Headers header :: OrgParser (F Blocks) header = try $ do @@ -411,6 +462,7 @@ headerStart :: OrgParser Int headerStart = try $ (length <$> many1 (char '*')) <* many1 (char ' ') + -- Don't use (or need) the reader wrapper here, we want hline to be -- @show@able. Otherwise we can't use it with @notFollowedBy'@. @@ -444,12 +496,14 @@ table = try $ do lookAhead tableStart do rows <- tableRows - return $ return . orgToPandocTable . normalizeTable =<< rowsToTable rows + cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption" + return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows orgToPandocTable :: OrgTable + -> Inlines -> Blocks -orgToPandocTable (OrgTable _ aligns heads lns) = - B.table "" (zip aligns $ repeat 0) heads lns +orgToPandocTable (OrgTable _ aligns heads lns) caption = + B.table caption (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 1ac2c1fd8..80a95d36b 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -8,7 +8,7 @@ import Tests.Arbitrary() import Text.Pandoc.Builder import Text.Pandoc import Data.List (intersperse) -import Data.Monoid (mempty, mconcat) +import Data.Monoid (mempty, mappend, mconcat) org :: String -> Pandoc org = readOrg def @@ -648,6 +648,18 @@ tests = [ [ plain "1" , plain "One" , plain "foo" ] , [ plain "2" , plain mempty , plain mempty ] ] + + , "Table with caption" =: + unlines [ "#+CAPTION: Hitchhiker's Multiplication Table" + , "| x | 6 |" + , "| 9 | 42 |" + ] =?> + table "Hitchhiker's Multiplication Table" + [(AlignDefault, 0), (AlignDefault, 0)] + [] + [ [ plain "x", plain "6" ] + , [ plain "9", plain "42" ] + ] ] , testGroup "Blocks and fragments" @@ -740,5 +752,22 @@ tests = , "\\end{equation}" ]) + , "Code block with caption" =: + unlines [ "#+CAPTION: Functor laws in Haskell" + , "#+NAME: functor-laws" + , "#+BEGIN_SRC haskell" + , "fmap id = id" + , "fmap (p . q) = (fmap p) . (fmap q)" + , "#+END_SRC" + ] =?> + divWith + nullAttr + (mappend + (plain $ spanWith ("", ["label"], []) + (spcSep [ "Functor", "laws", "in", "Haskell" ])) + (codeBlockWith ("functor-laws", ["haskell"], []) + (unlines [ "fmap id = id" + , "fmap (p . q) = (fmap p) . (fmap q)" + ]))) ] ] -- cgit v1.2.3 From a69416091ba035ab1661ff306ef3e51fd926488b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 11:25:39 +0200 Subject: Org reader: Fix distinction of images and normal links Fixed a false assumption about the precedence of (&&) vs (||). --- src/Text/Pandoc/Readers/Org.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 025158060..66211b20e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -860,8 +860,8 @@ linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - any (\x -> (x++":") `isPrefixOf` filename) protocols || - ':' `notElem` filename + (any (\x -> (x++":") `isPrefixOf` filename) protocols || + ':' `notElem` filename) where imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] -- cgit v1.2.3 From 8e91d362a392d1ee90a497f39cfcf90fee8d8da0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 13:15:47 +0200 Subject: Org reader: Fix parsing of footnotes Footnotes can consist of multiple blocks and end only at a header or at the beginning of another footnote. This fixes the previous behavior, which restricted notes to a single paragraph. --- src/Text/Pandoc/Readers/Org.hs | 8 ++++++-- tests/Tests/Readers/Org.hs | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 66211b20e..0bc0a2668 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -639,10 +639,14 @@ latexEnvName = try $ do -- noteBlock :: OrgParser (F Blocks) noteBlock = try $ do - ref <- noteMarker - content <- skipSpaces *> paraOrPlain + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillHeaderOrNote addToNotesTable (ref, content) return mempty + where + blocksTillHeaderOrNote = + many1Till block (eof <|> () <$ lookAhead noteMarker + <|> () <$ lookAhead headerStart) -- Paragraphs or Plain text paraOrPlain :: OrgParser (F Blocks) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 80a95d36b..4cc405c0f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -383,6 +383,48 @@ tests = ] =?> para (image "the-red-queen.jpg" "fig:redqueen" "Used as a metapher in evolutionary biology.") + + , "Footnote" =: + unlines [ "A footnote[1]" + , "" + , "[1] First paragraph" + , "" + , "second paragraph" + ] =?> + para (mconcat + [ "A", space, "footnote" + , note $ mconcat [ para ("First" <> space <> "paragraph") + , para ("second" <> space <> "paragraph") + ] + ]) + + , "Two footnotes" =: + unlines [ "Footnotes[fn:1][fn:2]" + , "" + , "[fn:1] First note." + , "" + , "[fn:2] Second note." + ] =?> + para (mconcat + [ "Footnotes" + , note $ para ("First" <> space <> "note.") + , note $ para ("Second" <> space <> "note.") + ]) + + , "Footnote followed by header" =: + unlines [ "Another note[fn:yay]" + , "" + , "[fn:yay] This is great!" + , "" + , "** Headline" + ] =?> + mconcat + [ para (mconcat + [ "Another", space, "note" + , note $ para ("This" <> space <> "is" <> space <> "great!") + ]) + , header 2 "Headline" + ] ] , testGroup "Lists" $ -- cgit v1.2.3 From 8276449520ba85c78f0b4e919fbc9bcf893a74f0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 19 Apr 2014 15:05:03 +0200 Subject: Org reader: Allow for compact definition lists Use `Text.Pandoc.Shared.compactify'DL` to allow for compact definition lists. --- src/Text/Pandoc/Readers/Org.hs | 4 ++-- tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0bc0a2668..c71cc24be 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString , updateLastStrPos ) -import Text.Pandoc.Shared (compactify') +import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) @@ -665,7 +665,7 @@ list :: OrgParser (F Blocks) list = choice [ definitionList, bulletList, orderedList ] "list" definitionList :: OrgParser (F Blocks) -definitionList = fmap B.definitionList . sequence +definitionList = fmap B.definitionList . fmap compactify'DL . sequence <$> many1 (definitionListItem bulletListStart) bulletList :: OrgParser (F Blocks) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4cc405c0f..f62b73ce4 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -567,6 +567,18 @@ tests = ]) ] + , "Compact definition list" =: + unlines [ "- ATP :: adenosine 5' triphosphate" + , "- DNA :: deoxyribonucleic acid" + , "- PCR :: polymerase chain reaction" + , "" + ] =?> + definitionList + [ ("ATP", [ plain $ spcSep [ "adenosine", "5'", "triphosphate" ] ]) + , ("DNA", [ plain $ spcSep [ "deoxyribonucleic", "acid" ] ]) + , ("PCR", [ plain $ spcSep [ "polymerase", "chain", "reaction" ] ]) + ] + , "Loose bullet list" =: unlines [ "- apple" , "" -- cgit v1.2.3 From c128daba9dee096ce0e78b81a381f43337b74285 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 17:42:01 +0200 Subject: Org reader: Recognize plain and angle links This adds support for plain links (like http://zeitlens.com) and angle links (like ). --- src/Text/Pandoc/Readers/Org.hs | 34 +++++++++++++++++++++++++--------- tests/Tests/Readers/Org.hs | 14 ++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c71cc24be..7a50b1db9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -45,7 +45,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) -import Data.Char (toLower) +import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M @@ -209,6 +209,9 @@ instance Monoid a => Monoid (F a) where trimInlinesF :: F Inlines -> F Inlines trimInlinesF = liftM trimInlines +returnF :: a -> OrgParser (F a) +returnF = return . return + -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char @@ -291,9 +294,6 @@ orgBlock = try $ do "src" -> codeBlockWithAttr classArgs content _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks where - returnF :: a -> OrgParser (F a) - returnF = return . return - parseVerse :: String -> OrgParser (F Blocks) parseVerse cs = fmap B.para . mconcat . intersperse (pure B.linebreak) @@ -834,7 +834,11 @@ noteMarker = try $ do ] linkOrImage :: OrgParser (F Inlines) -linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" +linkOrImage = explicitOrImageLink + <|> selflinkOrImage + <|> angleLink + <|> plainLink + "link or image" explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do @@ -851,15 +855,27 @@ explicitOrImageLink = try $ do selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return . return $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + returnF $ if isImageFilename src + then B.image src "" "" + else B.link src "" (B.str src) + +plainLink :: OrgParser (F Inlines) +plainLink = try $ do + (orig, src) <- uri + returnF $ B.link src "" (B.str orig) + +angleLink :: OrgParser (F Inlines) +angleLink = try $ do + char '<' + link <- plainLink + char '>' + return link selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") +linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") isImageFilename :: String -> Bool isImageFilename filename = diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f62b73ce4..ed774f527 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -188,6 +188,20 @@ tests = , "Image link" =: "[[sunset.png][dusk.svg]]" =?> (para $ link "sunset.png" "" (image "dusk.svg" "" "")) + + , "Plain link" =: + "Posts on http://zeitlens.com/ can be funny at times." =?> + (para $ spcSep [ "Posts", "on" + , link "http://zeitlens.com/" "" "http://zeitlens.com/" + , "can", "be", "funny", "at", "times." + ]) + + , "Angle link" =: + "Look at for fnords." =?> + (para $ spcSep [ "Look", "at" + , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" + , "for", "fnords." + ]) ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 2f724aaaa4875a21560870d25c3d212f974c6dde Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 17:42:01 +0200 Subject: Org reader: Read anchors as empty spans Anchors (like <>) are parsed as empty spans. --- src/Text/Pandoc/Readers/Org.hs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7a50b1db9..7f1893936 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -740,6 +740,7 @@ inline = , linebreak , footnote , linkOrImage + , anchor , str , endline , emph @@ -886,6 +887,30 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] +-- | Parse an anchor like @<>@ and return an empty span with +-- @anchor-id@ set as id. Legal anchors in org-mode are defined through +-- @org-target-regexp@, which is fairly liberal. Since no link is created if +-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as +-- an anchor. + +anchor :: OrgParser (F Inlines) +anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) + where + name = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + attributes = name >>= \n -> return (solidify n, [], []) + +-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors +-- the org function @org-export-solidify-link-text@. + +solidify :: String -> String +solidify = map replaceSpecialChar + where replaceSpecialChar c + | isAlphaNum c = c + | c `elem` "_.-:" = c + | otherwise = '-' + emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' -- cgit v1.2.3 From 2eec20d92fd0f498da5b66ac03cf6f8159392323 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 25 Apr 2014 15:29:28 +0200 Subject: Org reader: Enable internal links Internal links in Org are possible by using an anchor-name as the target of a link: [[some-anchor][This]] is an internal link. It links <> here. --- src/Text/Pandoc/Readers/Org.hs | 50 ++++++++++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 25 +++++++++++++++++++++ 2 files changed, 63 insertions(+), 12 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7f1893936..0e52bff90 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateAnchorIds :: [String] , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int @@ -105,6 +106,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateAnchorIds = [] , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing @@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +recordAnchorId :: String -> OrgParser () +recordAnchorId i = updateState $ \s -> + s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> let attrs = orgStateBlockAttributes s @@ -848,17 +854,14 @@ explicitOrImageLink = try $ do title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ B.link src "" <$> - if isImageFilename src && isImageFilename title - then return $ B.image title mempty mempty - else title' + return $ if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - returnF $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + return $ linkToInlinesF src (B.str src) plainLink :: OrgParser (F Inlines) plainLink = try $ do @@ -878,6 +881,26 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF s@('#':_) = pure . B.link s "" +linkToInlinesF s + | isImageFilename s = const . pure $ B.image s "" "" + | isUri s = pure . B.link s "" + | isRelativeUrl s = pure . B.link s "" +linkToInlinesF s = \title -> do + anchorB <- (s `elem`) <$> asksF orgStateAnchorIds + if anchorB + then pure $ B.link ('#':s) "" title + else pure $ B.emph title + +isRelativeUrl :: String -> Bool +isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s) + +isUri :: String -> Bool +isUri s = let (scheme, path) = break (== ':') s + in all (\c -> isAlphaNum c || c `elem` ".-") scheme + && not (null path) + isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && @@ -894,12 +917,15 @@ isImageFilename filename = -- an anchor. anchor :: OrgParser (F Inlines) -anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) +anchor = try $ do + anchorId <- parseAnchor + recordAnchorId anchorId + returnF $ B.spanWith (solidify anchorId, [], []) mempty where - name = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - attributes = name >>= \n -> return (solidify n, [], []) + parseAnchor = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors -- the org function @org-export-solidify-link-text@. diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ed774f527..96747d148 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -202,6 +202,11 @@ tests = , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" , "for", "fnords." ]) + + , "Anchor" =: + "<> Link here later." =?> + (para $ spanWith ("anchor", [], []) mempty <> + "Link" <> space <> "here" <> space <> "later.") ] , testGroup "Meta Information" $ @@ -279,6 +284,26 @@ tests = , ":END:" ] =?> para (":FOO:" <> space <> ":END:") + + , "Anchor reference" =: + unlines [ "<> Target." + , "" + , "[[link-here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (link "#link-here" "" ("See" <> space <> "here!"))) + + , "Search links are read as emph" =: + "[[Wally][Where's Wally?]]" =?> + (para (emph $ "Where's" <> space <> "Wally?")) + + , "Link to nonexistent anchor" =: + unlines [ "<> Target." + , "" + , "[[link$here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (emph ("See" <> space <> "here!"))) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From 8726eebcd363ccb33ea8c297b004feca7ef37ceb Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 30 Apr 2014 11:16:01 +0200 Subject: Org reader: Add support for custom link types Org allows users to define their own custom link types. E.g., in a document with a lot of links to Wikipedia articles, one can define a custom wikipedia link-type via #+LINK: wp https://en.wikipedia.org/wiki/ This allows to write [[wp:Org_mode][Org-mode]] instead of the equivallent [[https://en.wikipedia.org/wiki/Org_mode][Org-mode]]. --- src/Text/Pandoc/Readers/Org.hs | 68 +++++++++++++++++++++++++++++++++++++----- tests/Tests/Readers/Org.hs | 26 ++++++++++++++++ 2 files changed, 86 insertions(+), 8 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e52bff90..d68ef45ef 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) -import Control.Monad (foldM, guard, liftM, liftM2, when) +import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -51,6 +51,7 @@ import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M import Data.Maybe (listToMaybe, fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) +import Network.HTTP (urlEncode) -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options @@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord] type OrgBlockAttributes = M.Map String String +type OrgLinkFormatters = M.Map String (String -> String) + -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions @@ -86,6 +89,7 @@ data OrgParserState = OrgParserState , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos + , orgStateLinkFormatters :: OrgLinkFormatters , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable @@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState , orgStateLastForbiddenCharPos = Nothing , orgStateLastPreCharPos = Nothing , orgStateLastStrPos = Nothing + , orgStateLinkFormatters = M.empty , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] @@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser () resetEmphasisNewlines = updateState $ \s -> s{ orgStateEmphasisNewlines = Nothing } +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + addToNotesTable :: OrgNoteRecord -> OrgParser () addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState @@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine metaLine :: OrgParser Blocks -metaLine = try $ metaLineStart *> declarationLine +metaLine = try $ mempty + <$ (metaLineStart *> (optionLine <|> declarationLine)) commentLine :: OrgParser Blocks commentLine = try $ commentLineStart *> anyLine *> pure mempty @@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+" commentLineStart :: OrgParser String commentLineStart = try $ mappend <$> many spaceChar <*> string "# " -declarationLine :: OrgParser Blocks +declarationLine :: OrgParser () declarationLine = try $ do key <- metaKey inlinesF <- metaInlines updateState $ \st -> let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta in st { orgStateMeta' = orgStateMeta' st <> meta' } - return mempty + return () metaInlines :: OrgParser (F MetaValue) metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline @@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r") <* char ':' <* skipSpaces +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + _ -> mzero + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + -- -- Headers -- @@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do char '[' - src <- linkTarget + srcF <- applyCustomLinkFormat =<< linkTarget title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ if isImageFilename src && isImageFilename title - then pure $ B.link src "" $ B.image title mempty mempty - else linkToInlinesF src =<< title' + return $ do + src <- srcF + if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do @@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +applyCustomLinkFormat :: String -> OrgParser (F String) +applyCustomLinkFormat link = do + let (linkType, rest) = break (== ':') link + return $ do + formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters + return $ maybe link ($ drop 1 rest) formatter + + linkToInlinesF :: String -> Inlines -> F Inlines linkToInlinesF s@('#':_) = pure . B.link s "" linkToInlinesF s diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 96747d148..78684f0f1 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -304,6 +304,32 @@ tests = ] =?> (para (spanWith ("link-here", [], []) mempty <> "Target.") <> para (emph ("See" <> space <> "here!"))) + + , "Link abbreviation" =: + unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s" + , "[[wp:Org_mode][Wikipedia on Org-mode]]" + ] =?> + (para (link "https://en.wikipedia.org/wiki/Org_mode" "" + ("Wikipedia" <> space <> "on" <> space <> "Org-mode"))) + + , "Link abbreviation, defined after first use" =: + unlines [ "[[zl:non-sense][Non-sense articles]]" + , "#+LINK: zl http://zeitlens.com/tags/%s.html" + ] =?> + (para (link "http://zeitlens.com/tags/non-sense.html" "" + ("Non-sense" <> space <> "articles"))) + + , "Link abbreviation, URL encoded arguments" =: + unlines [ "#+link: expl http://example.com/%h/foo" + , "[[expl:Hello, World!][Moin!]]" + ] =?> + (para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!")) + + , "Link abbreviation, append arguments" =: + unlines [ "#+link: expl http://example.com/" + , "[[expl:foo][bar]]" + ] =?> + (para (link "http://example.com/foo" "" "bar")) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3 From 71bd4fb2b3778d3906a63938625ebcadca40b8c8 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 5 May 2014 14:39:25 +0200 Subject: Org reader: Read inline code blocks Org's inline code blocks take forms like `src_haskell(print "hi")` and are frequently used to include results from computations called from within the document. The blocks are read as inline code and marked with the special class `rundoc-block`. Proper handling and execution of these blocks is the subject of a separate library, rundoc, which is work in progress. This closes #1278. --- src/Text/Pandoc/Readers/Org.hs | 43 +++++++++++++++++++++++++++++++++++++++--- tests/Tests/Readers/Org.hs | 18 ++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index d68ef45ef..dba61dfe0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) +import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) @@ -721,7 +722,6 @@ bulletList = fmap B.bulletList . fmap compactify' . sequence <$> many1 (listItem bulletListStart) orderedList :: OrgParser (F Blocks) --- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart) orderedList = fmap B.orderedList . fmap compactify' . sequence <$> many1 (listItem orderedListStart) @@ -746,11 +746,11 @@ definitionListItem :: OrgParser Int definitionListItem parseMarkerGetLength = try $ do markerLength <- parseMarkerGetLength term <- manyTill (noneOf "\n\r") (try $ string "::") - first <- anyLineNewline + line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) term' <- parseFromString inline term - contents' <- parseFromString parseBlocks $ first ++ blank ++ cont + contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' @@ -789,6 +789,7 @@ inline = , footnote , linkOrImage , anchor + , inlineCodeBlock , str , endline , emph @@ -989,6 +990,42 @@ solidify = map replaceSpecialChar | c `elem` "_.-:" = c | otherwise = '-' +-- | Parses an inline code block and marks it as an babel block. +inlineCodeBlock :: OrgParser (F Inlines) +inlineCodeBlock = try $ do + string "src_" + lang <- many1 orgArgWordChar + opts <- option [] $ enclosedByPair '[' ']' blockOption + inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") + let attrClasses = [translateLang lang, rundocBlockClass] + let attrKeyVal = map toRundocAttrib (("language", lang) : opts) + returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode + where enclosedByPair s e p = char s *> many1Till p (char e) + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = "rundoc-block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + <* many1 spaceChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar + <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 78684f0f1..949976aba 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -207,6 +207,24 @@ tests = "<> Link here later." =?> (para $ spanWith ("anchor", [], []) mempty <> "Link" <> space <> "here" <> space <> "later.") + + , "Inline code block" =: + "src_emacs-lisp{(message \"Hello\")}" =?> + (para $ codeWith ( "" + , [ "commonlisp", "rundoc-block" ] + , [ ("rundoc-language", "emacs-lisp") ]) + "(message \"Hello\")") + + , "Inline code block with arguments" =: + "src_sh[:export both :results output]{echo 'Hello, World'}" =?> + (para $ codeWith ( "" + , [ "bash", "rundoc-block" ] + , [ ("rundoc-language", "sh") + , ("rundoc-export", "both") + , ("rundoc-results", "output") + ] + ) + "echo 'Hello, World'") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 7760504bb26f215e7d0c57da843f1f1dcc8c1186 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 May 2014 17:01:58 +0200 Subject: Org reader: refactor #+BEGIN..#+END block parsing code --- src/Text/Pandoc/Readers/Org.hs | 122 +++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 42 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index dba61dfe0..9df8ce0b3 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -50,7 +50,7 @@ import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M -import Data.Maybe (listToMaybe, fromMaybe, isJust) +import Data.Maybe (fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) @@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s -> s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] -surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState +surroundingEmphasisChar = + take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> @@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } newlinesCountWithinLimits :: OrgParser Bool newlinesCountWithinLimits = do @@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do -- Org Blocks (#+BEGIN_... / #+END_...) -- +type BlockProperties = (Int, String) -- (Indentation, Block-Type) + orgBlock :: OrgParser (F Blocks) orgBlock = try $ do - (indent, blockType, args) <- blockHeader - content <- rawBlockContent indent blockType - contentBlocks <- parseFromString parseBlocks (content ++ "\n") - let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] - case blockType of - "comment" -> return mempty - "html" -> returnF $ B.rawBlock "html" content - "latex" -> returnF $ B.rawBlock "latex" content - "ascii" -> returnF $ B.rawBlock "ascii" content - "example" -> returnF $ exampleCode content - "quote" -> return $ B.blockQuote <$> contentBlocks - "verse" -> parseVerse content - "src" -> codeBlockWithAttr classArgs content - _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + blockProp@(_, blkType) <- blockHeaderStart + ($ blockProp) $ + case blkType of + "comment" -> withRaw' (const mempty) + "html" -> withRaw' (return . (B.rawBlock blkType)) + "latex" -> withRaw' (return . (B.rawBlock blkType)) + "ascii" -> withRaw' (return . (B.rawBlock blkType)) + "example" -> withRaw' (return . exampleCode) + "quote" -> withParsed (fmap B.blockQuote) + "verse" -> verseBlock + "src" -> codeBlock + _ -> withParsed (fmap $ divWithClass blkType) + +blockHeaderStart :: OrgParser (Int, String) +blockHeaderStart = try $ (,) <$> indent <*> blockType where - parseVerse :: String -> OrgParser (F Blocks) - parseVerse cs = - fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (lines cs) - -blockHeader :: OrgParser (Int, String, [String]) -blockHeader = (,,) <$> blockIndent - <*> blockType - <*> (skipSpaces *> blockArgs) - where blockIndent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) - blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline - -codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) -codeBlockWithAttr classArgs content = do - identifier <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" - let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + indent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + +withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) + +withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) + +ignHeaders :: OrgParser () +ignHeaders = (() <$ newline) <|> (() <$ anyLine) + +divWithClass :: String -> Blocks -> Blocks +divWithClass cls = B.divWith ("", [cls], []) + +verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock blkProp = try $ do + ignHeaders + content <- rawBlockContent blkProp + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines content) + +codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock blkProp = do + skipSpaces + language <- optionMaybe orgArgWord + (classes, kv) <- codeHeaderArgs + id' <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + content <- rawBlockContent blkProp + let attr = ( id' + , maybe id (\l -> (l:)) language $ classes + , kv ) + let codeBlck = B.codeBlockWith attr content return $ maybe (pure codeBlck) (labelDiv codeBlck) caption where labelDiv blk value = @@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do <*> pure blk) labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -rawBlockContent :: Int -> String -> OrgParser String -rawBlockContent indent blockType = +rawBlockContent :: BlockProperties -> OrgParser String +rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ + choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ + indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent blkProps = try $ do + raw <- rawBlockContent blkProps + parseFromString parseBlocks (raw ++ "\n") -- indent by specified number of spaces (or equiv. tabs) indentWith :: Int -> OrgParser String @@ -356,6 +383,13 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +orgArgWord :: OrgParser String +orgArgWord = many1 orgArgWordChar + +codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs = + (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline + translateLang :: String -> String translateLang "C" = "c" translateLang "C++" = "cpp" @@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + -- | The class-name used to mark rundoc blocks. rundocBlockClass :: String -rundocBlockClass = "rundoc-block" +rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) blockOption = try $ (,) <$> orgArgKey <*> orgArgValue -- cgit v1.2.3 From 757c4f68f3f3cab99db9499936e3ae4775ebbddf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:07:37 +0200 Subject: Org reader: Support arguments for code blocks The general form of source block headers (`#+BEGIN_SRC
`) was not recognized by the reader. This patch adds support for the above form, adds header arguments to the block's key-value pairs and marks the block as a rundoc block if header arguments are present. This closes #1286. --- src/Text/Pandoc/Readers/Org.hs | 98 ++++++++++++++++++++++++------------------ tests/Tests/Readers/Org.hs | 14 ++++++ 2 files changed, 70 insertions(+), 42 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 9df8ce0b3..c05ac92d0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -276,7 +276,7 @@ parseBlockAttributes = do where attribute :: OrgParser (String, String) attribute = try $ do - key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + key <- metaLineStart *> many1Till nonspaceChar (char ':') val <- skipSpaces *> anyLine return (map toLower key, val) @@ -342,16 +342,11 @@ verseBlock blkProp = try $ do codeBlock :: BlockProperties -> OrgParser (F Blocks) codeBlock blkProp = do skipSpaces - language <- optionMaybe orgArgWord - (classes, kv) <- codeHeaderArgs + (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) id' <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" content <- rawBlockContent blkProp - let attr = ( id' - , maybe id (\l -> (l:)) language $ classes - , kv ) - let codeBlck = B.codeBlockWith attr content - return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + let codeBlck = B.codeBlockWith ( id', classes, kv ) content + maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption" where labelDiv blk value = B.divWith nullAttr <$> (mappend <$> labelledBlock value @@ -383,12 +378,33 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +type SwitchOption = (Char, Maybe String) + orgArgWord :: OrgParser String orgArgWord = many1 orgArgWordChar +-- | Parse code block arguments +-- TODO: We currently don't handle switches. codeHeaderArgs :: OrgParser ([String], [(String, String)]) -codeHeaderArgs = - (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline +codeHeaderArgs = try $ do + language <- skipSpaces *> orgArgWord + _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + parameters <- manyTill blockOption newline + let pandocLang = translateLang language + return $ + if hasRundocParameters parameters + then ( [ pandocLang, rundocBlockClass ] + , map toRundocAttrib (("language", language) : parameters) + ) + else ([ pandocLang ], parameters) + where hasRundocParameters = not . null + +switch :: OrgParser SwitchOption +switch = try $ simpleSwitch <|> lineNumbersSwitch + where + simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) + lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> + (string "-l \"" *> many1Till nonspaceChar (char '"')) translateLang :: String -> String translateLang "C" = "c" @@ -401,6 +417,32 @@ translateLang "sh" = "bash" translateLang "sqlite" = "sql" translateLang cs = cs +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + commaEscaped :: String -> String commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs @@ -425,7 +467,7 @@ drawer = try $ do drawerStart :: OrgParser String drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline + skipSpaces *> drawerName <* skipSpaces <* P.newline where drawerName = try $ char ':' *> validDrawerName <* char ':' validDrawerName = stringAnyCase "PROPERTIES" <|> stringAnyCase "LOGBOOK" @@ -435,7 +477,7 @@ drawerLine = try anyLine drawerEnd :: OrgParser String drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline -- @@ -446,7 +488,7 @@ drawerEnd = try $ figure :: OrgParser (F Blocks) figure = try $ do (cap, nam) <- nameAndCaption - src <- skipSpaces *> selfTarget <* skipSpaces <* newline + src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) return $ do cap' <- cap @@ -1036,34 +1078,6 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue - -orgArgKey :: OrgParser String -orgArgKey = try $ - skipSpaces *> char ':' - *> many1 orgArgWordChar - <* many1 spaceChar - -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar - <* skipSpaces - -orgArgWordChar :: OrgParser Char -orgArgWordChar = alphaNum <|> oneOf "-_" - -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first ("rundoc-" ++) - emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 949976aba..a78e8861f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -822,6 +822,20 @@ tests = in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] , codeBlockWith attr' code' ] + , "Source block with rundoc/babel arguments" =: + unlines [ "#+BEGIN_SRC emacs-lisp :exports both" + , "(progn (message \"Hello, World!\")" + , " (+ 23 42))" + , "#+END_SRC" ] =?> + let classes = [ "commonlisp" -- as kate doesn't know emacs-lisp syntax + , "rundoc-block" + ] + params = [ ("rundoc-language", "emacs-lisp") + , ("rundoc-exports", "both") + ] + code' = unlines [ "(progn (message \"Hello, World!\")" + , " (+ 23 42))" ] + in codeBlockWith ("", classes, params) code' , "Example block" =: unlines [ "#+begin_example" -- cgit v1.2.3 From 07694b30184bcf2ed0e2998016df394f47a1996f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:23:23 +0200 Subject: Org reader: Fix parsing of blank lines within blocks Blank lines were parsed as two newlines instead of just one. Thanks to Xiao Hanyu (@xiaohanyu) for pointing this out. --- src/Text/Pandoc/Readers/Org.hs | 8 ++------ tests/Tests/Readers/Org.hs | 9 +++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c05ac92d0..0f218d43f 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -357,12 +357,8 @@ rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ - choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ - indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) parsedBlockContent blkProps = try $ do diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index a78e8861f..87b0d0c90 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -920,5 +920,14 @@ tests = (unlines [ "fmap id = id" , "fmap (p . q) = (fmap p) . (fmap q)" ]))) + + , "Convert blank lines in blocks to single newlines" =: + unlines [ "#+begin_html" + , "" + , "boring" + , "" + , "#+end_html" + ] =?> + rawBlock "html" "\nboring\n\n" ] ] -- cgit v1.2.3 From c5fd631b550a3b05b60de1684c80387bc46a88cc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 10 May 2014 11:25:20 +0200 Subject: Org reader: Fix block parameter reader, relax constraints The reader produced wrong results for block containing non-letter chars in their parameter arguments. This patch relaxes constraints in that it allows block header arguments to contain any non-space character (except for ']' for inline blocks). Thanks to Xiao Hanyu for noticing this. --- src/Text/Pandoc/Readers/Org.hs | 19 +++++++++++++------ tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0f218d43f..2e4a29beb 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -318,7 +318,7 @@ blockHeaderStart :: OrgParser (Int, String) blockHeaderStart = try $ (,) <$> indent <*> blockType where indent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) @@ -422,16 +422,23 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue +blockOption = try $ (,) <$> orgArgKey <*> orgParamValue + +inlineBlockOption :: OrgParser (String, String) +inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue orgArgKey :: OrgParser String orgArgKey = try $ skipSpaces *> char ':' *> many1 orgArgWordChar -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar <* skipSpaces +orgParamValue :: OrgParser String +orgParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + +orgInlineParamValue :: OrgParser String +orgInlineParamValue = try $ + skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -1067,7 +1074,7 @@ inlineCodeBlock :: OrgParser (F Inlines) inlineCodeBlock = try $ do string "src_" lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' blockOption + opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 87b0d0c90..4ef7a7731 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -929,5 +929,17 @@ tests = , "#+end_html" ] =?> rawBlock "html" "\nboring\n\n" + + , "Non-letter chars in source block parameters" =: + unlines [ "#+BEGIN_SRC C :tangle xxxx.c :city Zürich" + , "code body" + , "#+END_SRC" + ] =?> + let classes = [ "c", "rundoc-block" ] + params = [ ("rundoc-language", "C") + , ("rundoc-tangle", "xxxx.c") + , ("rundoc-city", "Zürich") + ] + in codeBlockWith ( "", classes, params) "code body\n" ] ] -- cgit v1.2.3 From 9df589b9c5a4f2dcb19445239dfae41b54625330 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:45:37 +0200 Subject: Introduce class HasLastStrPosition, generalize functions Both `ParserState` and `OrgParserState` keep track of the parser position at which the last string ended. This patch introduces a new class `HasLastStrPosition` and makes the above types instances of that class. This enables the generalization of functions updating the state or checking if one is right after a string. --- src/Text/Pandoc/Parsing.hs | 32 +++++++++++++++++++++++--------- src/Text/Pandoc/Readers/Markdown.hs | 11 +++-------- src/Text/Pandoc/Readers/Org.hs | 11 ++++------- 3 files changed, 30 insertions(+), 24 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index d1e55cbc4..344f6c7ba 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~), withRaw, escaped, characterReference, - updateLastStrPos, anyOrderedListMarker, orderedListMarker, charRef, @@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~), testStringWith, guardEnabled, guardDisabled, + updateLastStrPos, + notAfterString, ParserState (..), HasReaderOptions (..), HasHeaderMap (..), HasIdentifierList (..), HasMacros (..), + HasLastStrPosition (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -904,6 +906,14 @@ instance HasMacros ParserState where extractMacros = stateMacros updateMacros f st = st{ stateMacros = f $ stateMacros st } +class HasLastStrPosition st where + setLastStrPos :: SourcePos -> st -> st + getLastStrPos :: st -> Maybe SourcePos + +instance HasLastStrPosition ParserState where + setLastStrPos pos st = st{ stateLastStrPos = Just pos } + getLastStrPos st = stateLastStrPos st + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -938,6 +948,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext guardDisabled :: HasReaderOptions st => Extension -> Parser s st () guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext +-- | Update the position on which the last string ended. +updateLastStrPos :: HasLastStrPosition st => Parser s st () +updateLastStrPos = getPosition >>= updateState . setLastStrPos + +-- | Whether we are right after the end of a string. +notAfterString :: HasLastStrPosition st => Parser s st Bool +notAfterString = do + pos <- getPosition + st <- getState + return $ getLastStrPos st /= Just pos + data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below @@ -1049,17 +1070,11 @@ charOrRef cs = guard (c `elem` cs) return c) -updateLastStrPos :: Parser [Char] ParserState () -updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ stateLastStrPos = Just p } - singleQuoteStart :: Parser [Char] ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote - pos <- getPosition - st <- getState -- single quote start can't be right after str - guard $ stateLastStrPos st /= Just pos + guard =<< notAfterString () <$ charOrRef "'\8216\145" singleQuoteEnd :: Parser [Char] st () @@ -1156,4 +1171,3 @@ applyMacros' target = do then do macros <- extractMacros `fmap` getState return $ applyMacros macros target else return target - diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index d1637b701..1ac98e94c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_') where checkIntraword = do exts <- getOption readerExtensions when (Ext_intraword_underscores `Set.member` exts) $ do - pos <- getPosition - lastStrPos <- stateLastStrPos <$> getState - guard $ lastStrPos /= Just pos + guard =<< notAfterString -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) @@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n') str :: MarkdownParser (F Inlines) str = do result <- many1 alphaNum - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + updateLastStrPos let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) isSmart <- getOption readerSmart if isSmart @@ -1821,9 +1818,7 @@ citeKey :: MarkdownParser (Bool, String) citeKey = try $ do -- make sure we're not right after an alphanumeric, -- since foo@bar.baz is probably an email address - lastStrPos <- stateLastStrPos <$> getState - pos <- getPosition - guard $ lastStrPos /= Just pos + guard =<< notAfterString suppress_author <- option False (char '-' >> return True) char '@' first <- letter <|> char '_' diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2e4a29beb..5dbcaee98 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -105,6 +105,10 @@ instance HasMeta OrgParserState where deleteMeta field st = st{ orgStateMeta = deleteMeta field $ orgStateMeta st } +instance HasLastStrPosition OrgParserState where + getLastStrPos = orgStateLastStrPos + setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } + instance Default OrgParserState where def = defaultOrgParserState @@ -1274,13 +1278,6 @@ afterEmphasisPreChar = do lastPrePos <- orgStateLastPreCharPos <$> getState return . fromMaybe True $ (== pos) <$> lastPrePos --- | Whether we are right after the end of a string -notAfterString :: OrgParser Bool -notAfterString = do - pos <- getPosition - lastStrPos <- orgStateLastStrPos <$> getState - return $ lastStrPos /= Just pos - -- | Whether the parser is right after a forbidden border char notAfterForbiddenBorderChar :: OrgParser Bool notAfterForbiddenBorderChar = do -- cgit v1.2.3 From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 14 May 2014 14:49:30 +0200 Subject: Org reader: support Pandocs citation extension Citations are defined via the "normal citation" syntax used in markdown, with the sole difference that newlines are not allowed between "[...]". This is for consistency, as org-mode generally disallows newlines between square brackets. The extension is turned on by default and can be turned off via the default syntax-extension mechanism, i.e. by specifying "org-citation" as the input format. Move `citeKey` from Readers.Markdown into Parsing The function can be used by other readers, so it is made accessible for all parsers. --- src/Text/Pandoc.hs | 2 +- src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++-- tests/Tests/Readers/Org.hs | 22 +++++++++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index dd5bc18f6..130338f0e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = pandocExtensions +getDefaultExtensions "org" = Set.fromList [Ext_citations] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext_raw_tex] getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers] @@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy writeJSON :: WriterOptions -> Pandoc -> String writeJSON _ = UTF8.toStringLazy . encode - diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5dbcaee98..86dda2732 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines) inline = choice [ whitespace , linebreak + , cite , footnote , linkOrImage , anchor @@ -933,6 +934,51 @@ endline = try $ do updateLastPreCharPos return . return $ B.space +cite :: OrgParser (F Inlines) +cite = try $ do + guardEnabled Ext_citations + (cs, raw) <- withRaw normalCite + return $ (flip B.cite (B.text raw)) <$> cs + +normalCite :: OrgParser (F [Citation]) +normalCite = try $ char '[' + *> skipSpaces + *> citeList + <* skipSpaces + <* char ']' + +citeList :: OrgParser (F [Citation]) +citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) + +citation :: OrgParser (F Citation) +citation = try $ do + pref <- prefix + (suppress_author, key) <- citeKey + suff <- suffix + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + where + prefix = trimInlinesF . mconcat <$> + manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) + suffix = try $ do + hasSpace <- option False (notFollowedBy nonspaceChar >> return True) + skipSpaces + rest <- trimInlinesF . mconcat <$> + many (notFollowedBy (oneOf ";]") *> inline) + return $ if hasSpace + then (B.space <>) <$> rest + else rest + footnote :: OrgParser (F Inlines) footnote = try $ inlineNote <|> referencedNote @@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") applyCustomLinkFormat :: String -> OrgParser (F String) applyCustomLinkFormat link = do @@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do let attrClasses = [translateLang lang, rundocBlockClass] let attrKeyVal = map toRundocAttrib (("language", lang) : opts) returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode - where enclosedByPair s e p = char s *> many1Till p (char e) + +enclosedByPair :: Char -- ^ opening char + -> Char -- ^ closing char + -> OrgParser a -- ^ parser + -> OrgParser [a] +enclosedByPair s e p = char s *> many1Till p (char e) emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ef7a7731..ca97ba348 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -225,6 +225,28 @@ tests = ] ) "echo 'Hello, World'") + + , "Citation" =: + "[@nonexistent]" =?> + let citation = Citation + { citationId = "nonexistent" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[@nonexistent]") + + , "Citation containing text" =: + "[see @item1 p. 34-35]" =?> + let citation = Citation + { citationId = "item1" + , citationPrefix = [Str "see"] + , citationSuffix = [Space ,Str "p.",Space,Str "34-35"] + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0} + in (para $ cite [citation] "[see @item1 p. 34-35]") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 3238a2f9191b83864abd682261634a603ec89056 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 20 May 2014 22:29:21 +0200 Subject: Org reader: support for inline LaTeX Inline LaTeX is now accepted and parsed by the org-mode reader. Both, math symbols (like \tau) and LaTeX commands (like \cite{Coffee}), can be used without any further escaping. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + src/Text/Pandoc/Readers/Org.hs | 32 +++++++++++++++++++++++++++++++- tests/Tests/Readers/Org.hs | 27 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 6f870318f..7fc587882 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -31,6 +31,7 @@ Conversion of LaTeX to 'Pandoc' document. module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, rawLaTeXBlock, + inlineCommand, handleIncludes ) where diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 86dda2732..c3ea8d7c2 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -39,12 +39,15 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString , updateLastStrPos ) +import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) +import Text.Parsec.Pos (updatePosString) +import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Arrow (first) -import Control.Monad (foldM, guard, liftM, liftM2, mzero, when) +import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (isAlphaNum, toLower) import Data.Default @@ -886,6 +889,7 @@ inline = , verbatim , subscript , superscript + , inlineLaTeX , symbol ] <* (guard =<< newlinesCountWithinLimits) "inline" @@ -1351,3 +1355,29 @@ simpleSubOrSuperString = try $ , mappend <$> option [] ((:[]) <$> oneOf "+-") <*> many1 alphaNum ] + +inlineLaTeX :: OrgParser (F Inlines) +inlineLaTeX = try $ do + cmd <- inlineLaTeXCommand + maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd + where + parseAsMath :: String -> Maybe Inlines + parseAsMath cs = maybeRight $ B.fromList <$> texMathToPandoc DisplayInline cs + + parseAsInlineLaTeX :: String -> Maybe Inlines + parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + + state :: ParserState + state = def{ stateOptions = def{ readerParseRaw = True }} + +maybeRight :: Either a b -> Maybe b +maybeRight = either (const Nothing) Just + +inlineLaTeXCommand :: OrgParser String +inlineLaTeXCommand = try $ do + rest <- getInput + pos <- getPosition + case runParser rawLaTeXInline def "source" rest of + Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) + <* (setPosition $ updatePosString pos cs) + _ -> mzero diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ca97ba348..4ed77887f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -247,6 +247,33 @@ tests = , citationNoteNum = 0 , citationHash = 0} in (para $ cite [citation] "[see @item1 p. 34-35]") + + , "Inline LaTeX symbol" =: + "\\dots" =?> + para "…" + + , "Inline LaTeX command" =: + "\\textit{Emphasised}" =?> + para (emph "Emphasised") + + , "Inline LaTeX math symbol" =: + "\\tau" =?> + para (emph "τ") + + , "Unknown inline LaTeX command" =: + "\\notacommand{foo}" =?> + para (rawInline "latex" "\\notacommand{foo}") + + , "LaTeX citation" =: + "\\cite{Coffee}" =?> + let citation = Citation + { citationId = "Coffee" + , citationPrefix = [] + , citationSuffix = [] + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0} + in (para . cite [citation] $ rawInline "latex" "\\cite{Coffee}") ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 01ef573ac2f6620e9f70ae8965e5ccc664e3aec4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 14:18:06 -0700 Subject: Org reader: fixed #1342. This change rewrites `inlineLaTeXCommand` so that parsec will know when input is being consumed. Previously a run-time error would be produced with some input involving raw latex. (I believe this does not affect the last release, as the inline latex reading was added recently.) --- src/Text/Pandoc/Readers/Org.hs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c3ea8d7c2..0e872abf0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -38,10 +38,9 @@ import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker , parseFromString - , updateLastStrPos ) + ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) -import Text.Parsec.Pos (updatePosString) import Text.TeXMath (texMathToPandoc, DisplayType(..)) import Control.Applicative ( Applicative, pure @@ -148,10 +147,6 @@ resetBlockAttributes :: OrgParser () resetBlockAttributes = updateState $ \s -> s{ orgStateBlockAttributes = orgStateBlockAttributes def } -updateLastStrPos :: OrgParser () -updateLastStrPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastStrPos = Just p } - updateLastForbiddenCharPos :: OrgParser () updateLastForbiddenCharPos = getPosition >>= \p -> updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} @@ -1376,8 +1371,9 @@ maybeRight = either (const Nothing) Just inlineLaTeXCommand :: OrgParser String inlineLaTeXCommand = try $ do rest <- getInput - pos <- getPosition case runParser rawLaTeXInline def "source" rest of - Right (RawInline _ cs) -> cs <$ (setInput $ drop (length cs) rest) - <* (setPosition $ updatePosString pos cs) + Right (RawInline _ cs) -> do + let len = length cs + count len anyChar + return cs _ -> mzero -- cgit v1.2.3 From 78ee2416d105bd25337819a49835623a8a296224 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 16 Jun 2014 22:03:26 -0700 Subject: Org reader: make tildes create inline code. Closes #1345. Also relabeled 'code' and 'verbatim' parsers to accord with the org-mode manual. I'm not sure what the distinction between code and verbatim is supposed to be, but I'm pretty sure both should be represented as Code inlines in pandoc. The previous behavior resulted in the text not appearing in any output format. --- src/Text/Pandoc/Readers/Org.hs | 8 ++++---- tests/Tests/Readers/Org.hs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/Text/Pandoc/Readers/Org.hs') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 0e872abf0..7a35e2ca0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1148,11 +1148,11 @@ strikeout = fmap B.strikeout <$> emphasisBetween '+' underline :: OrgParser (F Inlines) underline = fmap B.strong <$> emphasisBetween '_' -code :: OrgParser (F Inlines) -code = return . B.code <$> verbatimBetween '=' - verbatim :: OrgParser (F Inlines) -verbatim = return . B.rawInline "" <$> verbatimBetween '~' +verbatim = return . B.code <$> verbatimBetween '=' + +code :: OrgParser (F Inlines) +code = return . B.code <$> verbatimBetween '~' subscript :: OrgParser (F Inlines) subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 4ed77887f..f8240ca3d 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -50,13 +50,13 @@ tests = "+Kill Bill+" =?> para (strikeout . spcSep $ [ "Kill", "Bill" ]) - , "Code" =: + , "Verbatim" =: "=Robot.rock()=" =?> para (code "Robot.rock()") - , "Verbatim" =: + , "Code" =: "~word for word~" =?> - para (rawInline "" "word for word") + para (code "word for word") , "Math $..$" =: "$E=mc^2$" =?> -- cgit v1.2.3