From 4ebf6f6ebf7d679252ade08203ec13e3e92c2db5 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 19:09:33 +0200 Subject: Org reader: Minor code clean-up --- src/Text/Pandoc/Readers/Org.hs | 51 +++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5ad2531ac..6652925aa 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -268,8 +268,12 @@ data OrgTable = OrgTable table :: OrgParser Blocks table = try $ do lookAhead tableStart - OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows - return $ B.table "" (zip aligns $ repeat 0) heads lns + orgToPandocTable . normalizeTable . rowsToTable <$> tableRows + +orgToPandocTable :: OrgTable + -> Blocks +orgToPandocTable (OrgTable _ aligns heads lns) = + B.table "" (zip aligns $ repeat 0) heads lns tableStart :: OrgParser Char tableStart = try $ skipSpaces *> char '|' @@ -403,20 +407,14 @@ orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") +-- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser Blocks listItem start = try $ do - (markerLength, first) <- try (start >>= rawListItem) - rest <- many (listContinuation markerLength) - parseFromString parseBlocks $ concat (first:rest) - --- parse raw text for one list item, excluding start marker and continuations -rawListItem :: Int - -> OrgParser (Int, String) -rawListItem markerLength = try $ do - firstLine <- anyLine - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + markerLength <- try start + firstLine <- anyLineNewline + rest <- concat <$> many (listContinuation markerLength) + parseFromString parseBlocks $ firstLine ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. @@ -424,14 +422,11 @@ listContinuation :: Int -> OrgParser String listContinuation markerLength = try $ mappend <$> many blankline - <*> (concat <$> many1 (listLine markerLength)) + <*> (concat <$> many1 listLine) + where listLine = try $ indentWith markerLength *> anyLineNewline --- parse a line of a list item -listLine :: Int - -> OrgParser String -listLine markerLength = try $ - indentWith markerLength *> anyLine - <**> pure (++ "\n") +anyLineNewline :: OrgParser String +anyLineNewline = (++ "\n") <$> anyLine -- @@ -491,12 +486,11 @@ explicitOrImageLink = try $ do char '[' src <- enclosedRaw (char '[') (char ']') title <- enclosedRaw (char '[') (char ']') - title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n") + title' <- parseFromString (mconcat <$> many inline) title char ']' return $ if (isImage src) && (isImage title) then B.link src "" (B.image title "" "") else B.link src "" title' - where butLast = reverse . tail . reverse selflinkOrImage :: OrgParser Inlines selflinkOrImage = try $ do @@ -552,11 +546,8 @@ inlinesEnclosedBy c = try $ do updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) } res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c) (atEnd $ char c) - updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st } + updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st } return res - where shift xs - | null xs = [] - | otherwise = tail xs enclosedRaw :: OrgParser a -> OrgParser b @@ -583,14 +574,13 @@ atStart p = do atEnd :: OrgParser a -> OrgParser a atEnd p = try $ do p <* lookingAtEndOfWord - where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars + where lookingAtEndOfWord = + eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars postWordChars :: OrgParser [Char] postWordChars = do st <- getState - return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st) - where safeSecond (_:x2:_) = [x2] - safeSecond _ = [] + return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st) -- FIXME: These functions are hacks and should be replaced endsOnThisOrNextLine :: Char @@ -608,6 +598,7 @@ endsOnThisLine input c doOnOtherLines = do postWordChars' <- postWordChars case break (`elem` c:"\n") input of (_,'\n':rest) -> doOnOtherLines rest + (_,_:[]) -> return () (_,_:rest@(n:_)) -> if n `elem` postWordChars' then return () else endsOnThisLine rest c doOnOtherLines -- cgit v1.2.3 From 480b33b7100048ef3fad51754ae76c21daa8b86f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 6 Apr 2014 14:49:57 +0200 Subject: Org reader: Add support for definition lists --- src/Text/Pandoc/Readers/Org.hs | 17 ++++++++++++++++- tests/Tests/Readers/Org.hs | 26 +++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 6652925aa..20bca3e28 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -383,7 +383,10 @@ restOfLine = mconcat <$> manyTill inline newline -- list :: OrgParser Blocks -list = choice [ bulletList, orderedList ] "list" +list = choice [ definitionList, bulletList, orderedList ] "list" + +definitionList :: OrgParser Blocks +definitionList = B.definitionList <$> many1 (definitionListItem bulletListStart) bulletList :: OrgParser Blocks bulletList = B.bulletList . compactify' <$> many1 (listItem bulletListStart) @@ -407,6 +410,18 @@ orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") +definitionListItem :: OrgParser Int + -> OrgParser (Inlines, [Blocks]) +definitionListItem parseMarkerGetLength = try $ do + markerLength <- parseMarkerGetLength + term <- manyTill (noneOf "\n\r") (try $ string "::") + first <- anyLineNewline + cont <- concat <$> many (listContinuation markerLength) + term' <- parseFromString inline term + contents' <- parseFromString parseBlocks $ first ++ cont + return (term', [contents']) + + -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser Blocks diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 1088d6611..eb9f4d741 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -43,8 +43,8 @@ tests = para (strong "Cider") , "Strong Emphasis" =: - "/*strength*/" =?> - para (emph . strong $ "strength") + "/*strength*/" =?> + para (emph . strong $ "strength") , "Strikeout" =: "+Kill Bill+" =?> @@ -428,7 +428,27 @@ tests = , "Bullet List in Ordered List" =: ("1. GNU\n" ++ " - Freedom\n") =?> - orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + orderedList [ (para "GNU") <> bulletList [ (plain "Freedom") ] ] + + , "Definition List" =: + unlines [ "- PLL :: phase-locked loop" + , "- TTL ::" + , " transistor-transistor logic" + , "- PSK::phase-shift keying" + , "" + , " a digital modulation scheme" + ] =?> + definitionList [ ("PLL", [ plain $ "phase-locked" <> space <> "loop" ]) + , ("TTL", [ plain $ "transistor-transistor" <> space <> + "logic" ]) + , ("PSK", [ mconcat + [ para $ "phase-shift" <> space <> "keying" + , plain $ spcSep [ "a", "digital" + , "modulation", "scheme" ] + ] + ] + ) + ] ] , testGroup "Tables" -- cgit v1.2.3 From c47bd8404fda0a782719848ef190b56eb0fdb9dc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 7 Apr 2014 11:00:30 +0200 Subject: Org reader: Support inline math (like $E=mc^2$) Closes #1223. --- src/Text/Pandoc/Readers/Org.hs | 22 ++++++++++++++++------ tests/Tests/Readers/Org.hs | 4 ++++ 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 20bca3e28..2bb6ee122 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -459,6 +459,7 @@ inline = choice inlineParsers "inline" , strikeout , underline , code + , math , verbatim , subscript , superscript @@ -530,10 +531,13 @@ underline = B.strong <$> inlinesEnclosedBy '_' code :: OrgParser Inlines code = B.code <$> rawEnclosedBy '=' -verbatim :: OrgParser Inlines +math :: OrgParser Inlines +math = B.math <$> rawEnclosedBy '$' + +verbatim :: OrgParser Inlines verbatim = B.rawInline "" <$> rawEnclosedBy '~' -subscript :: OrgParser Inlines +subscript :: OrgParser Inlines subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces) superscript :: OrgParser Inlines @@ -580,18 +584,24 @@ rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c) -- succeeds only if we're not right after a str (ie. in middle of word) atStart :: OrgParser a -> OrgParser a atStart p = do - pos <- getPosition - st <- getState - guard $ orgLastStrPos st /= Just pos + guard =<< not <$> isRightAfterString p -- | succeeds only if we're at the end of a word atEnd :: OrgParser a -> OrgParser a atEnd p = try $ do - p <* lookingAtEndOfWord + p <* lookingAtEndOfWord where lookingAtEndOfWord = eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars +isRightAfterString :: OrgParser Bool +isRightAfterString = do + pos <- getPosition + st <- getState + -- the position `Nothing` isn't after a String, either, hence the double + -- negation + return $ not $ orgLastStrPos st /= Just pos + postWordChars :: OrgParser [Char] postWordChars = do st <- getState diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index eb9f4d741..77b9d9327 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -54,6 +54,10 @@ tests = "=Robot.rock()=" =?> para (code "Robot.rock()") + , "Math" =: + "$E=mc^2$" =?> + para (math "E=mc^2") + , "Verbatim" =: "~word for word~" =?> para (rawInline "" "word for word") -- cgit v1.2.3