From ec24f9761c71961821c180d3f6eeb3b5c08aaebf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 12:56:25 +0200 Subject: RST reader: Remove duplicate 'http' in PEP links The generated link to PEPs had a duplicate 'http://' in its URL. --- src/Text/Pandoc/Readers/RST.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a574f343a..7785861cc 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1005,7 +1005,7 @@ renderRole contents fmt role attr = case role of where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) where padNo = replicate (4 - length pepNo) '0' ++ pepNo - pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" roleNameEndingIn :: RSTParser Char -> RSTParser String roleNameEndingIn end = many1Till (letter <|> char '-') end -- cgit v1.2.3 From c128daba9dee096ce0e78b81a381f43337b74285 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 17:42:01 +0200 Subject: Org reader: Recognize plain and angle links This adds support for plain links (like http://zeitlens.com) and angle links (like ). --- src/Text/Pandoc/Readers/Org.hs | 34 +++++++++++++++++++++++++--------- tests/Tests/Readers/Org.hs | 14 ++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c71cc24be..7a50b1db9 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -45,7 +45,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>), (<**>) ) import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) -import Data.Char (toLower) +import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M @@ -209,6 +209,9 @@ instance Monoid a => Monoid (F a) where trimInlinesF :: F Inlines -> F Inlines trimInlinesF = liftM trimInlines +returnF :: a -> OrgParser (F a) +returnF = return . return + -- | Like @Text.Parsec.Char.newline@, but causes additional state changes. newline :: OrgParser Char @@ -291,9 +294,6 @@ orgBlock = try $ do "src" -> codeBlockWithAttr classArgs content _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks where - returnF :: a -> OrgParser (F a) - returnF = return . return - parseVerse :: String -> OrgParser (F Blocks) parseVerse cs = fmap B.para . mconcat . intersperse (pure B.linebreak) @@ -834,7 +834,11 @@ noteMarker = try $ do ] linkOrImage :: OrgParser (F Inlines) -linkOrImage = explicitOrImageLink <|> selflinkOrImage "link or image" +linkOrImage = explicitOrImageLink + <|> selflinkOrImage + <|> angleLink + <|> plainLink + "link or image" explicitOrImageLink :: OrgParser (F Inlines) explicitOrImageLink = try $ do @@ -851,15 +855,27 @@ explicitOrImageLink = try $ do selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - return . return $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + returnF $ if isImageFilename src + then B.image src "" "" + else B.link src "" (B.str src) + +plainLink :: OrgParser (F Inlines) +plainLink = try $ do + (orig, src) <- uri + returnF $ B.link src "" (B.str orig) + +angleLink :: OrgParser (F Inlines) +angleLink = try $ do + char '<' + link <- plainLink + char '>' + return link selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String -linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]") +linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") isImageFilename :: String -> Bool isImageFilename filename = diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index f62b73ce4..ed774f527 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -188,6 +188,20 @@ tests = , "Image link" =: "[[sunset.png][dusk.svg]]" =?> (para $ link "sunset.png" "" (image "dusk.svg" "" "")) + + , "Plain link" =: + "Posts on http://zeitlens.com/ can be funny at times." =?> + (para $ spcSep [ "Posts", "on" + , link "http://zeitlens.com/" "" "http://zeitlens.com/" + , "can", "be", "funny", "at", "times." + ]) + + , "Angle link" =: + "Look at for fnords." =?> + (para $ spcSep [ "Look", "at" + , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" + , "for", "fnords." + ]) ] , testGroup "Meta Information" $ -- cgit v1.2.3 From 2f724aaaa4875a21560870d25c3d212f974c6dde Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 24 Apr 2014 17:42:01 +0200 Subject: Org reader: Read anchors as empty spans Anchors (like <>) are parsed as empty spans. --- src/Text/Pandoc/Readers/Org.hs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7a50b1db9..7f1893936 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -740,6 +740,7 @@ inline = , linebreak , footnote , linkOrImage + , anchor , str , endline , emph @@ -886,6 +887,30 @@ isImageFilename filename = imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] protocols = [ "file", "http", "https" ] +-- | Parse an anchor like @<>@ and return an empty span with +-- @anchor-id@ set as id. Legal anchors in org-mode are defined through +-- @org-target-regexp@, which is fairly liberal. Since no link is created if +-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as +-- an anchor. + +anchor :: OrgParser (F Inlines) +anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) + where + name = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + attributes = name >>= \n -> return (solidify n, [], []) + +-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors +-- the org function @org-export-solidify-link-text@. + +solidify :: String -> String +solidify = map replaceSpecialChar + where replaceSpecialChar c + | isAlphaNum c = c + | c `elem` "_.-:" = c + | otherwise = '-' + emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' -- cgit v1.2.3 From 2eec20d92fd0f498da5b66ac03cf6f8159392323 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 25 Apr 2014 15:29:28 +0200 Subject: Org reader: Enable internal links Internal links in Org are possible by using an anchor-name as the target of a link: [[some-anchor][This]] is an internal link. It links <> here. --- src/Text/Pandoc/Readers/Org.hs | 50 ++++++++++++++++++++++++++++++++---------- tests/Tests/Readers/Org.hs | 25 +++++++++++++++++++++ 2 files changed, 63 insertions(+), 12 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 7f1893936..0e52bff90 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String -- | Org-mode parser state data OrgParserState = OrgParserState { orgStateOptions :: ReaderOptions + , orgStateAnchorIds :: [String] , orgStateBlockAttributes :: OrgBlockAttributes , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisNewlines :: Maybe Int @@ -105,6 +106,7 @@ instance Default OrgParserState where defaultOrgParserState :: OrgParserState defaultOrgParserState = OrgParserState { orgStateOptions = def + , orgStateAnchorIds = [] , orgStateBlockAttributes = M.empty , orgStateEmphasisCharStack = [] , orgStateEmphasisNewlines = Nothing @@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState , orgStateNotes' = [] } +recordAnchorId :: String -> OrgParser () +recordAnchorId i = updateState $ \s -> + s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } + addBlockAttribute :: String -> String -> OrgParser () addBlockAttribute key val = updateState $ \s -> let attrs = orgStateBlockAttributes s @@ -848,17 +854,14 @@ explicitOrImageLink = try $ do title <- enclosedRaw (char '[') (char ']') title' <- parseFromString (mconcat <$> many inline) title char ']' - return $ B.link src "" <$> - if isImageFilename src && isImageFilename title - then return $ B.image title mempty mempty - else title' + return $ if isImageFilename src && isImageFilename title + then pure $ B.link src "" $ B.image title mempty mempty + else linkToInlinesF src =<< title' selflinkOrImage :: OrgParser (F Inlines) selflinkOrImage = try $ do src <- char '[' *> linkTarget <* char ']' - returnF $ if isImageFilename src - then B.image src "" "" - else B.link src "" (B.str src) + return $ linkToInlinesF src (B.str src) plainLink :: OrgParser (F Inlines) plainLink = try $ do @@ -878,6 +881,26 @@ selfTarget = try $ char '[' *> linkTarget <* char ']' linkTarget :: OrgParser String linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]") +linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF s@('#':_) = pure . B.link s "" +linkToInlinesF s + | isImageFilename s = const . pure $ B.image s "" "" + | isUri s = pure . B.link s "" + | isRelativeUrl s = pure . B.link s "" +linkToInlinesF s = \title -> do + anchorB <- (s `elem`) <$> asksF orgStateAnchorIds + if anchorB + then pure $ B.link ('#':s) "" title + else pure $ B.emph title + +isRelativeUrl :: String -> Bool +isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s) + +isUri :: String -> Bool +isUri s = let (scheme, path) = break (== ':') s + in all (\c -> isAlphaNum c || c `elem` ".-") scheme + && not (null path) + isImageFilename :: String -> Bool isImageFilename filename = any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && @@ -894,12 +917,15 @@ isImageFilename filename = -- an anchor. anchor :: OrgParser (F Inlines) -anchor = try $ pure <$> (B.spanWith <$> attributes <*> pure mempty) +anchor = try $ do + anchorId <- parseAnchor + recordAnchorId anchorId + returnF $ B.spanWith (solidify anchorId, [], []) mempty where - name = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - attributes = name >>= \n -> return (solidify n, [], []) + parseAnchor = string "<<" + *> many1 (noneOf "\t\n\r<>\"' ") + <* string ">>" + <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors -- the org function @org-export-solidify-link-text@. diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ed774f527..96747d148 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -202,6 +202,11 @@ tests = , link "http://moltkeplatz.de" "" "http://moltkeplatz.de" , "for", "fnords." ]) + + , "Anchor" =: + "<> Link here later." =?> + (para $ spanWith ("anchor", [], []) mempty <> + "Link" <> space <> "here" <> space <> "later.") ] , testGroup "Meta Information" $ @@ -279,6 +284,26 @@ tests = , ":END:" ] =?> para (":FOO:" <> space <> ":END:") + + , "Anchor reference" =: + unlines [ "<> Target." + , "" + , "[[link-here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (link "#link-here" "" ("See" <> space <> "here!"))) + + , "Search links are read as emph" =: + "[[Wally][Where's Wally?]]" =?> + (para (emph $ "Where's" <> space <> "Wally?")) + + , "Link to nonexistent anchor" =: + unlines [ "<> Target." + , "" + , "[[link$here][See here!]]" + ] =?> + (para (spanWith ("link-here", [], []) mempty <> "Target.") <> + para (emph ("See" <> space <> "here!"))) ] , testGroup "Basic Blocks" $ -- cgit v1.2.3