aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2013-03-17 17:43:51 -0700
committerJohn MacFarlane <fiddlosopher@gmail.com>2013-03-19 20:22:14 -0700
commit74d53f4347623631c17be557d3682dd807214263 (patch)
treea2215a0499b465ac057b2a89ae57d450e609db0d /src
parentb3661adadae5eecc21e97b206d222c89d443dfda (diff)
downloadpandoc-74d53f4347623631c17be557d3682dd807214263.tar.gz
Added Text.Pandoc.Readers.OPML, exporting readOPML.
The _note attribute is supported. This is unofficial, but used e.g. in OmniOutliner and supported by multimarkdown. We treat the contents as markdown blocks under a section header. Added to documentation and tests.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc.hs3
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs95
2 files changed, 98 insertions, 0 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 8201bc881..80ddb72d7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -72,6 +72,7 @@ module Text.Pandoc
, readHtml
, readTextile
, readDocBook
+ , readOPML
, readNative
-- * Writers: converting /from/ Pandoc format
, Writer (..)
@@ -113,6 +114,7 @@ import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.DocBook
+import Text.Pandoc.Readers.OPML
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile
@@ -192,6 +194,7 @@ readers = [("native" , \_ s -> return $ readNative s)
,("rst" , \o s -> return $ readRST o s)
,("mediawiki" , \o s -> return $ readMediaWiki o s)
,("docbook" , \o s -> return $ readDocBook o s)
+ ,("opml" , \o s -> return $ readOPML o s)
,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs
,("html" , \o s -> return $ readHtml o s)
,("latex" , \o s -> return $ readLaTeX o s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
new file mode 100644
index 000000000..53b599349
--- /dev/null
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -0,0 +1,95 @@
+module Text.Pandoc.Readers.OPML ( readOPML ) where
+import Data.Char (toUpper)
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Readers.Markdown (readMarkdown)
+import Text.XML.Light
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Data.Generics
+import Data.Monoid
+import Control.Monad.State
+import Control.Applicative ((<$>), (<$))
+
+type OPML = State OPMLState
+
+data OPMLState = OPMLState{
+ opmlSectionLevel :: Int
+ , opmlDocTitle :: Inlines
+ , opmlDocAuthors :: [Inlines]
+ , opmlDocDate :: Inlines
+ } deriving Show
+
+readOPML :: ReaderOptions -> String -> Pandoc
+readOPML _ inp = setTitle (opmlDocTitle st')
+ $ setAuthors (opmlDocAuthors st')
+ $ setDate (opmlDocDate st')
+ $ doc $ mconcat bs
+ where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
+ OPMLState{ opmlSectionLevel = 0
+ , opmlDocTitle = mempty
+ , opmlDocAuthors = []
+ , opmlDocDate = mempty
+ }
+
+-- normalize input, consolidating adjacent Text and CRef elements
+normalizeTree :: [Content] -> [Content]
+normalizeTree = everywhere (mkT go)
+ where go :: [Content] -> [Content]
+ go (Text (CData CDataRaw _ _):xs) = xs
+ go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
+ Text (CData CDataText (s1 ++ s2) z):xs
+ go (Text (CData CDataText s1 z):CRef r:xs) =
+ Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ go (CRef r:Text (CData CDataText s1 z):xs) =
+ Text (CData CDataText (convertEntity r ++ s1) z):xs
+ go (CRef r1:CRef r2:xs) =
+ Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ go xs = xs
+
+convertEntity :: String -> String
+convertEntity e = maybe (map toUpper e) (:[]) (lookupEntity e)
+
+-- convenience function to get an attribute value, defaulting to ""
+attrValue :: String -> Element -> String
+attrValue attr elt =
+ case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
+ Just z -> z
+ Nothing -> ""
+
+asHtml :: String -> Inlines
+asHtml s = case readHtml def s of
+ Pandoc _ [Plain ils] -> fromList ils
+ _ -> mempty
+
+asMarkdown :: String -> Blocks
+asMarkdown s = fromList bs
+ where Pandoc _ bs = readMarkdown def s
+
+getBlocks :: Element -> OPML Blocks
+getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
+
+parseBlock :: Content -> OPML Blocks
+parseBlock (Elem e) =
+ case qName (elName e) of
+ "ownerName" -> mempty <$ modify (\st ->
+ st{opmlDocAuthors = [text $ strContent e]})
+ "dateModified" -> mempty <$ modify (\st ->
+ st{opmlDocDate = text $ strContent e})
+ "title" -> mempty <$ modify (\st ->
+ st{opmlDocTitle = text $ strContent e})
+ "outline" -> gets opmlSectionLevel >>= sect . (+1)
+ "?xml" -> return mempty
+ _ -> getBlocks e
+ where sect n = do let headerText = asHtml $ attrValue "text" e
+ let noteBlocks = asMarkdown $ attrValue "_note" e
+ modify $ \st -> st{ opmlSectionLevel = n }
+ bs <- getBlocks e
+ modify $ \st -> st{ opmlSectionLevel = n - 1 }
+ let headerText' = case attrValue "type" e of
+ "link" -> link
+ (attrValue "url" e) "" headerText
+ _ -> headerText
+ return $ header n headerText' <> noteBlocks <> bs
+parseBlock _ = return mempty