diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2013-03-19 20:23:48 -0700 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2013-03-19 20:23:48 -0700 |
commit | 8aa617238042ce8605863e4526e8f5002647fd97 (patch) | |
tree | 60559c84302f2c93e160c458899fbcd80384babd /src/Text | |
parent | d596b0db8321cdb9c018ac8037d301291d0cc63c (diff) | |
parent | 74d53f4347623631c17be557d3682dd807214263 (diff) | |
download | pandoc-8aa617238042ce8605863e4526e8f5002647fd97.tar.gz |
Merge branch 'opml'
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/OPML.hs | 95 |
2 files changed, 98 insertions, 0 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 8201bc881..80ddb72d7 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -72,6 +72,7 @@ module Text.Pandoc , readHtml , readTextile , readDocBook + , readOPML , readNative -- * Writers: converting /from/ Pandoc format , Writer (..) @@ -113,6 +114,7 @@ import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.DocBook +import Text.Pandoc.Readers.OPML import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile @@ -192,6 +194,7 @@ readers = [("native" , \_ s -> return $ readNative s) ,("rst" , \o s -> return $ readRST o s) ,("mediawiki" , \o s -> return $ readMediaWiki o s) ,("docbook" , \o s -> return $ readDocBook o s) + ,("opml" , \o s -> return $ readOPML o s) ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs ,("html" , \o s -> return $ readHtml o s) ,("latex" , \o s -> return $ readLaTeX o s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs new file mode 100644 index 000000000..53b599349 --- /dev/null +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -0,0 +1,95 @@ +module Text.Pandoc.Readers.OPML ( readOPML ) where +import Data.Char (toUpper) +import Text.Pandoc.Options +import Text.Pandoc.Definition +import Text.Pandoc.Builder +import Text.Pandoc.Readers.HTML (readHtml) +import Text.Pandoc.Readers.Markdown (readMarkdown) +import Text.XML.Light +import Text.HTML.TagSoup.Entity (lookupEntity) +import Data.Generics +import Data.Monoid +import Control.Monad.State +import Control.Applicative ((<$>), (<$)) + +type OPML = State OPMLState + +data OPMLState = OPMLState{ + opmlSectionLevel :: Int + , opmlDocTitle :: Inlines + , opmlDocAuthors :: [Inlines] + , opmlDocDate :: Inlines + } deriving Show + +readOPML :: ReaderOptions -> String -> Pandoc +readOPML _ inp = setTitle (opmlDocTitle st') + $ setAuthors (opmlDocAuthors st') + $ setDate (opmlDocDate st') + $ doc $ mconcat bs + where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp) + OPMLState{ opmlSectionLevel = 0 + , opmlDocTitle = mempty + , opmlDocAuthors = [] + , opmlDocDate = mempty + } + +-- normalize input, consolidating adjacent Text and CRef elements +normalizeTree :: [Content] -> [Content] +normalizeTree = everywhere (mkT go) + where go :: [Content] -> [Content] + go (Text (CData CDataRaw _ _):xs) = xs + go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = + Text (CData CDataText (s1 ++ s2) z):xs + go (Text (CData CDataText s1 z):CRef r:xs) = + Text (CData CDataText (s1 ++ convertEntity r) z):xs + go (CRef r:Text (CData CDataText s1 z):xs) = + Text (CData CDataText (convertEntity r ++ s1) z):xs + go (CRef r1:CRef r2:xs) = + Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + go xs = xs + +convertEntity :: String -> String +convertEntity e = maybe (map toUpper e) (:[]) (lookupEntity e) + +-- convenience function to get an attribute value, defaulting to "" +attrValue :: String -> Element -> String +attrValue attr elt = + case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of + Just z -> z + Nothing -> "" + +asHtml :: String -> Inlines +asHtml s = case readHtml def s of + Pandoc _ [Plain ils] -> fromList ils + _ -> mempty + +asMarkdown :: String -> Blocks +asMarkdown s = fromList bs + where Pandoc _ bs = readMarkdown def s + +getBlocks :: Element -> OPML Blocks +getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) + +parseBlock :: Content -> OPML Blocks +parseBlock (Elem e) = + case qName (elName e) of + "ownerName" -> mempty <$ modify (\st -> + st{opmlDocAuthors = [text $ strContent e]}) + "dateModified" -> mempty <$ modify (\st -> + st{opmlDocDate = text $ strContent e}) + "title" -> mempty <$ modify (\st -> + st{opmlDocTitle = text $ strContent e}) + "outline" -> gets opmlSectionLevel >>= sect . (+1) + "?xml" -> return mempty + _ -> getBlocks e + where sect n = do let headerText = asHtml $ attrValue "text" e + let noteBlocks = asMarkdown $ attrValue "_note" e + modify $ \st -> st{ opmlSectionLevel = n } + bs <- getBlocks e + modify $ \st -> st{ opmlSectionLevel = n - 1 } + let headerText' = case attrValue "type" e of + "link" -> link + (attrValue "url" e) "" headerText + _ -> headerText + return $ header n headerText' <> noteBlocks <> bs +parseBlock _ = return mempty |