about summary refs log tree commit diff
diff options
context:
space:
mode:
author mpickering <matthewtpickering@gmail.com> 2014-06-15 13:38:16 +0100
committer Jesse Rosenthal <jrosenthal@jhu.edu> 2014-06-16 07:18:40 -0400
commit 3bc818d2d3079f1d31dbb409f839585a32f26f6e (patch)
tree 71c9dfd49ac29d2603a05c1754a895c8229f643e
parent 7f4668d87185929f75e5d3852c13ef2a5430b0d9 (diff)
download pandoc-3bc818d2d3079f1d31dbb409f839585a32f26f6e.tar.gz
Integrated the docx reader into the main pandoc program.
Changes also include generalising the type of reader allowed: a reader may now take either a String or a ByteString as input. The mechanism now mimics the more general writer (output) mechanism.
-rw-r--r-- pandoc.hs 13
-rw-r--r-- src/Text/Pandoc.hs 56
2 files changed, 46 insertions, 23 deletions
diff --git a/pandoc.hs b/pandoc.hs
index 5dd0e6899..0a8070d7c 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -858,6 +858,7 @@ defaultReaderName fallback (x:xs) =
".textile" -> "textile"
".native" -> "native"
".json" -> "json"
+ ".docx" -> "docx"
_ -> defaultReaderName fallback xs
-- Returns True if extension of first source is .lhs
@@ -1158,15 +1159,21 @@ main = do
Left e -> throwIO e
Right (bs,_) -> return $ UTF8.toString bs
+ let readFiles [] = error "Cannot read archive from stdin"
+ readFiles (x:_) = B.readFile x
+
let convertTabs = tabFilter (if preserveTabs then 0 else tabStop)
let handleIncludes' = if readerName' == "latex" || readerName' == "latex+lhs"
then handleIncludes
else return
- doc <- readSources sources >>=
- handleIncludes' . convertTabs . intercalate "\n" >>=
- reader readerOpts
+ doc <- case reader of
+ StringReader r->
+ readSources sources >>=
+ handleIncludes' . convertTabs . intercalate "\n" >>=
+ r readerOpts
+ ByteStringReader r -> readFiles sources >>= r readerOpts
let doc0 = M.foldWithKey setMeta doc metadata
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 130338f0e..aff471a3c 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -62,6 +62,8 @@ module Text.Pandoc
, readers
, writers
-- * Readers: converting /to/ Pandoc format
+ , Reader (..)
+ , readDocX
, readMarkdown
, readMediaWiki
, readRST
@@ -125,6 +127,7 @@ import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Haddock
+import Text.Pandoc.Readers.DocX
import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.RST
@@ -192,24 +195,34 @@ markdown o s = do
mapM_ warn warnings
return doc
+data Reader = StringReader (ReaderOptions -> String -> IO Pandoc)
+ | ByteStringReader (ReaderOptions -> BL.ByteString -> IO Pandoc)
+
+mkStringReader :: (ReaderOptions -> String -> Pandoc) -> Reader
+mkStringReader r = StringReader (\o s -> return $ r o s)
+
+mkBSReader :: (ReaderOptions -> BL.ByteString -> Pandoc) -> Reader
+mkBSReader r = ByteStringReader (\o s -> return $ r o s)
+
-- | Association list of formats and readers.
-readers :: [(String, ReaderOptions -> String -> IO Pandoc)]
-readers = [ ("native" , \_ s -> return $ readNative s)
- ,("json" , \o s -> return $ readJSON o s)
- ,("markdown" , markdown)
- ,("markdown_strict" , markdown)
- ,("markdown_phpextra" , markdown)
- ,("markdown_github" , markdown)
- ,("markdown_mmd", markdown)
- ,("rst" , \o s -> return $ readRST o s)
- ,("mediawiki" , \o s -> return $ readMediaWiki o s)
- ,("docbook" , \o s -> return $ readDocBook o s)
- ,("opml" , \o s -> return $ readOPML o s)
- ,("org" , \o s -> return $ readOrg o s)
- ,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs
- ,("html" , \o s -> return $ readHtml o s)
- ,("latex" , \o s -> return $ readLaTeX o s)
- ,("haddock" , \o s -> return $ readHaddock o s)
+readers :: [(String, Reader)]
+readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
+ ,("json" , mkStringReader readJSON )
+ ,("markdown" , StringReader markdown)
+ ,("markdown_strict" , StringReader markdown)
+ ,("markdown_phpextra" , StringReader markdown)
+ ,("markdown_github" , StringReader markdown)
+ ,("markdown_mmd", StringReader markdown)
+ ,("rst" , mkStringReader readRST )
+ ,("mediawiki" , mkStringReader readMediaWiki)
+ ,("docbook" , mkStringReader readDocBook)
+ ,("opml" , mkStringReader readOPML)
+ ,("org" , mkStringReader readOrg)
+ ,("textile" , mkStringReader readTextile) -- TODO : textile+lhs
+ ,("html" , mkStringReader readHtml)
+ ,("latex" , mkStringReader readLaTeX)
+ ,("haddock" , mkStringReader readHaddock)
+ ,("docx" , mkBSReader readDocX)
]
data Writer = PureStringWriter (WriterOptions -> Pandoc -> String)
@@ -280,14 +293,17 @@ getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers, Ext
getDefaultExtensions _ = Set.fromList [Ext_auto_identifiers]
-- | Retrieve reader based on formatSpec (format+extensions).
-getReader :: String -> Either String (ReaderOptions -> String -> IO Pandoc)
+getReader :: String -> Either String Reader
getReader s =
case parseFormatSpec s of
Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e]
- Right (readerName, setExts) ->
+ Right (readerName, setExts) ->
case lookup readerName readers of
Nothing -> Left $ "Unknown reader: " ++ readerName
- Just r -> Right $ \o ->
+ Just (StringReader r) -> Right $ StringReader $ \o ->
+ r o{ readerExtensions = setExts $
+ getDefaultExtensions readerName }
+ Just (ByteStringReader r) -> Right $ ByteStringReader $ \o ->
r o{ readerExtensions = setExts $
getDefaultExtensions readerName }