16 files changed, 682 insertions, 358 deletions
diff --git a/data/translations/en.trans b/data/translations/en.trans
new file mode 100644
index 000000000..0bdc172af
--- /dev/null
+++ b/data/translations/en.trans
@@ -0,0 +1,19 @@
+Preface: Preface
+References: References
+Abstract: Abstract
+Bibliography: Bibliography
+Chapter: Chapter
+Appendix: Appendix
+Contents: Contents
+ListOfFigures: ListOfFigures
+ListOfTables: ListOfTables
+Index: Index
+Figure: Figure
+Table: Table
+Part: Part
+Page: page
+Proof: Proof
+See: see
+SeeAlso: see also
+Cc: cc
+To: To
diff --git a/data/translations/fr.trans b/data/translations/fr.trans
new file mode 100644
index 000000000..a1415d846
--- /dev/null
+++ b/data/translations/fr.trans
@@ -0,0 +1,19 @@
+Preface: Préface
+References: Références
+Abstract: Résumé
+Bibliography: Bibliographie
+Chapter: Chaptire
+Appendix: Annexe
+Contents: Table des matières
+ListOfFigures: Table des figures
+ListOfTables: Liste des tableaux
+Index: Index
+Figure: Fig.
+Table: Tab.
+Part: partie
+Page: page
+Proof: Démonstration
+See: voir
+SeeAlso: voir aussi
+Cc: Copie à
+To:
diff --git a/pandoc.cabal b/pandoc.cabal
index c7533dc85..07bb36f95 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -72,6 +72,8 @@ Data-Files:
                  data/templates/default.org
                  data/templates/default.epub2
                  data/templates/default.epub3
+                 -- translations
+                 data/translations/*.trans
                  -- source files for reference.docx
                  data/docx/[Content_Types].xml
                  data/docx/_rels/.rels
@@ -469,7 +471,8 @@ Library
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.UUID,
-                   Text.Pandoc.BCP47
+                   Text.Pandoc.BCP47,
+                   Text.Pandoc.Translations,
                    Text.Pandoc.Slides,
                    Text.Pandoc.Compat.Time,
                    Paths_pandoc
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 47976a499..99d9aa4cb 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -41,7 +41,7 @@ module Text.Pandoc.App (
 import Control.Applicative ((<|>))
 import qualified Control.Exception as E
 import Control.Monad
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Trans
 import Data.Monoid
 import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions, eitherDecode',
@@ -71,21 +71,22 @@ import System.Environment (getArgs, getEnvironment, getProgName)
 import System.Exit (ExitCode (..), exitSuccess)
 import System.FilePath
 import System.IO (nativeNewline, stdout)
-import qualified System.IO as IO (Newline (..))
 import System.IO.Error (isDoesNotExistError)
+import qualified System.IO as IO (Newline (..))
 import Text.Pandoc
 import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog,
                           setResourcePath, getMediaBag, setTrace, report,
-                          setUserDataDir)
+                          setUserDataDir, readFileStrict, readDataFile,
+                          readDefaultDataFile, setTranslations)
 import Text.Pandoc.Highlighting (highlightingStyles)
+import Text.Pandoc.BCP47 (parseBCP47, Lang(..))
 import Text.Pandoc.Lua (runLuaFilter, LuaException(..))
 import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.Process (pipeProcess)
 import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
-import Text.Pandoc.Shared (headerShift, isURI, openURL, readDataFile,
-                           readDataFileUTF8, readDefaultDataFile,
+import Text.Pandoc.Shared (headerShift, isURI, openURL,
                            safeRead, tabFilter, eastAsianLineBreakFilter)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.XML (toEntities)
@@ -215,84 +216,6 @@ convertWithOpts opts = do
                                   _ -> e
 
   let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
-
-  templ <- case optTemplate opts of
-                _ | not standalone -> return Nothing
-                Nothing -> do
-                           deftemp <- runIO $ do
-                                        setUserDataDir datadir
-                                        getDefaultTemplate format
-                           case deftemp of
-                                 Left e  -> E.throwIO e
-                                 Right t -> return (Just t)
-                Just tp -> do
-                           -- strip off extensions
-                           let tp' = case takeExtension tp of
-                                          "" -> tp <.> format
-                                          _  -> tp
-                           Just <$> E.catch (UTF8.readFile tp')
-                             (\e -> if isDoesNotExistError e
-                                       then E.catch
-                                             (readDataFileUTF8 datadir
-                                                ("templates" </> tp'))
-                                             (\e' -> let _ = (e' :: E.SomeException)
-                                                     in E.throwIO e')
-                                       else E.throwIO e)
-
-  let addStringAsVariable varname s vars = return $ (varname, s) : vars
-
-  let addContentsAsVariable varname fp vars = do
-             s <- UTF8.readFile fp
-             return $ (varname, s) : vars
-
-  -- note: this reverses the list constructed in option parsing,
-  -- which in turn was reversed from the command-line order,
-  -- so we end up with the correct order in the variable list:
-  let withList _ [] vars     = return vars
-      withList f (x:xs) vars = f x vars >>= withList f xs
-
-  variables <-
-
-      withList (addStringAsVariable "sourcefile")
-               (reverse $ optInputFiles opts) (("outputfile", optOutputFile opts) : optVariables opts)
-               -- we reverse this list because, unlike
-               -- the other option lists here, it is
-               -- not reversed when parsed from CLI arguments.
-               -- See withList, above.
-      >>=
-      withList (addContentsAsVariable "include-before")
-               (optIncludeBeforeBody opts)
-      >>=
-      withList (addContentsAsVariable "include-after")
-               (optIncludeAfterBody opts)
-      >>=
-      withList (addContentsAsVariable "header-includes")
-               (optIncludeInHeader opts)
-      >>=
-      withList (addStringAsVariable "css") (optCss opts)
-      >>=
-      maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts)
-      >>=
-      maybe return (addStringAsVariable "epub-cover-image")
-                   (optEpubCoverImage opts)
-      >>=
-      (\vars -> case mathMethod of
-                     LaTeXMathML Nothing -> do
-                        s <- readDataFileUTF8 datadir "LaTeXMathML.js"
-                        return $ ("mathml-script", s) : vars
-                     _ -> return vars)
-      >>=
-      (\vars ->  if format == "dzslides"
-                    then do
-                        dztempl <- readDataFileUTF8 datadir
-                                     ("dzslides" </> "template.html")
-                        let dzline = "<!-- {{{{ dzslides core"
-                        let dzcore = unlines
-                                   $ dropWhile (not . (dzline `isPrefixOf`))
-                                   $ lines dztempl
-                        return $ ("dzslides-core", dzcore) : vars
-                    else return vars)
-
   let sourceURL = case sources of
                     []    -> Nothing
                     (x:_) -> case parseURI x of
@@ -302,21 +225,7 @@ convertWithOpts opts = do
                                                      uriFragment = "" }
                                 _ -> Nothing
 
-  abbrevs <- (Set.fromList . filter (not . null) . lines) <$>
-             case optAbbreviations opts of
-                  Nothing -> readDataFileUTF8 datadir "abbreviations"
-                  Just f  -> UTF8.readFile f
-
-  let readerOpts = def{ readerStandalone = standalone
-                      , readerColumns = optColumns opts
-                      , readerTabStop = optTabStop opts
-                      , readerIndentedCodeClasses = optIndentedCodeClasses opts
-                      , readerDefaultImageExtension =
-                         optDefaultImageExtension opts
-                      , readerTrackChanges = optTrackChanges opts
-                      , readerAbbreviations = abbrevs
-                      , readerExtensions = readerExts
-                      }
+  let addStringAsVariable varname s vars = return $ (varname, s) : vars
 
   highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts
   let addSyntaxMap existingmap f = do
@@ -336,40 +245,6 @@ convertWithOpts opts = do
                   (\(syn,dep) -> (T.unpack syn ++ " requires " ++
                     T.unpack dep ++ " through IncludeRules.")) xs)
 
-  let writerOptions = def { writerTemplate         = templ,
-                            writerVariables        = variables,
-                            writerTabStop          = optTabStop opts,
-                            writerTableOfContents  = optTableOfContents opts,
-                            writerHTMLMathMethod   = mathMethod,
-                            writerIncremental      = optIncremental opts,
-                            writerCiteMethod       = optCiteMethod opts,
-                            writerNumberSections   = optNumberSections opts,
-                            writerNumberOffset     = optNumberOffset opts,
-                            writerSectionDivs      = optSectionDivs opts,
-                            writerExtensions       = writerExts,
-                            writerReferenceLinks   = optReferenceLinks opts,
-                            writerReferenceLocation = optReferenceLocation opts,
-                            writerDpi              = optDpi opts,
-                            writerWrapText         = optWrapText opts,
-                            writerColumns          = optColumns opts,
-                            writerEmailObfuscation = optEmailObfuscation opts,
-                            writerIdentifierPrefix = optIdentifierPrefix opts,
-                            writerSourceURL        = sourceURL,
-                            writerHtmlQTags        = optHtmlQTags opts,
-                            writerTopLevelDivision = optTopLevelDivision opts,
-                            writerListings         = optListings opts,
-                            writerSlideLevel       = optSlideLevel opts,
-                            writerHighlightStyle   = highlightStyle,
-                            writerSetextHeaders    = optSetextHeaders opts,
-                            writerEpubSubdirectory = optEpubSubdirectory opts,
-                            writerEpubMetadata     = epubMetadata,
-                            writerEpubFonts        = optEpubFonts opts,
-                            writerEpubChapterLevel = optEpubChapterLevel opts,
-                            writerTOCDepth         = optTOCDepth opts,
-                            writerReferenceDoc     = optReferenceDoc opts,
-                            writerLaTeXArgs        = optLaTeXEngineArgs opts,
-                            writerSyntaxMap        = syntaxMap
-                          }
 
 
 #ifdef _WINDOWS
@@ -383,18 +258,6 @@ convertWithOpts opts = do
             "Specify an output file using the -o option."
 
 
-  let transforms = (case optBaseHeaderLevel opts of
-                        x | x > 1     -> (headerShift (x - 1) :)
-                          | otherwise -> id) $
-                   (if extensionEnabled Ext_east_asian_line_breaks
-                          readerExts &&
-                       not (extensionEnabled Ext_east_asian_line_breaks
-                            writerExts &&
-                            writerWrapText writerOptions == WrapPreserve)
-                       then (eastAsianLineBreakFilter :)
-                       else id)
-                   []
-
   let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
                                   then 0
                                   else optTabStop opts)
@@ -419,32 +282,175 @@ convertWithOpts opts = do
             E.throwIO PandocFailOnWarningError
         return res
 
-  let sourceToDoc :: [FilePath] -> PandocIO Pandoc
-      sourceToDoc sources' =
-         case reader of
-              TextReader r
-                | optFileScope opts || readerName == "json" ->
-                    mconcat <$> mapM (readSource >=> r readerOpts) sources
-                | otherwise ->
-                    readSources sources' >>= r readerOpts
-              ByteStringReader r ->
-                mconcat <$> mapM (readFile' >=> r readerOpts) sources
-
-  metadata <- if format == "jats" &&
-                 isNothing (lookup "csl" (optMetadata opts)) &&
-                 isNothing (lookup "citation-style" (optMetadata opts))
-                 then do
-                   jatsCSL <- readDataFile datadir "jats.csl"
-                   let jatsEncoded = makeDataURI ("application/xml", jatsCSL)
-                   return $ ("csl", jatsEncoded) : optMetadata opts
-                 else return $ optMetadata opts
-
   let eol = case optEol opts of
                  CRLF   -> IO.CRLF
                  LF     -> IO.LF
                  Native -> nativeNewline
 
+  -- note: this reverses the list constructed in option parsing,
+  -- which in turn was reversed from the command-line order,
+  -- so we end up with the correct order in the variable list:
+  let withList _ [] vars     = return vars
+      withList f (x:xs) vars = f x vars >>= withList f xs
+
+  let addContentsAsVariable varname fp vars = do
+             s <- UTF8.toString <$> readFileStrict fp
+             return $ (varname, s) : vars
+
   runIO' $ do
+    variables <-
+        withList (addStringAsVariable "sourcefile")
+                 (reverse $ optInputFiles opts)
+                 (("outputfile", optOutputFile opts) : optVariables opts)
+                 -- we reverse this list because, unlike
+                 -- the other option lists here, it is
+                 -- not reversed when parsed from CLI arguments.
+                 -- See withList, above.
+        >>=
+        withList (addContentsAsVariable "include-before")
+                 (optIncludeBeforeBody opts)
+        >>=
+        withList (addContentsAsVariable "include-after")
+                 (optIncludeAfterBody opts)
+        >>=
+        withList (addContentsAsVariable "header-includes")
+                 (optIncludeInHeader opts)
+        >>=
+        withList (addStringAsVariable "css") (optCss opts)
+        >>=
+        maybe return (addStringAsVariable "title-prefix")
+                     (optTitlePrefix opts)
+        >>=
+        maybe return (addStringAsVariable "epub-cover-image")
+                     (optEpubCoverImage opts)
+        >>=
+        (\vars -> case mathMethod of
+                       LaTeXMathML Nothing -> do
+                          s <- UTF8.toString <$> readDataFile "LaTeXMathML.js"
+                          return $ ("mathml-script", s) : vars
+                       _ -> return vars)
+        >>=
+        (\vars ->  if format == "dzslides"
+                      then do
+                          dztempl <- UTF8.toString <$> readDataFile
+                                       ("dzslides" </> "template.html")
+                          let dzline = "<!-- {{{{ dzslides core"
+                          let dzcore = unlines
+                                     $ dropWhile (not . (dzline `isPrefixOf`))
+                                     $ lines dztempl
+                          return $ ("dzslides-core", dzcore) : vars
+                      else return vars)
+
+    abbrevs <- (Set.fromList . filter (not . null) . lines) <$>
+               case optAbbreviations opts of
+                    Nothing -> UTF8.toString <$> readDataFile "abbreviations"
+                    Just f  -> UTF8.toString <$> readFileStrict f
+
+    templ <- case optTemplate opts of
+                    _ | not standalone -> return Nothing
+                    Nothing -> Just <$> getDefaultTemplate format
+                    Just tp -> do
+                      -- strip off extensions
+                      let tp' = case takeExtension tp of
+                                     "" -> tp <.> format
+                                     _  -> tp
+                      Just . UTF8.toString <$>
+                            (readFileStrict tp' `catchError`
+                             (\e ->
+                                 case e of
+                                      PandocIOError _ e' |
+                                        isDoesNotExistError e' ->
+                                         readDataFile ("templates" </> tp')
+                                      _ -> throwError e))
+
+    metadata <- if format == "jats" &&
+                   isNothing (lookup "csl" (optMetadata opts)) &&
+                   isNothing (lookup "citation-style" (optMetadata opts))
+                   then do
+                     jatsCSL <- readDataFile "jats.csl"
+                     let jatsEncoded = makeDataURI
+                                         ("application/xml", jatsCSL)
+                     return $ ("csl", jatsEncoded) : optMetadata opts
+                   else return $ optMetadata opts
+
+    case lookup "lang" (optMetadata opts) of
+           Just l  -> case parseBCP47 l of
+                           Left _ -> return ()
+                           Right l' -> setTranslations l'
+           Nothing -> setTranslations $ Lang "en" "" "US" []
+
+    let writerOptions = def {
+            writerTemplate         = templ
+          , writerVariables        = variables
+          , writerTabStop          = optTabStop opts
+          , writerTableOfContents  = optTableOfContents opts
+          , writerHTMLMathMethod   = mathMethod
+          , writerIncremental      = optIncremental opts
+          , writerCiteMethod       = optCiteMethod opts
+          , writerNumberSections   = optNumberSections opts
+          , writerNumberOffset     = optNumberOffset opts
+          , writerSectionDivs      = optSectionDivs opts
+          , writerExtensions       = writerExts
+          , writerReferenceLinks   = optReferenceLinks opts
+          , writerReferenceLocation = optReferenceLocation opts
+          , writerDpi              = optDpi opts
+          , writerWrapText         = optWrapText opts
+          , writerColumns          = optColumns opts
+          , writerEmailObfuscation = optEmailObfuscation opts
+          , writerIdentifierPrefix = optIdentifierPrefix opts
+          , writerSourceURL        = sourceURL
+          , writerHtmlQTags        = optHtmlQTags opts
+          , writerTopLevelDivision = optTopLevelDivision opts
+          , writerListings         = optListings opts
+          , writerSlideLevel       = optSlideLevel opts
+          , writerHighlightStyle   = highlightStyle
+          , writerSetextHeaders    = optSetextHeaders opts
+          , writerEpubSubdirectory = optEpubSubdirectory opts
+          , writerEpubMetadata     = epubMetadata
+          , writerEpubFonts        = optEpubFonts opts
+          , writerEpubChapterLevel = optEpubChapterLevel opts
+          , writerTOCDepth         = optTOCDepth opts
+          , writerReferenceDoc     = optReferenceDoc opts
+          , writerLaTeXArgs        = optLaTeXEngineArgs opts
+          , writerSyntaxMap        = syntaxMap
+          }
+
+    let readerOpts = def{
+            readerStandalone = standalone
+          , readerColumns = optColumns opts
+          , readerTabStop = optTabStop opts
+          , readerIndentedCodeClasses = optIndentedCodeClasses opts
+          , readerDefaultImageExtension =
+             optDefaultImageExtension opts
+          , readerTrackChanges = optTrackChanges opts
+          , readerAbbreviations = abbrevs
+          , readerExtensions = readerExts
+          }
+
+    let transforms = (case optBaseHeaderLevel opts of
+                          x | x > 1     -> (headerShift (x - 1) :)
+                            | otherwise -> id) $
+                     (if extensionEnabled Ext_east_asian_line_breaks
+                            readerExts &&
+                         not (extensionEnabled Ext_east_asian_line_breaks
+                              writerExts &&
+                              writerWrapText writerOptions == WrapPreserve)
+                         then (eastAsianLineBreakFilter :)
+                         else id)
+                     []
+
+    let sourceToDoc :: [FilePath] -> PandocIO Pandoc
+        sourceToDoc sources' =
+           case reader of
+                TextReader r
+                  | optFileScope opts || readerName == "json" ->
+                      mconcat <$> mapM (readSource >=> r readerOpts) sources
+                  | otherwise ->
+                      readSources sources' >>= r readerOpts
+                ByteStringReader r ->
+                  mconcat <$> mapM (readFile' >=> r readerOpts) sources
+
+
     setUserDataDir datadir
     when (readerName == "markdown_github" ||
           writerName == "markdown_github") $
@@ -1011,7 +1017,8 @@ options =
     , Option "" ["print-default-data-file"]
                  (ReqArg
                   (\arg _ -> do
-                     readDefaultDataFile arg >>= BS.hPutStr stdout
+                     runIOorExplode $
+                       readDefaultDataFile arg >>= liftIO . BS.hPutStr stdout
                      exitSuccess)
                   "FILE")
                   "" -- "Print default data file"
@@ -1469,7 +1476,9 @@ options =
                  (NoArg
                   (\_ -> do
                      ddir <- getDataDir
-                     tpl <- readDataFileUTF8 Nothing "bash_completion.tpl"
+                     tpl <- runIOorExplode $
+                              UTF8.toString <$>
+                                readDefaultDataFile "bash_completion.tpl"
                      let optnames (Option shorts longs _ _) =
                            map (\c -> ['-',c]) shorts ++
                            map ("--" ++) longs
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
index b4b55c5d4..1790ccfb7 100644
--- a/src/Text/Pandoc/BCP47.hs
+++ b/src/Text/Pandoc/BCP47.hs
@@ -29,7 +29,6 @@ Functions for parsing and rendering BCP47 language identifiers.
 -}
 module Text.Pandoc.BCP47 (
                        getLang
-                     , toLang
                      , parseBCP47
                      , Lang(..)
                      , renderLang
@@ -40,8 +39,6 @@ import Data.Char (isAscii, isLetter, isUpper, isLower, toUpper, toLower,
                   isAlphaNum)
 import Data.List (intercalate)
 import Text.Pandoc.Definition
-import Text.Pandoc.Class (PandocMonad, report)
-import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import qualified Text.Parsec as P
 
@@ -68,17 +65,6 @@ getLang opts meta =
                Just (MetaString s)        -> Just s
                _                          -> Nothing
 
--- | Convert BCP47 string to a Lang, issuing warning
--- if there are problems.
-toLang :: PandocMonad m => Maybe String -> m (Maybe Lang)
-toLang Nothing = return Nothing
-toLang (Just s) =
-  case parseBCP47 s of
-       Left _ -> do
-         report $ InvalidLang s
-         return Nothing
-       Right l -> return (Just l)
-
 -- | Parse a BCP 47 string as a Lang.  Currently we parse
 -- extensions and private-use fields as "variants," even
 -- though officially they aren't.
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 4697177ed..a3dd9ad58 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE DeriveFunctor #-}
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE FlexibleInstances #-}
@@ -71,8 +72,15 @@ module Text.Pandoc.Class ( PandocMonad(..)
                          , runIO
                          , runIOorExplode
                          , runPure
+                         , readDefaultDataFile
+                         , readDataFile
                          , fillMediaBag
                          , extractMedia
+                         , toLang
+                         , setTranslations
+                         , translateTerm
+                         , Translations(..)
+                         , Term(..)
                          ) where
 
 import Prelude hiding (readFile)
@@ -81,10 +89,9 @@ import qualified System.Random as IO (newStdGen)
 import Codec.Archive.Zip (Archive, fromArchive, emptyArchive)
 import Data.Unique (hashUnique)
 import qualified Data.Unique as IO (newUnique)
-import qualified Text.Pandoc.Shared as IO ( readDataFile
-                                          , readDefaultDataFile
-                                          , openURL )
+import qualified Text.Pandoc.Shared as Shared
 import qualified Text.Pandoc.UTF8 as UTF8
+import qualified System.Directory as Directory
 import Text.Pandoc.Compat.Time (UTCTime)
 import Text.Pandoc.Logging
 import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
@@ -98,6 +105,7 @@ import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds
                              , posixSecondsToUTCTime
                              , POSIXTime )
 import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime, utc)
+import qualified System.FilePath.Posix as Posix
 import Network.URI ( escapeURIString, nonStrictRelativeTo,
                      unEscapeString, parseURIReference, isAllowedInURI,
                      parseURI, URI(..) )
@@ -111,7 +119,7 @@ import qualified System.Environment as IO (lookupEnv)
 import System.FilePath.Glob (match, compile)
 import System.Directory (createDirectoryIfMissing, getDirectoryContents,
                           doesDirectoryExist)
-import System.FilePath ((</>), (<.>), takeDirectory,
+import System.FilePath ((</>), (<.>), takeDirectory, splitDirectories,
          takeExtension, dropExtension, isRelative, normalise)
 import qualified System.FilePath.Glob as IO (glob)
 import qualified System.Directory as IO (getModificationTime)
@@ -121,13 +129,21 @@ import Control.Monad.State.Strict
 import Control.Monad.Except
 import Control.Monad.Writer (WriterT)
 import Control.Monad.RWS (RWST)
+import Codec.Archive.Zip
 import Data.Word (Word8)
 import Data.Default
 import System.IO.Error
 import System.IO (stderr)
 import qualified Data.Map as M
 import Text.Pandoc.Error
+import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import Text.Pandoc.Translations (Term(..), Translations(..), readTranslations)
 import qualified Debug.Trace
+#ifdef EMBED_DATA_FILES
+import Text.Pandoc.Data (dataFiles)
+#else
+import Paths_pandoc (getDataFileName)
+#endif
 
 -- | The PandocMonad typeclass contains all the potentially
 -- IO-related functions used in pandoc's readers and writers.
@@ -155,15 +171,12 @@ class (Functor m, Applicative m, Monad m, MonadError PandocError m)
   -- | Read the strict ByteString contents from a file path,
   -- raising an error on failure.
   readFileStrict :: FilePath -> m B.ByteString
-  -- | Read file from from Cabal data directory.
-  readDefaultDataFile :: FilePath -> m B.ByteString
-  -- | Read file from user data directory or,
-  -- if not found there, from Cabal data directory.
-  readDataFile :: FilePath -> m B.ByteString
   -- | Return a list of paths that match a glob, relative to
   -- the working directory.  See 'System.FilePath.Glob' for
   -- the glob syntax.
   glob :: String -> m [FilePath]
+  -- | Returns True if file exists.
+  fileExists :: FilePath -> m Bool
   -- | Return the modification time of a file.
   getModificationTime :: FilePath -> m UTCTime
   -- | Get the value of the 'CommonState' used by all instances
@@ -272,6 +285,9 @@ data CommonState = CommonState { stLog          :: [LogMessage]
                                  -- ^ Directory to search for data files
                                , stMediaBag     :: MediaBag
                                  -- ^ Media parsed from binary containers
+                               , stTranslations :: Maybe
+                                                  (Lang, Maybe Translations)
+                                 -- ^ Translations for localization
                                , stInputFiles   :: Maybe [FilePath]
                                  -- ^ List of input files from command line
                                , stOutputFile   :: Maybe FilePath
@@ -290,6 +306,7 @@ instance Default CommonState where
   def = CommonState { stLog = []
                     , stUserDataDir = Nothing
                     , stMediaBag = mempty
+                    , stTranslations = Nothing
                     , stInputFiles = Nothing
                     , stOutputFile = Nothing
                     , stResourcePath = ["."]
@@ -297,6 +314,71 @@ instance Default CommonState where
                     , stTrace = False
                     }
 
+-- | Convert BCP47 string to a Lang, issuing warning
+-- if there are problems.
+toLang :: PandocMonad m => Maybe String -> m (Maybe Lang)
+toLang Nothing = return Nothing
+toLang (Just s) =
+  case parseBCP47 s of
+       Left _ -> do
+         report $ InvalidLang s
+         return Nothing
+       Right l -> return (Just l)
+
+-- | Select the language to use with 'translateTerm'.
+-- Note that this does not read a translation file;
+-- that is only done the first time 'translateTerm' is
+-- used.
+setTranslations :: PandocMonad m => Lang -> m ()
+setTranslations lang =
+  modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) }
+
+-- | Load term map.
+getTranslations :: PandocMonad m => m Translations
+getTranslations = do
+  mbtrans <- getsCommonState stTranslations
+  case mbtrans of
+       Nothing -> return mempty  -- no language defined
+       Just (_, Just t) -> return t
+       Just (lang, Nothing) -> do  -- read from file
+         let translationFile = "translations/" ++ renderLang lang ++ ".trans"
+         let fallbackFile = "translations/" ++ langLanguage lang ++ ".trans"
+         let getTrans bs =
+               case readTranslations (UTF8.toString bs) of
+                    Left e   -> do
+                      report $ CouldNotLoadTranslations (renderLang lang) e
+                      -- make sure we don't try again...
+                      modifyCommonState $ \st ->
+                        st{ stTranslations = Nothing }
+                      return mempty
+                    Right t -> do
+                      modifyCommonState $ \st ->
+                                  st{ stTranslations = Just (lang, Just t) }
+                      return t
+         catchError (readDataFile translationFile >>= getTrans)
+           (\_ ->
+             catchError (readDataFile fallbackFile >>= getTrans)
+               (\e -> do
+                 report $ CouldNotLoadTranslations (renderLang lang)
+                          $ case e of
+                               PandocCouldNotFindDataFileError _ ->
+                                 ("data file " ++ fallbackFile ++ " not found")
+                               _ -> ""
+                 -- make sure we don't try again...
+                 modifyCommonState $ \st -> st{ stTranslations = Nothing }
+                 return mempty))
+
+-- | Get a translation from the current term map.
+-- Issue a warning if the term is not defined.
+translateTerm :: PandocMonad m => Term -> m String
+translateTerm term = do
+  Translations termMap <- getTranslations
+  case M.lookup term termMap of
+       Just s -> return s
+       Nothing -> do
+         report $ NoTranslation (show term)
+         return ""
+
 -- | Evaluate a 'PandocIO' operation.
 runIO :: PandocIO a -> IO (Either PandocError a)
 runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
@@ -317,7 +399,7 @@ liftIOError :: (String -> IO a) -> String -> PandocIO a
 liftIOError f u = do
   res <- liftIO $ tryIOError $ f u
   case res of
-         Left e -> throwError $ PandocIOError u e
+         Left e  -> throwError $ PandocIOError u e
          Right r -> return r
 
 instance PandocMonad PandocIO where
@@ -328,17 +410,15 @@ instance PandocMonad PandocIO where
   newUniqueHash = hashUnique <$> liftIO IO.newUnique
   openURL u = do
     report $ Fetching u
-    res <- liftIO (IO.openURL u)
+    res <- liftIOError Shared.openURL u
     case res of
          Right r -> return r
          Left e  -> throwError $ PandocHttpError u e
   readFileLazy s = liftIOError BL.readFile s
   readFileStrict s = liftIOError B.readFile s
-  readDefaultDataFile fname = liftIOError IO.readDefaultDataFile fname
-  readDataFile fname = do
-    datadir <- getUserDataDir
-    liftIOError (IO.readDataFile datadir) fname
-  glob = liftIO . IO.glob
+
+  glob = liftIOError IO.glob
+  fileExists = liftIOError Directory.doesFileExist
   getModificationTime fp = liftIOError IO.getModificationTime fp
   getCommonState = PandocIO $ lift get
   putCommonState x = PandocIO $ lift $ put x
@@ -432,6 +512,109 @@ downloadOrRead sourceURL s =
          convertSlash '\\' = '/'
          convertSlash x    = x
 
+getDefaultReferenceDocx :: PandocMonad m => m Archive
+getDefaultReferenceDocx = do
+  let paths = ["[Content_Types].xml",
+               "_rels/.rels",
+               "docProps/app.xml",
+               "docProps/core.xml",
+               "word/document.xml",
+               "word/fontTable.xml",
+               "word/footnotes.xml",
+               "word/numbering.xml",
+               "word/settings.xml",
+               "word/webSettings.xml",
+               "word/styles.xml",
+               "word/_rels/document.xml.rels",
+               "word/_rels/footnotes.xml.rels",
+               "word/theme/theme1.xml"]
+  let toLazy = BL.fromChunks . (:[])
+  let pathToEntry path = do
+        epochtime <- (floor . utcTimeToPOSIXSeconds) <$> getCurrentTime
+        contents <- toLazy <$> readDataFile ("docx/" ++ path)
+        return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.docx")
+                       if exists
+                          then return (Just (d </> "reference.docx"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+getDefaultReferenceODT :: PandocMonad m => m Archive
+getDefaultReferenceODT = do
+  let paths = ["mimetype",
+               "manifest.rdf",
+               "styles.xml",
+               "content.xml",
+               "meta.xml",
+               "settings.xml",
+               "Configurations2/accelerator/current.xml",
+               "Thumbnails/thumbnail.png",
+               "META-INF/manifest.xml"]
+  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
+                            contents <- (BL.fromChunks . (:[])) `fmap`
+                                          readDataFile ("odt/" ++ path)
+                            return $ toEntry path epochtime contents
+  datadir <- getUserDataDir
+  mbArchive <- case datadir of
+                    Nothing   -> return Nothing
+                    Just d    -> do
+                       exists <- fileExists (d </> "reference.odt")
+                       if exists
+                          then return (Just (d </> "reference.odt"))
+                          else return Nothing
+  case mbArchive of
+     Just arch -> toArchive <$> readFileLazy arch
+     Nothing   -> foldr addEntryToArchive emptyArchive <$>
+                     mapM pathToEntry paths
+
+-- | Read file from user data directory or,
+-- if not found there, from Cabal data directory.
+readDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDataFile fname = do
+  datadir <- getUserDataDir
+  case datadir of
+       Nothing -> readDefaultDataFile fname
+       Just userDir -> do
+         exists <- fileExists (userDir </> fname)
+         if exists
+            then readFileStrict (userDir </> fname)
+            else readDefaultDataFile fname
+
+-- | Read file from from Cabal data directory.
+readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
+readDefaultDataFile "reference.docx" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceDocx
+readDefaultDataFile "reference.odt" =
+  (B.concat . BL.toChunks . fromArchive) <$> getDefaultReferenceODT
+readDefaultDataFile fname =
+#ifdef EMBED_DATA_FILES
+  case lookup (makeCanonical fname) dataFiles of
+    Nothing       -> throwError $ PandocCouldNotFindDataFileError fname
+    Just contents -> return contents
+  where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
+        transformPathParts = reverse . foldl go []
+        go as     "."  = as
+        go (_:as) ".." = as
+        go as     x    = x : as
+#else
+  getDataFileName fname' >>= checkExistence >>= readFileStrict
+    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
+
+checkExistence :: PandocMonad m => FilePath -> m FilePath
+checkExistence fn = do
+  exists <- fileExists fn
+  if exists
+     then return fn
+     else throwError $ PandocCouldNotFindDataFileError fn
+#endif
+
 withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
 withPaths [] _ fp = throwError $ PandocResourceNotFound fp
 withPaths (p:ps) action fp =
@@ -491,9 +674,8 @@ writeMedia dir mediabag subpath = do
        Nothing -> throwError $ PandocResourceNotFound subpath
        Just (_, bs) -> do
          report $ Extracting fullpath
-         liftIO $ do
-           createDirectoryIfMissing True $ takeDirectory fullpath
-           BL.writeFile fullpath bs
+         liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+         liftIOError (\p -> BL.writeFile p bs) fullpath
 
 adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
 adjustImagePath dir paths (Image attr lab (src, tit))
@@ -624,28 +806,17 @@ instance PandocMonad PandocPure where
     case infoFileContents <$> getFileInfo fp fps of
       Just bs -> return bs
       Nothing -> throwError $ PandocResourceNotFound fp
-  readDefaultDataFile "reference.docx" =
-    (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceDocx
-  readDefaultDataFile "reference.odt" =
-    (B.concat . BL.toChunks . fromArchive) <$> getsPureState stReferenceODT
-  readDefaultDataFile fname = do
-    let fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
-    readFileStrict fname'
-  readDataFile fname = do
-    datadir <- getUserDataDir
-    case datadir of
-         Just userDir -> do
-           userDirFiles <- getsPureState stUserDataFiles
-           case infoFileContents <$> getFileInfo (userDir </> fname)
-                userDirFiles of
-             Just bs -> return bs
-             Nothing -> readDefaultDataFile fname
-         Nothing -> readDefaultDataFile fname
 
   glob s = do
     FileTree ftmap <- getsPureState stFiles
     return $ filter (match (compile s)) $ M.keys ftmap
 
+  fileExists fp = do
+    fps <- getsPureState stFiles
+    case getFileInfo fp fps of
+         Nothing -> return False
+         Just _  -> return True
+
   getModificationTime fp = do
     fps <- getsPureState stFiles
     case infoFileMTime <$> getFileInfo fp fps of
@@ -667,9 +838,8 @@ instance PandocMonad m => PandocMonad (ParsecT s st m) where
   openURL = lift . openURL
   readFileLazy = lift . readFileLazy
   readFileStrict = lift . readFileStrict
-  readDefaultDataFile = lift . readDefaultDataFile
-  readDataFile = lift . readDataFile
   glob = lift . glob
+  fileExists = lift . fileExists
   getModificationTime = lift . getModificationTime
   getCommonState = lift getCommonState
   putCommonState = lift . putCommonState
@@ -696,9 +866,8 @@ instance PandocMonad m => PandocMonad (ReaderT r m) where
   openURL = lift . openURL
   readFileLazy = lift . readFileLazy
   readFileStrict = lift . readFileStrict
-  readDefaultDataFile = lift . readDefaultDataFile
-  readDataFile = lift . readDataFile
   glob = lift . glob
+  fileExists = lift . fileExists
   getModificationTime = lift . getModificationTime
   getCommonState = lift getCommonState
   putCommonState = lift . putCommonState
@@ -713,9 +882,8 @@ instance (PandocMonad m, Monoid w) => PandocMonad (WriterT w m) where
   openURL = lift . openURL
   readFileLazy = lift . readFileLazy
   readFileStrict = lift . readFileStrict
-  readDefaultDataFile = lift . readDefaultDataFile
-  readDataFile = lift . readDataFile
   glob = lift . glob
+  fileExists = lift . fileExists
   getModificationTime = lift . getModificationTime
   getCommonState = lift getCommonState
   putCommonState = lift . putCommonState
@@ -730,9 +898,8 @@ instance (PandocMonad m, Monoid w) => PandocMonad (RWST r w st m) where
   openURL = lift . openURL
   readFileLazy = lift . readFileLazy
   readFileStrict = lift . readFileStrict
-  readDefaultDataFile = lift . readDefaultDataFile
-  readDataFile = lift . readDataFile
   glob = lift . glob
+  fileExists = lift . fileExists
   getModificationTime = lift . getModificationTime
   getCommonState = lift getCommonState
   putCommonState = lift . putCommonState
@@ -747,9 +914,8 @@ instance PandocMonad m => PandocMonad (StateT st m) where
   openURL = lift . openURL
   readFileLazy = lift . readFileLazy
   readFileStrict = lift . readFileStrict
-  readDefaultDataFile = lift . readDefaultDataFile
-  readDataFile = lift . readDataFile
   glob = lift . glob
+  fileExists = lift . fileExists
   getModificationTime = lift . getModificationTime
   getCommonState = lift getCommonState
   putCommonState = lift . putCommonState
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index ac45b0a66..832a1f4df 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -95,6 +95,8 @@ data LogMessage =
   | CouldNotHighlight String
   | MissingCharacter String
   | Deprecated String String
+  | NoTranslation String
+  | CouldNotLoadTranslations String String
   deriving (Show, Eq, Data, Ord, Typeable, Generic)
 
 instance ToJSON LogMessage where
@@ -195,6 +197,11 @@ instance ToJSON LogMessage where
       Deprecated thing msg ->
            ["thing" .= Text.pack thing,
             "message" .= Text.pack msg]
+      NoTranslation term ->
+           ["term" .= Text.pack term]
+      CouldNotLoadTranslations lang msg ->
+           ["lang" .= Text.pack lang,
+            "message" .= Text.pack msg]
 
 
 showPos :: SourcePos -> String
@@ -282,6 +289,11 @@ showLogMessage msg =
          if null m
             then ""
             else ". " ++ m
+       NoTranslation t ->
+         "The term " ++ t ++ " has no translation defined."
+       CouldNotLoadTranslations lang m ->
+         "Could not load translations for " ++ lang ++
+           if null m then "" else ('\n':m)
 
 messageVerbosity:: LogMessage -> Verbosity
 messageVerbosity msg =
@@ -314,3 +326,5 @@ messageVerbosity msg =
        CouldNotHighlight{}          -> WARNING
        MissingCharacter{}           -> WARNING
        Deprecated{}                 -> WARNING
+       NoTranslation{}              -> WARNING
+       CouldNotLoadTranslations{}   -> WARNING
diff --git a/src/Text/Pandoc/Lua/PandocModule.hs b/src/Text/Pandoc/Lua/PandocModule.hs
index 2d0baf4f8..d46ed3629 100644
--- a/src/Text/Pandoc/Lua/PandocModule.hs
+++ b/src/Text/Pandoc/Lua/PandocModule.hs
@@ -32,13 +32,12 @@ import Data.ByteString.Char8 (unpack)
 import Data.Default (Default (..))
 import Data.Text (pack)
 import Scripting.Lua (LuaState, call, push, pushhsfunction, rawset)
-import Text.Pandoc.Class hiding (readDataFile)
+import Text.Pandoc.Class
 import Text.Pandoc.Definition (Pandoc)
 import Text.Pandoc.Options (ReaderOptions(readerExtensions))
 import Text.Pandoc.Lua.Compat (loadstring)
 import Text.Pandoc.Lua.StackInstances ()
 import Text.Pandoc.Readers (Reader (..), getReader)
-import Text.Pandoc.Shared (readDataFile)
 
 -- | Push the "pandoc" on the lua stack.
 pushPandocModule :: Maybe FilePath -> LuaState -> IO ()
@@ -52,7 +51,8 @@ pushPandocModule datadir lua = do
 
 -- | Get the string representation of the pandoc module
 pandocModuleScript :: Maybe FilePath -> IO String
-pandocModuleScript datadir = unpack <$> readDataFile datadir "pandoc.lua"
+pandocModuleScript datadir = unpack <$>
+  runIOorExplode (setUserDataDir datadir >> readDataFile "pandoc.lua")
 
 read_doc :: String -> String -> IO (Either String Pandoc)
 read_doc formatSpec content = do
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 0f0e71b93..1fe4594ed 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -55,8 +55,11 @@ import Data.Maybe (fromMaybe, maybeToList)
 import Safe (minimumDef)
 import System.FilePath (addExtension, replaceExtension, takeExtension)
 import Text.Pandoc.Builder
-import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv, readFileFromDirs,
-                          report, setResourcePath, getResourcePath)
+import Text.Pandoc.Class (PandocMonad, PandocPure, lookupEnv,
+                          readFileFromDirs, report, setResourcePath,
+                          getResourcePath, setTranslations, translateTerm)
+import qualified Text.Pandoc.Translations as Translations
+import Text.Pandoc.BCP47 (Lang(..))
 import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -1247,6 +1250,7 @@ inlineCommands = M.fromList $
                                     removeDoubleQuotes . untokenize <$> braced
                            mkImage options src)
   , ("enquote", enquote)
+  , ("figurename", doTerm Translations.Figure)
   , ("cite", citation "cite" NormalCitation False)
   , ("Cite", citation "Cite" NormalCitation False)
   , ("citep", citation "citep" NormalCitation False)
@@ -1326,6 +1330,12 @@ inlineCommands = M.fromList $
   , ("ifstrequal", ifstrequal)
   ]
 
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = do
+  s <- (symbol '~' >> return (str "\160")) <|> return space
+  t <- translateTerm term
+  return (str t <> s)
+
 ifstrequal :: PandocMonad m => LP m Inlines
 ifstrequal = do
   str1 <- tok
@@ -1759,6 +1769,9 @@ blockCommands = M.fromList $
    -- includes
    , ("lstinputlisting", inputListing)
    , ("graphicspath", graphicsPath)
+   -- polyglossia
+   , ("setdefaultlanguage", setDefaultLanguage)
+   , ("setmainlanguage", setDefaultLanguage)
    -- hyperlink
    , ("hypertarget", try $ braced >> grouped block)
    -- LaTeX colors
@@ -2206,3 +2219,121 @@ block = (mempty <$ spaces1)
 blocks :: PandocMonad m => LP m Blocks
 blocks = mconcat <$> many block
 
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+  o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']'))
+                <$> rawopt
+  polylang <- toksToString <$> braced
+  case polyglossiaLangToBCP47 polylang o of
+       Nothing -> return () -- TODO mzero? warning?
+       Just l -> setTranslations l
+  return mempty
+
+polyglossiaLangToBCP47 :: String -> String -> Maybe Lang
+polyglossiaLangToBCP47 s o =
+  case (s, filter (/=' ') o) of
+       ("arabic", "locale=algeria") -> Just $ Lang "ar" "" "DZ" []
+       ("arabic", "locale=mashriq") -> Just $ Lang "ar" "" "SY" []
+       ("arabic", "locale=libya") -> Just $ Lang "ar" "" "LY" []
+       ("arabic", "locale=morocco") -> Just $ Lang "ar" "" "MA" []
+       ("arabic", "locale=mauritania") -> Just $ Lang "ar" "" "MR" []
+       ("arabic", "locale=tunisia") -> Just $ Lang "ar" "" "TN" []
+       ("german", "spelling=old") -> Just $ Lang "de" "" "DE" ["1901"]
+       ("german", "variant=austrian,spelling=old")
+                                  -> Just $ Lang "de" "" "AT" ["1901"]
+       ("german", "variant=austrian") -> Just $ Lang "de" "" "AT" []
+       ("german", "variant=swiss,spelling=old")
+                                  -> Just $ Lang "de" "" "CH" ["1901"]
+       ("german", "variant=swiss") -> Just $ Lang "de" "" "CH" []
+       ("german", _) -> Just $ Lang "de" "" "" []
+       ("lsorbian", _) -> Just $ Lang "dsb" "" "" []
+       ("greek", "variant=poly") -> Just $ Lang "el" "" "polyton" []
+       ("english", "variant=australian") -> Just $ Lang "en" "" "AU" []
+       ("english", "variant=canadian") -> Just $ Lang "en" "" "CA" []
+       ("english", "variant=british") -> Just $ Lang "en" "" "GB" []
+       ("english", "variant=newzealand") -> Just $ Lang "en" "" "NZ" []
+       ("english", "variant=american") -> Just $ Lang "en" "" "US" []
+       ("greek", "variant=ancient") -> Just $ Lang "grc" "" "" []
+       ("usorbian", _) -> Just $ Lang "hsb" "" "" []
+       ("latin", "variant=classic") -> Just $ Lang "la" "" "" ["x-classic"]
+       ("slovenian", _) -> Just $ Lang "sl" "" "" []
+       ("serbianc", _) -> Just $ Lang "sr" "cyrl" "" []
+       ("pinyin", _) -> Just $ Lang "zh" "Latn" "" ["pinyin"]
+       ("afrikaans", _) -> Just $ Lang "af" "" "" []
+       ("amharic", _) -> Just $ Lang "am" "" "" []
+       ("arabic", _) -> Just $ Lang "ar" "" "" []
+       ("assamese", _) -> Just $ Lang "as" "" "" []
+       ("asturian", _) -> Just $ Lang "ast" "" "" []
+       ("bulgarian", _) -> Just $ Lang "bg" "" "" []
+       ("bengali", _) -> Just $ Lang "bn" "" "" []
+       ("tibetan", _) -> Just $ Lang "bo" "" "" []
+       ("breton", _) -> Just $ Lang "br" "" "" []
+       ("catalan", _) -> Just $ Lang "ca" "" "" []
+       ("welsh", _) -> Just $ Lang "cy" "" "" []
+       ("czech", _) -> Just $ Lang "cs" "" "" []
+       ("coptic", _) -> Just $ Lang "cop" "" "" []
+       ("danish", _) -> Just $ Lang "da" "" "" []
+       ("divehi", _) -> Just $ Lang "dv" "" "" []
+       ("greek", _) -> Just $ Lang "el" "" "" []
+       ("english", _) -> Just $ Lang "en" "" "" []
+       ("esperanto", _) -> Just $ Lang "eo" "" "" []
+       ("spanish", _) -> Just $ Lang "es" "" "" []
+       ("estonian", _) -> Just $ Lang "et" "" "" []
+       ("basque", _) -> Just $ Lang "eu" "" "" []
+       ("farsi", _) -> Just $ Lang "fa" "" "" []
+       ("finnish", _) -> Just $ Lang "fi" "" "" []
+       ("french", _) -> Just $ Lang "fr" "" "" []
+       ("friulan", _) -> Just $ Lang "fur" "" "" []
+       ("irish", _) -> Just $ Lang "ga" "" "" []
+       ("scottish", _) -> Just $ Lang "gd" "" "" []
+       ("ethiopic", _) -> Just $ Lang "gez" "" "" []
+       ("galician", _) -> Just $ Lang "gl" "" "" []
+       ("hebrew", _) -> Just $ Lang "he" "" "" []
+       ("hindi", _) -> Just $ Lang "hi" "" "" []
+       ("croatian", _) -> Just $ Lang "hr" "" "" []
+       ("magyar", _) -> Just $ Lang "hu" "" "" []
+       ("armenian", _) -> Just $ Lang "hy" "" "" []
+       ("interlingua", _) -> Just $ Lang "ia" "" "" []
+       ("indonesian", _) -> Just $ Lang "id" "" "" []
+       ("icelandic", _) -> Just $ Lang "is" "" "" []
+       ("italian", _) -> Just $ Lang "it" "" "" []
+       ("japanese", _) -> Just $ Lang "jp" "" "" []
+       ("khmer", _) -> Just $ Lang "km" "" "" []
+       ("kurmanji", _) -> Just $ Lang "kmr" "" "" []
+       ("kannada", _) -> Just $ Lang "kn" "" "" []
+       ("korean", _) -> Just $ Lang "ko" "" "" []
+       ("latin", _) -> Just $ Lang "la" "" "" []
+       ("lao", _) -> Just $ Lang "lo" "" "" []
+       ("lithuanian", _) -> Just $ Lang "lt" "" "" []
+       ("latvian", _) -> Just $ Lang "lv" "" "" []
+       ("malayalam", _) -> Just $ Lang "ml" "" "" []
+       ("mongolian", _) -> Just $ Lang "mn" "" "" []
+       ("marathi", _) -> Just $ Lang "mr" "" "" []
+       ("dutch", _) -> Just $ Lang "nl" "" "" []
+       ("nynorsk", _) -> Just $ Lang "nn" "" "" []
+       ("norsk", _) -> Just $ Lang "no" "" "" []
+       ("nko", _) -> Just $ Lang "nqo" "" "" []
+       ("occitan", _) -> Just $ Lang "oc" "" "" []
+       ("panjabi", _) -> Just $ Lang "pa" "" "" []
+       ("polish", _) -> Just $ Lang "pl" "" "" []
+       ("piedmontese", _) -> Just $ Lang "pms" "" "" []
+       ("portuguese", _) -> Just $ Lang "pt" "" "" []
+       ("romansh", _) -> Just $ Lang "rm" "" "" []
+       ("romanian", _) -> Just $ Lang "ro" "" "" []
+       ("russian", _) -> Just $ Lang "ru" "" "" []
+       ("sanskrit", _) -> Just $ Lang "sa" "" "" []
+       ("samin", _) -> Just $ Lang "se" "" "" []
+       ("slovak", _) -> Just $ Lang "sk" "" "" []
+       ("albanian", _) -> Just $ Lang "sq" "" "" []
+       ("serbian", _) -> Just $ Lang "sr" "" "" []
+       ("swedish", _) -> Just $ Lang "sv" "" "" []
+       ("syriac", _) -> Just $ Lang "syr" "" "" []
+       ("tamil", _) -> Just $ Lang "ta" "" "" []
+       ("telugu", _) -> Just $ Lang "te" "" "" []
+       ("thai", _) -> Just $ Lang "th" "" "" []
+       ("turkmen", _) -> Just $ Lang "tk" "" "" []
+       ("turkish", _) -> Just $ Lang "tr" "" "" []
+       ("ukrainian", _) -> Just $ Lang "uk" "" "" []
+       ("urdu", _) -> Just $ Lang "ur" "" "" []
+       ("vietnamese", _) -> Just $ Lang "vi" "" "" []
+       _ -> Nothing
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5503c96f1..9f88a0ad4 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -76,11 +76,6 @@ module Text.Pandoc.Shared (
                      renderTags',
                      -- * File handling
                      inDirectory,
-                     getDefaultReferenceDocx,
-                     getDefaultReferenceODT,
-                     readDefaultDataFile,
-                     readDataFile,
-                     readDataFileUTF8,
                      openURL,
                      collapseFilePath,
                      filteredFilesFromArchive,
@@ -116,8 +111,6 @@ import System.Directory
 import System.FilePath (splitDirectories, isPathSeparator)
 import qualified System.FilePath.Posix as Posix
 import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.Error (PandocError(..))
-import System.FilePath ( (</>) )
 import Data.Generics (Typeable, Data)
 import qualified Control.Monad.State.Strict as S
 import qualified Control.Exception as E
@@ -125,7 +118,6 @@ import Control.Monad (msum, unless, MonadPlus(..))
 import Text.Pandoc.Pretty (charWidth)
 import Text.Pandoc.Generic (bottomUp)
 import Text.Pandoc.Compat.Time
-import Data.Time.Clock.POSIX
 import System.IO.Error
 import System.IO.Temp
 import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
@@ -136,17 +128,12 @@ import qualified Data.ByteString.Char8 as B8
 import Data.ByteString.Base64 (decodeLenient)
 import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
 import qualified Data.Text as T
-import Data.ByteString.Lazy (toChunks, fromChunks)
+import Data.ByteString.Lazy (toChunks)
 import qualified Data.ByteString.Lazy as BL
 import Paths_pandoc (version)
 
 import Codec.Archive.Zip
 
-#ifdef EMBED_DATA_FILES
-import Text.Pandoc.Data (dataFiles)
-#else
-import Paths_pandoc (getDataFileName)
-#endif
 import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
                             Request(port,host,requestHeaders),
                             HttpException)
@@ -612,110 +599,6 @@ inDirectory path action = E.bracket
                              setCurrentDirectory
                              (const $ setCurrentDirectory path >> action)
 
-getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
-getDefaultReferenceDocx datadir = do
-  let paths = ["[Content_Types].xml",
-               "_rels/.rels",
-               "docProps/app.xml",
-               "docProps/core.xml",
-               "word/document.xml",
-               "word/fontTable.xml",
-               "word/footnotes.xml",
-               "word/numbering.xml",
-               "word/settings.xml",
-               "word/webSettings.xml",
-               "word/styles.xml",
-               "word/_rels/document.xml.rels",
-               "word/_rels/footnotes.xml.rels",
-               "word/theme/theme1.xml"]
-  let toLazy = fromChunks . (:[])
-  let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
-                                          getCurrentTime
-                            contents <- toLazy <$> readDataFile datadir
-                                                       ("docx/" ++ path)
-                            return $ toEntry path epochtime contents
-  mbArchive <- case datadir of
-                    Nothing   -> return Nothing
-                    Just d    -> do
-                       exists <- doesFileExist (d </> "reference.docx")
-                       if exists
-                          then return (Just (d </> "reference.docx"))
-                          else return Nothing
-  case mbArchive of
-     Just arch -> toArchive <$> BL.readFile arch
-     Nothing   -> foldr addEntryToArchive emptyArchive <$>
-                     mapM pathToEntry paths
-
-getDefaultReferenceODT :: Maybe FilePath -> IO Archive
-getDefaultReferenceODT datadir = do
-  let paths = ["mimetype",
-               "manifest.rdf",
-               "styles.xml",
-               "content.xml",
-               "meta.xml",
-               "settings.xml",
-               "Configurations2/accelerator/current.xml",
-               "Thumbnails/thumbnail.png",
-               "META-INF/manifest.xml"]
-  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
-                            contents <- (fromChunks . (:[])) `fmap`
-                                          readDataFile datadir ("odt/" ++ path)
-                            return $ toEntry path epochtime contents
-  mbArchive <- case datadir of
-                    Nothing   -> return Nothing
-                    Just d    -> do
-                       exists <- doesFileExist (d </> "reference.odt")
-                       if exists
-                          then return (Just (d </> "reference.odt"))
-                          else return Nothing
-  case mbArchive of
-     Just arch -> toArchive <$> BL.readFile arch
-     Nothing   -> foldr addEntryToArchive emptyArchive <$>
-                     mapM pathToEntry paths
-
-
-readDefaultDataFile :: FilePath -> IO BS.ByteString
-readDefaultDataFile "reference.docx" =
-  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
-readDefaultDataFile "reference.odt" =
-  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
-readDefaultDataFile fname =
-#ifdef EMBED_DATA_FILES
-  case lookup (makeCanonical fname) dataFiles of
-    Nothing       -> E.throwIO $ PandocCouldNotFindDataFileError fname
-    Just contents -> return contents
-  where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
-        transformPathParts = reverse . foldl go []
-        go as     "."  = as
-        go (_:as) ".." = as
-        go as     x    = x : as
-#else
-  getDataFileName fname' >>= checkExistence >>= BS.readFile
-    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
-
-checkExistence :: FilePath -> IO FilePath
-checkExistence fn = do
-  exists <- doesFileExist fn
-  if exists
-     then return fn
-     else E.throwIO $ PandocCouldNotFindDataFileError fn
-#endif
-
--- | Read file from specified user data directory or, if not found there, from
--- Cabal data directory.
-readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
-readDataFile Nothing fname = readDefaultDataFile fname
-readDataFile (Just userDir) fname = do
-  exists <- doesFileExist (userDir </> fname)
-  if exists
-     then BS.readFile (userDir </> fname)
-     else readDefaultDataFile fname
-
--- | Same as 'readDataFile' but returns a String instead of a ByteString.
-readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
-readDataFileUTF8 userDir fname =
-  UTF8.toString `fmap` readDataFile userDir fname
-
 -- | Read from a URL and return raw data and maybe mime type.
 openURL :: String -> IO (Either HttpException (BS.ByteString, Maybe MimeType))
 openURL u
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 43b7dc37b..d5a4faafa 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -44,7 +44,7 @@ import qualified Data.Text as T
 import System.FilePath ((<.>), (</>))
 import Text.DocTemplates (Template, TemplateTarget, applyTemplate,
                           compileTemplate, renderTemplate, varListToJSON)
-import Text.Pandoc.Class (PandocMonad(readDataFile))
+import Text.Pandoc.Class (readDataFile, PandocMonad)
 import Text.Pandoc.Error
 import qualified Text.Pandoc.UTF8 as UTF8
 
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
new file mode 100644
index 000000000..2185366fd
--- /dev/null
+++ b/src/Text/Pandoc/Translations.hs
@@ -0,0 +1,94 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Translations
+   Copyright   : Copyright (C) 2017 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Data types for localization.
+
+Translations are stored in @data/translations/langname.trans@,
+where langname can be the full BCP47 language specifier, or
+just the language part.  File format is:
+
+> # A comment, ignored
+> Figure: Figura
+> Index: Indeksi
+
+-}
+module Text.Pandoc.Translations (
+                           Term(..)
+                         , Translations(..)
+                         , readTranslations
+                         )
+where
+import qualified Data.Map as M
+import GHC.Generics (Generic)
+import Text.Pandoc.Shared (trim, safeRead)
+
+data Term =
+    Preface
+  | References
+  | Abstract
+  | Bibliography
+  | Chapter
+  | Appendix
+  | Contents
+  | ListOfFigures
+  | ListOfTables
+  | Index
+  | Figure
+  | Table
+  | Part
+  | Page
+  | Proof
+  | See
+  | SeeAlso
+  | Cc
+  | To
+  deriving (Show, Eq, Ord, Generic, Read)
+
+newtype Translations = Translations (M.Map Term String)
+        deriving (Show, Eq, Ord, Generic, Monoid)
+
+readTranslations :: String -> Either String Translations
+readTranslations = foldr parseLine (Right mempty) . lines
+
+parseLine :: String
+          -> Either String Translations
+          -> Either String Translations
+parseLine _ (Left s) = Left s
+parseLine ('#':_) x  = x
+parseLine []      x  = x
+parseLine t (Right (Translations tm)) =
+  if null rest
+     then Left $ "no colon in " ++ term
+     else
+       case safeRead term of
+            Nothing    -> Left $ term ++ " is not a recognized term name"
+            Just term' -> Right (Translations $ (M.insert term' defn) tm)
+  where (trm, rest) = break (\c -> c == ':') t
+        defn = trim $ drop 1 rest
+        term = trim trm
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 3c901cab6..6f2cb2b9e 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -37,7 +37,7 @@ import Data.Maybe (catMaybes)
 import Data.Text (Text)
 import Network.URI (unEscapeString)
 import Text.Pandoc.BCP47
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad, report, toLang)
 import Text.Pandoc.Logging
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 166a09d4b..8b19f3740 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -51,7 +51,7 @@ import qualified Data.Text as T
 import Data.Time.Clock.POSIX
 import Skylighting
 import System.Random (randomR)
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class as P
 import Text.Pandoc.Compat.Time
 import Text.Pandoc.Definition
@@ -68,7 +68,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared (fixDisplayMath)
-import Text.Pandoc.BCP47 (getLang, renderLang, toLang)
+import Text.Pandoc.BCP47 (getLang, renderLang)
 import Text.Printf (printf)
 import Text.TeXMath
 import Text.XML.Light as XML
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 4b7bf0e9b..fcc5ad1c6 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -45,8 +45,8 @@ import Data.Maybe (catMaybes, fromMaybe, isJust)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import Text.Pandoc.BCP47 (Lang(..), toLang, getLang, renderLang)
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.BCP47 (Lang(..), getLang, renderLang)
+import Text.Pandoc.Class (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
                                  styleToLaTeX, toListingsLanguage)
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 160141822..4c74ef469 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -38,7 +38,7 @@ import Data.List (isPrefixOf)
 import Data.Maybe (fromMaybe)
 import qualified Data.Text.Lazy as TL
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Class (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class as P
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -51,7 +51,7 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.Writers.Shared (fixDisplayMath)
-import Text.Pandoc.BCP47 (getLang, toLang, Lang(..), renderLang)
+import Text.Pandoc.BCP47 (getLang, Lang(..), renderLang)
 import Text.Pandoc.XML
 import Text.TeXMath
 import Text.XML.Light