From 12f6cf13ad3ff48bede646ceff7ee2db300b4051 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 7 Nov 2018 21:29:48 +0100 Subject: T.P.App: extract submodule T.P.App.FormatHeuristics Format guessing is used for input and output options and should be shared. --- pandoc.cabal | 1 + src/Text/Pandoc/App.hs | 58 +------------------- src/Text/Pandoc/App/FormatHeuristics.hs | 94 +++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 57 deletions(-) create mode 100644 src/Text/Pandoc/App/FormatHeuristics.hs diff --git a/pandoc.cabal b/pandoc.cabal index 7b173a88c..c34d3d461 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -522,6 +522,7 @@ library Text.Pandoc.BCP47, Text.Pandoc.Class other-modules: Text.Pandoc.App.CommandLineOptions, + Text.Pandoc.App.FormatHeuristics, Text.Pandoc.App.Opt, Text.Pandoc.App.OutputSettings, Text.Pandoc.Filter.JSON, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 809165c2e..a14e4e017 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -62,6 +62,7 @@ import System.FilePath import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) import Text.Pandoc +import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts) import Text.Pandoc.App.CommandLineOptions (parseOptions, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) @@ -337,63 +338,6 @@ readMetaValue s = case YAML.decodeStrict (UTF8.fromString s) of -> MetaBool b _ -> MetaString s --- Determine default reader based on source file extensions. -formatFromFilePaths :: String -> [FilePath] -> String -formatFromFilePaths fallback [] = fallback -formatFromFilePaths fallback (x:xs) = - case formatFromFilePath x of - Just f -> f - Nothing -> formatFromFilePaths fallback xs - --- Determine format based on file extension -formatFromFilePath :: FilePath -> Maybe String -formatFromFilePath x = - case takeExtension (map toLower x) of - ".adoc" -> Just "asciidoc" - ".asciidoc" -> Just "asciidoc" - ".context" -> Just "context" - ".ctx" -> Just "context" - ".db" -> Just "docbook" - ".doc" -> Just "doc" -- so we get an "unknown reader" error - ".docx" -> Just "docx" - ".dokuwiki" -> Just "dokuwiki" - ".epub" -> Just "epub" - ".fb2" -> Just "fb2" - ".htm" -> Just "html" - ".html" -> Just "html" - ".icml" -> Just "icml" - ".json" -> Just "json" - ".latex" -> Just "latex" - ".lhs" -> Just "markdown+lhs" - ".ltx" -> Just "latex" - ".markdown" -> Just "markdown" - ".md" -> Just "markdown" - ".ms" -> Just "ms" - ".muse" -> Just "muse" - ".native" -> Just "native" - ".odt" -> Just "odt" - ".opml" -> Just "opml" - ".org" -> Just "org" - ".pdf" -> Just "pdf" -- so we get an "unknown reader" error - ".pptx" -> Just "pptx" - ".roff" -> Just "ms" - ".rst" -> Just "rst" - ".rtf" -> Just "rtf" - ".s5" -> Just "s5" - ".t2t" -> Just "t2t" - ".tei" -> Just "tei" - ".tei.xml" -> Just "tei" - ".tex" -> Just "latex" - ".texi" -> Just "texinfo" - ".texinfo" -> Just "texinfo" - ".text" -> Just "markdown" - ".textile" -> Just "textile" - ".txt" -> Just "markdown" - ".wiki" -> Just "mediawiki" - ".xhtml" -> Just "html" - ['.',y] | y `elem` ['1'..'9'] -> Just "man" - _ -> Nothing - -- Transformations of a Pandoc document post-parsing: applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs new file mode 100644 index 000000000..c8dbcd645 --- /dev/null +++ b/src/Text/Pandoc/App/FormatHeuristics.hs @@ -0,0 +1,94 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{- +Copyright (C) 2006-2018 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.App.FormatHeuristics + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Guess the format of a file from its name. +-} +module Text.Pandoc.App.FormatHeuristics + ( formatFromFilePaths + ) where + +import Prelude +import Data.Char (toLower) +import System.FilePath (takeExtension) + +-- Determine default reader based on source file extensions. +formatFromFilePaths :: String -> [FilePath] -> String +formatFromFilePaths fallback [] = fallback +formatFromFilePaths fallback (x:xs) = + case formatFromFilePath x of + Just f -> f + Nothing -> formatFromFilePaths fallback xs + +-- Determine format based on file extension +formatFromFilePath :: FilePath -> Maybe String +formatFromFilePath x = + case takeExtension (map toLower x) of + ".adoc" -> Just "asciidoc" + ".asciidoc" -> Just "asciidoc" + ".context" -> Just "context" + ".ctx" -> Just "context" + ".db" -> Just "docbook" + ".doc" -> Just "doc" -- so we get an "unknown reader" error + ".docx" -> Just "docx" + ".dokuwiki" -> Just "dokuwiki" + ".epub" -> Just "epub" + ".fb2" -> Just "fb2" + ".htm" -> Just "html" + ".html" -> Just "html" + ".icml" -> Just "icml" + ".json" -> Just "json" + ".latex" -> Just "latex" + ".lhs" -> Just "markdown+lhs" + ".ltx" -> Just "latex" + ".markdown" -> Just "markdown" + ".md" -> Just "markdown" + ".ms" -> Just "ms" + ".muse" -> Just "muse" + ".native" -> Just "native" + ".odt" -> Just "odt" + ".opml" -> Just "opml" + ".org" -> Just "org" + ".pdf" -> Just "pdf" -- so we get an "unknown reader" error + ".pptx" -> Just "pptx" + ".roff" -> Just "ms" + ".rst" -> Just "rst" + ".rtf" -> Just "rtf" + ".s5" -> Just "s5" + ".t2t" -> Just "t2t" + ".tei" -> Just "tei" + ".tei.xml" -> Just "tei" + ".tex" -> Just "latex" + ".texi" -> Just "texinfo" + ".texinfo" -> Just "texinfo" + ".text" -> Just "markdown" + ".textile" -> Just "textile" + ".txt" -> Just "markdown" + ".wiki" -> Just "mediawiki" + ".xhtml" -> Just "html" + ['.',y] | y `elem` ['1'..'9'] -> Just "man" + _ -> Nothing -- cgit v1.2.3