aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorYan Pas <yanp.bugz@gmail.com>2018-05-09 03:24:45 +0300
committerYan Pas <yanp.bugz@gmail.com>2018-05-09 03:24:45 +0300
commitc1617565fc2a5984e2e44d4da9adf7f8a26b3160 (patch)
tree146546caab9f32ee352297d5d99f679566d70117 /src/Text/Pandoc/Readers
parentfd3676a568589f07ad0707c07b2a9f87df6e2f6c (diff)
downloadpandoc-c1617565fc2a5984e2e44d4da9adf7f8a26b3160.tar.gz
basic manfile parsing
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Man.hs241
1 files changed, 178 insertions, 63 deletions
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index d1ff3fc47..166b7c7a7 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -14,6 +14,9 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+
-}
{- |
@@ -29,90 +32,202 @@ Conversion of man to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Man where
-import Control.Monad.Except (liftM2, throwError, guard)
-import Text.Pandoc.Class (PandocMonad(..))
+import Control.Monad.Except (throwError)
+import Data.Default (Default)
+import Data.Map (insert)
+import Data.Maybe (isJust)
+import Data.List (intersperse, intercalate)
+import qualified Data.Text as T
+
+import Text.Pandoc.Class (PandocMonad(..), runPure)
import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed)
import Text.Pandoc.Shared (crFilter)
import Text.Parsec
-import Text.Parsec.Char
-import Data.Text (Text)
-import Data.Map (empty)
-import qualified Data.Text as T
+import Text.Parsec.Char ()
+
+data FontKind = Regular | Italic | Bold | ItalicBold deriving Show
+
+data RoffState = RoffState { inCodeBlock :: Bool
+ , fontKind :: FontKind
+ } deriving Show
+
+instance Default RoffState where
+ def = RoffState {inCodeBlock = False, fontKind = Regular}
+
+data ManState = ManState {pState :: ParserState, rState :: RoffState}
+
+instance HasLogMessages ManState where
+ addLogMessage lm mst = mst {pState = addLogMessage lm (pState mst)}
+ getLogMessages mst = getLogMessages $ pState mst
+
+modifyRoffState :: PandocMonad m => (RoffState -> RoffState) -> ParsecT a ManState m ()
+modifyRoffState f = do
+ mst <- getState
+ setState mst { rState = f $ rState mst }
+
+type ManParser m = ParserT [Char] ManState m
+
+testStrr :: [Char] -> SourceName -> Either PandocError (Either ParseError Pandoc)
+testStrr s srcnm = runPure (runParserT parseMan (ManState {pState=def, rState=def}) srcnm s)
+
+printPandoc :: Pandoc -> [Char]
+printPandoc (Pandoc m content) =
+ let ttl = "Pandoc: " ++ (show $ unMeta m)
+ cnt = intercalate "\n" $ map show content
+ in ttl ++ "\n" ++ cnt
+
+strrepr :: (Show a2, Show a1) => Either a2 (Either a1 Pandoc) -> [Char]
+strrepr obj = case obj of
+ Right x -> case x of
+ Right x' -> printPandoc x'
+ Left y' -> show y'
+ Left y -> show y
+
+testFile :: FilePath -> IO ()
+testFile fname = do
+ cont <- readFile fname
+ putStrLn . strrepr $ testStrr cont fname
-- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m
- => ReaderOptions
- -> Text
- -> m Pandoc
-readMan opts s = do
- parsed <- readWithM parseMan def{ stateOptions = opts } (T.unpack s)
+readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
+readMan opts txt = do
+ let state = ManState { pState = def{ stateOptions = opts }, rState = def}
+ parsed <- readWithM parseMan state (T.unpack $ crFilter txt)
case parsed of
Right result -> return result
Left e -> throwError e
-type ManParser m = ParserT [Char] ParserState m
-
-comment :: PandocMonad m => ManParser m String
-comment = do
- string ".\\\" "
- many anyChar
-
-data Macro = Macro { macroName :: String
- , macroArgs :: [String]
- }
-
parseMacro :: PandocMonad m => ManParser m Block
parseMacro = do
- m <- macro
- return $ Plain (map Str $ (macroName m : macroArgs m))
-
-macro :: PandocMonad m => ManParser m Macro
-macro = do
char '.' <|> char '\''
many space
- name <- many1 letter
- --args <- many parseArg
- return $ Macro { macroName = name, macroArgs = [] }
-
+ macroName <- many1 (letter <|> oneOf ['\\', '"'])
+ args <- parseArgs
+ let joinedArgs = concat $ intersperse " " args
+ case macroName of
+ "\\\"" -> return Null -- comment
+ "TH" -> macroTitle (if null args then "" else head args)
+ "nf" -> macroCodeBlock True >> return Null
+ "fi" -> macroCodeBlock False >> return Null
+ "B" -> return $ Plain [Strong [Str joinedArgs]]
+ "BR" -> return $ Plain [Strong [Str joinedArgs]]
+ "BI" -> return $ Plain [Strong [Emph [Str joinedArgs]]]
+ "I" -> return $ Plain [Emph [Str joinedArgs]]
+ "SH" -> return $ Header 2 nullAttr [Str joinedArgs]
+ "sp" -> return $ Plain [LineBreak]
+ _ -> unkownMacro macroName args
+
where
- parseArg :: PandocMonad m => ManParser m String
- parseArg = do
- many1 space
- plainArg
-
- quotedArg :: PandocMonad m => ManParser m String
- quotedArg = do
- char '"'
- val <- many1 quotedChar
- char '"'
- return val
-
- plainArg :: PandocMonad m => ManParser m String
- plainArg = do
- many1 $ noneOf " \t"
-
- quotedChar :: PandocMonad m => ManParser m Char
- quotedChar = do
- noneOf "\""
- <|> try (string "\"\"" >> return '"')
+ macroTitle :: PandocMonad m => String -> ManParser m Block
+ macroTitle mantitle = do
+ modifyState (changeTitle mantitle)
+ if null mantitle
+ then return Null
+ else return $ Header 1 nullAttr [Str mantitle]
+ where
+ changeTitle title mst @ ManState{ pState = pst} =
+ let meta = stateMeta pst
+ metaUp = Meta $ insert "title" (MetaString title) (unMeta meta)
+ in
+ mst { pState = pst {stateMeta = metaUp} }
+
+
+ macroCodeBlock :: PandocMonad m => Bool -> ManParser m ()
+ macroCodeBlock insideCB = modifyRoffState (\rst -> rst{inCodeBlock = insideCB}) >> return ()
+
+ unkownMacro :: PandocMonad m => String -> [String] -> ManParser m Block
+ unkownMacro mname args = do
+ pos <- getPosition
+ logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
+ return $ Plain $ Str <$> args
+
+ parseArgs :: PandocMonad m => ManParser m [String]
+ parseArgs = do
+ eolOpt <- optionMaybe $ char '\n'
+ if isJust eolOpt
+ then return []
+ else do
+ many1 space
+ arg <- try quotedArg <|> plainArg
+ otherargs <- parseArgs
+ return $ arg : otherargs
+
+ where
+
+ plainArg :: PandocMonad m => ManParser m String
+ plainArg = many1 $ noneOf " \t\n"
+
+ quotedArg :: PandocMonad m => ManParser m String
+ quotedArg = do
+ char '"'
+ val <- many1 quotedChar
+ char '"'
+ return val
+
+ quotedChar :: PandocMonad m => ManParser m Char
+ quotedChar = noneOf "\"\n" <|> try (string "\"\"" >> return '"')
+
+roffInline :: RoffState -> String -> (Maybe Inline)
+roffInline rst str
+ | null str = Nothing
+ | inCodeBlock rst = Just $ Code nullAttr str
+ | otherwise = Just $ case fontKind rst of
+ Regular -> Str str
+ Italic -> Emph [Str str]
+ _ -> Strong [Str str]
parseLine :: PandocMonad m => ManParser m Block
parseLine = do
- str <- many anyChar
- return $ Plain [Str str]
-
-parseBlock :: PandocMonad m => ManParser m Block
-parseBlock = do
- choice [ parseMacro
- , parseLine
- ]
+ parts <- parseLineParts
+ newline
+ return $ if null parts
+ then Plain [LineBreak]
+ else Plain parts
+ where
+ parseLineParts :: PandocMonad m => ManParser m [Inline]
+ parseLineParts = do
+ lnpart <- many $ noneOf "\n\\"
+ ManState {rState = roffSt} <- getState
+ let inl = roffInline roffSt lnpart
+ others <- backSlash <|> return []
+ return $ case inl of
+ Just x -> x:others
+ Nothing -> others
+
+ backSlash :: PandocMonad m => ManParser m [Inline]
+ backSlash = do
+ char '\\'
+ esc <- choice [ char 'f' >> fEscape
+ , char '-' >> return (Just '-')
+ , Just <$> noneOf "\n"
+ ]
+ ManState {rState = roffSt} <- getState
+ case esc of
+ Just c -> case roffInline roffSt [c] of
+ Just inl -> do
+ oth <- parseLineParts
+ return $ inl : oth
+ Nothing -> parseLineParts
+ Nothing -> parseLineParts
+ where
+
+ fEscape :: PandocMonad m => ManParser m (Maybe Char)
+ fEscape = choice [ char 'B' >> modifyRoffState (\rst -> rst {fontKind = Bold})
+ , char 'I' >> modifyRoffState (\rst -> rst {fontKind = Italic})
+ , char 'P' >> modifyRoffState (\rst -> rst {fontKind = Regular})
+ ]
+ >> return Nothing
+
+
parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do
- blocks <- parseBlock `sepBy` newline
-
- return $ Pandoc Meta{unMeta = empty} blocks \ No newline at end of file
+ blocks <- many (parseMacro <|> parseLine)
+ parserst <- pState <$> getState
+ return $ Pandoc (stateMeta parserst) blocks