From c1617565fc2a5984e2e44d4da9adf7f8a26b3160 Mon Sep 17 00:00:00 2001 From: Yan Pas Date: Wed, 9 May 2018 03:24:45 +0300 Subject: basic manfile parsing --- src/Text/Pandoc/Readers/Man.hs | 241 ++++++++++++++++++++++++++++++----------- 1 file changed, 178 insertions(+), 63 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index d1ff3fc47..166b7c7a7 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -14,6 +14,9 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + -} {- | @@ -29,90 +32,202 @@ Conversion of man to 'Pandoc' document. -} module Text.Pandoc.Readers.Man where -import Control.Monad.Except (liftM2, throwError, guard) -import Text.Pandoc.Class (PandocMonad(..)) +import Control.Monad.Except (throwError) +import Data.Default (Default) +import Data.Map (insert) +import Data.Maybe (isJust) +import Data.List (intersperse, intercalate) +import qualified Data.Text as T + +import Text.Pandoc.Class (PandocMonad(..), runPure) import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed) import Text.Pandoc.Shared (crFilter) import Text.Parsec -import Text.Parsec.Char -import Data.Text (Text) -import Data.Map (empty) -import qualified Data.Text as T +import Text.Parsec.Char () + +data FontKind = Regular | Italic | Bold | ItalicBold deriving Show + +data RoffState = RoffState { inCodeBlock :: Bool + , fontKind :: FontKind + } deriving Show + +instance Default RoffState where + def = RoffState {inCodeBlock = False, fontKind = Regular} + +data ManState = ManState {pState :: ParserState, rState :: RoffState} + +instance HasLogMessages ManState where + addLogMessage lm mst = mst {pState = addLogMessage lm (pState mst)} + getLogMessages mst = getLogMessages $ pState mst + +modifyRoffState :: PandocMonad m => (RoffState -> RoffState) -> ParsecT a ManState m () +modifyRoffState f = do + mst <- getState + setState mst { rState = f $ rState mst } + +type ManParser m = ParserT [Char] ManState m + +testStrr :: [Char] -> SourceName -> Either PandocError (Either ParseError Pandoc) +testStrr s srcnm = runPure (runParserT parseMan (ManState {pState=def, rState=def}) srcnm s) + +printPandoc :: Pandoc -> [Char] +printPandoc (Pandoc m content) = + let ttl = "Pandoc: " ++ (show $ unMeta m) + cnt = intercalate "\n" $ map show content + in ttl ++ "\n" ++ cnt + +strrepr :: (Show a2, Show a1) => Either a2 (Either a1 Pandoc) -> [Char] +strrepr obj = case obj of + Right x -> case x of + Right x' -> printPandoc x' + Left y' -> show y' + Left y -> show y + +testFile :: FilePath -> IO () +testFile fname = do + cont <- readFile fname + putStrLn . strrepr $ testStrr cont fname -- | Read man (troff) from an input string and return a Pandoc document. -readMan :: PandocMonad m - => ReaderOptions - -> Text - -> m Pandoc -readMan opts s = do - parsed <- readWithM parseMan def{ stateOptions = opts } (T.unpack s) +readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc +readMan opts txt = do + let state = ManState { pState = def{ stateOptions = opts }, rState = def} + parsed <- readWithM parseMan state (T.unpack $ crFilter txt) case parsed of Right result -> return result Left e -> throwError e -type ManParser m = ParserT [Char] ParserState m - -comment :: PandocMonad m => ManParser m String -comment = do - string ".\\\" " - many anyChar - -data Macro = Macro { macroName :: String - , macroArgs :: [String] - } - parseMacro :: PandocMonad m => ManParser m Block parseMacro = do - m <- macro - return $ Plain (map Str $ (macroName m : macroArgs m)) - -macro :: PandocMonad m => ManParser m Macro -macro = do char '.' <|> char '\'' many space - name <- many1 letter - --args <- many parseArg - return $ Macro { macroName = name, macroArgs = [] } - + macroName <- many1 (letter <|> oneOf ['\\', '"']) + args <- parseArgs + let joinedArgs = concat $ intersperse " " args + case macroName of + "\\\"" -> return Null -- comment + "TH" -> macroTitle (if null args then "" else head args) + "nf" -> macroCodeBlock True >> return Null + "fi" -> macroCodeBlock False >> return Null + "B" -> return $ Plain [Strong [Str joinedArgs]] + "BR" -> return $ Plain [Strong [Str joinedArgs]] + "BI" -> return $ Plain [Strong [Emph [Str joinedArgs]]] + "I" -> return $ Plain [Emph [Str joinedArgs]] + "SH" -> return $ Header 2 nullAttr [Str joinedArgs] + "sp" -> return $ Plain [LineBreak] + _ -> unkownMacro macroName args + where - parseArg :: PandocMonad m => ManParser m String - parseArg = do - many1 space - plainArg - - quotedArg :: PandocMonad m => ManParser m String - quotedArg = do - char '"' - val <- many1 quotedChar - char '"' - return val - - plainArg :: PandocMonad m => ManParser m String - plainArg = do - many1 $ noneOf " \t" - - quotedChar :: PandocMonad m => ManParser m Char - quotedChar = do - noneOf "\"" - <|> try (string "\"\"" >> return '"') + macroTitle :: PandocMonad m => String -> ManParser m Block + macroTitle mantitle = do + modifyState (changeTitle mantitle) + if null mantitle + then return Null + else return $ Header 1 nullAttr [Str mantitle] + where + changeTitle title mst @ ManState{ pState = pst} = + let meta = stateMeta pst + metaUp = Meta $ insert "title" (MetaString title) (unMeta meta) + in + mst { pState = pst {stateMeta = metaUp} } + + + macroCodeBlock :: PandocMonad m => Bool -> ManParser m () + macroCodeBlock insideCB = modifyRoffState (\rst -> rst{inCodeBlock = insideCB}) >> return () + + unkownMacro :: PandocMonad m => String -> [String] -> ManParser m Block + unkownMacro mname args = do + pos <- getPosition + logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos + return $ Plain $ Str <$> args + + parseArgs :: PandocMonad m => ManParser m [String] + parseArgs = do + eolOpt <- optionMaybe $ char '\n' + if isJust eolOpt + then return [] + else do + many1 space + arg <- try quotedArg <|> plainArg + otherargs <- parseArgs + return $ arg : otherargs + + where + + plainArg :: PandocMonad m => ManParser m String + plainArg = many1 $ noneOf " \t\n" + + quotedArg :: PandocMonad m => ManParser m String + quotedArg = do + char '"' + val <- many1 quotedChar + char '"' + return val + + quotedChar :: PandocMonad m => ManParser m Char + quotedChar = noneOf "\"\n" <|> try (string "\"\"" >> return '"') + +roffInline :: RoffState -> String -> (Maybe Inline) +roffInline rst str + | null str = Nothing + | inCodeBlock rst = Just $ Code nullAttr str + | otherwise = Just $ case fontKind rst of + Regular -> Str str + Italic -> Emph [Str str] + _ -> Strong [Str str] parseLine :: PandocMonad m => ManParser m Block parseLine = do - str <- many anyChar - return $ Plain [Str str] - -parseBlock :: PandocMonad m => ManParser m Block -parseBlock = do - choice [ parseMacro - , parseLine - ] + parts <- parseLineParts + newline + return $ if null parts + then Plain [LineBreak] + else Plain parts + where + parseLineParts :: PandocMonad m => ManParser m [Inline] + parseLineParts = do + lnpart <- many $ noneOf "\n\\" + ManState {rState = roffSt} <- getState + let inl = roffInline roffSt lnpart + others <- backSlash <|> return [] + return $ case inl of + Just x -> x:others + Nothing -> others + + backSlash :: PandocMonad m => ManParser m [Inline] + backSlash = do + char '\\' + esc <- choice [ char 'f' >> fEscape + , char '-' >> return (Just '-') + , Just <$> noneOf "\n" + ] + ManState {rState = roffSt} <- getState + case esc of + Just c -> case roffInline roffSt [c] of + Just inl -> do + oth <- parseLineParts + return $ inl : oth + Nothing -> parseLineParts + Nothing -> parseLineParts + where + + fEscape :: PandocMonad m => ManParser m (Maybe Char) + fEscape = choice [ char 'B' >> modifyRoffState (\rst -> rst {fontKind = Bold}) + , char 'I' >> modifyRoffState (\rst -> rst {fontKind = Italic}) + , char 'P' >> modifyRoffState (\rst -> rst {fontKind = Regular}) + ] + >> return Nothing + + parseMan :: PandocMonad m => ManParser m Pandoc parseMan = do - blocks <- parseBlock `sepBy` newline - - return $ Pandoc Meta{unMeta = empty} blocks \ No newline at end of file + blocks <- many (parseMacro <|> parseLine) + parserst <- pState <$> getState + return $ Pandoc (stateMeta parserst) blocks -- cgit v1.2.3