diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 50 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs | 39 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/ContentReader.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 28 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/Namespaces.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/StyleReader.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 55 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 7 |
9 files changed, 120 insertions, 78 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 8b66d2d3d..a97285ae2 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -948,6 +948,7 @@ htmlTag f = try $ do parseOptions{ optTagWarning = True } inp guard $ f next case next of + TagWarning _ -> fail "encountered TagWarning" TagComment s | "<!--" `isPrefixOf` inp -> do count (length s + 4) anyChar diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index f2b0872bb..58878feb5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -326,23 +326,22 @@ stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: MarkdownParser () mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block - kvPairs <- many1 kvPair + firstPair <- kvPair False + restPairs <- many (kvPair True) + let kvPairs = firstPair : restPairs blanklines updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: MarkdownParser (String, MetaValue) -kvPair = try $ do +kvPair :: Bool -> MarkdownParser (String, MetaValue) +kvPair allowEmpty = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - skipMany1 spaceNoNewline - val <- manyTill anyChar + val <- trim <$> manyTill anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ not . null . trim $ val + guard $ allowEmpty || not (null val) let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ trim val + let val' = MetaBlocks $ B.toList $ B.plain $ B.text $ val return (key',val') - where - spaceNoNewline = satisfy (\x -> isSpace x && (x/='\n') && (x/='\r')) parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do @@ -1321,7 +1320,7 @@ removeOneLeadingSpace xs = gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines -pipeBreak :: MarkdownParser [Alignment] +pipeBreak :: MarkdownParser ([Alignment], [Int]) pipeBreak = try $ do nonindentSpaces openPipe <- (True <$ char '|') <|> return False @@ -1331,16 +1330,22 @@ pipeBreak = try $ do guard $ not (null rest && not openPipe) optional (char '|') blankline - return (first:rest) + return $ unzip (first:rest) pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do nonindentSpaces lookAhead nonspaceChar - (heads,aligns) <- (,) <$> pipeTableRow <*> pipeBreak - lines' <- sequence <$> many pipeTableRow - let widths = replicate (length aligns) 0.0 - return $ (aligns, widths, heads, lines') + (heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak + (lines', rawRows) <- unzip <$> many (withRaw pipeTableRow) + let maxlength = maximum $ map length rawRows + numColumns <- getOption readerColumns + let widths = if maxlength > numColumns + then map (\len -> + fromIntegral (len + 1) / fromIntegral numColumns) + seplengths + else replicate (length aligns) 0.0 + return $ (aligns, widths, heads, sequence lines') sepPipe :: MarkdownParser () sepPipe = try $ do @@ -1369,19 +1374,20 @@ pipeTableRow = do ils' | B.isNull ils' -> mempty | otherwise -> B.plain $ ils') cells' -pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart :: Parser [Char] st (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') - many1 (char '-') + pipe <- many1 (char '-') right <- optionMaybe (char ':') skipMany spaceChar + let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right return $ - case (left,right) of - (Nothing,Nothing) -> AlignDefault - (Just _,Nothing) -> AlignLeft - (Nothing,Just _) -> AlignRight - (Just _,Just _) -> AlignCenter + ((case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter), len) -- Succeed only if current line contains a pipe. scanForPipe :: Parser [Char] st () diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs index d4dcf5be2..fdc02d8d2 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -129,24 +129,23 @@ joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z joinOn = arr.uncurry -- | Applies a function to the uncurried result-pair of an arrow-application. --- (The §-symbol was chosen to evoke an association with pairs through the --- shared first character) -(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d -a >>§ f = a >>^ uncurry f +-- (The %-symbol was chosen to evoke an association with pairs.) +(>>%) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d +a >>% f = a >>^ uncurry f --- | '(>>§)' with its arguments flipped -(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d -(§<<) = flip (>>§) +-- | '(>>%)' with its arguments flipped +(%<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d +(%<<) = flip (>>%) -- | Precomposition with an uncurried function -(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r -f §>> a = uncurry f ^>> a +(%>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r +f %>> a = uncurry f ^>> a -- | Precomposition with an uncurried function (right to left variant) -(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r -(<<§) = flip (§>>) +(<<%) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r +(<<%) = flip (%>>) -infixr 2 >>§, §<<, §>>, <<§ +infixr 2 >>%, %<<, %>>, <<% -- | Duplicate a value and apply an arrow to the second instance. @@ -271,7 +270,7 @@ newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where mempty = CoEval $ returnV mempty - (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend + (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>% mappend -- | Evaluates a collection of arrows in a parallel fashion. -- @@ -433,29 +432,29 @@ a ^>>?^? f = a ^>> Left ^|||^ f a >>?! f = a >>> right f --- -(>>?§) :: (ArrowChoice a, Monoid f) +(>>?%) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> c) -> FallibleArrow a x f c -a >>?§ f = a >>?^ (uncurry f) +a >>?% f = a >>?^ (uncurry f) --- -(^>>?§) :: (ArrowChoice a, Monoid f) +(^>>?%) :: (ArrowChoice a, Monoid f) => (x -> Either f (b,b')) -> (b -> b' -> c) -> FallibleArrow a x f c -a ^>>?§ f = arr a >>?^ (uncurry f) +a ^>>?% f = arr a >>?^ (uncurry f) --- -(>>?§?) :: (ArrowChoice a, Monoid f) +(>>?%?) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f (b,b') -> (b -> b' -> (Either f c)) -> FallibleArrow a x f c -a >>?§? f = a >>?^? (uncurry f) +a >>?%? f = a >>?^? (uncurry f) infixr 1 >>?, >>?^, >>?^? infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! -infixr 1 >>?§, ^>>?§, >>?§? +infixr 1 >>?%, ^>>?%, >>?%? -- | Keep values that are Right, replace Left values by a constant. ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 9ff3532e1..1f1c57646 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -145,7 +145,7 @@ type OdtReaderSafe a b = XMLReaderSafe ReaderState a b fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b fromStyles f = keepingTheValue (getExtraState >>^ styleSet) - >>§ f + >>% f -- getStyleByName :: OdtReader StyleName Style @@ -162,7 +162,7 @@ lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice -- switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) switchCurrentListStyle = keepingTheValue getExtraState - >>§ swapCurrentListStyle + >>% swapCurrentListStyle >>> first setExtraState >>^ snd @@ -170,7 +170,7 @@ switchCurrentListStyle = keepingTheValue getExtraState pushStyle :: OdtReaderSafe Style Style pushStyle = keepingTheValue ( ( keepingTheValue getExtraState - >>§ pushStyle' + >>% pushStyle' ) >>> setExtraState ) @@ -470,7 +470,7 @@ matchingElement :: (Monoid e) matchingElement ns name reader = (ns, name, asResultAccumulator reader) where asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) - asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>) + asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>% (<>) -- matchChildContent' :: (Monoid result) @@ -497,14 +497,14 @@ matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback -- -- | Open Document allows several consecutive spaces if they are marked up read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines -read_plain_text = fst ^&&& read_plain_text' >>§ recover +read_plain_text = fst ^&&& read_plain_text' >>% recover where -- fallible version read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines read_plain_text' = ( second ( arr extractText ) >>^ spreadChoice >>?! second text ) - >>?§ (<>) + >>?% (<>) -- extractText :: XML.Content -> Fallible String extractText (XML.Text cData) = succeedWith (XML.cdData cData) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 7c1764889..8c03d1a09 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -331,7 +331,7 @@ convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA where setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v modifyWithA = keepingTheValue (moreState ^>> a) - >>^ spreadChoice >>?§ flip replaceExtraState + >>^ spreadChoice >>?% flip replaceExtraState -- | First sets the extra state to the new value. Then produces a new -- extra state with a converter that uses the new state. Finally, the @@ -413,14 +413,14 @@ elemName :: (NameSpaceID nsID) -> XMLConverter nsID extraState x XML.QName elemName nsID name = lookupNSiri nsID &&& lookupNSprefix nsID - >>§ XML.QName name + >>% XML.QName name -- | Checks if a given element matches both a specified namespace id -- and a specified element name elemNameIs :: (NameSpaceID nsID) => nsID -> ElementName -> XMLConverter nsID extraState XML.Element Bool -elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName +elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>% hasThatName where hasThatName e iri = let elName = XML.elName e in XML.qName elName == name && XML.qURI elName == iri @@ -461,8 +461,8 @@ currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> (XML.qName >>^ (&&).(== name) ) ^&&&^ (XML.qIRI >>^ (==) ) - ) >>§ (.) - ) &&& lookupNSiri nsID >>§ ($) + ) >>% (.) + ) &&& lookupNSiri nsID >>% ($) -} -- @@ -487,7 +487,7 @@ findChildren :: (NameSpaceID nsID) -> XMLConverter nsID extraState x [XML.Element] findChildren nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChildren + >>% XML.findChildren -- filterChildren :: (XML.Element -> Bool) @@ -508,7 +508,7 @@ findChild' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe XML.Element) findChild' nsID name = elemName nsID name &&& getCurrentElement - >>§ XML.findChild + >>% XML.findChild -- findChild :: (NameSpaceID nsID) @@ -596,7 +596,7 @@ isThatTheAttrValue :: (NameSpaceID nsID) isThatTheAttrValue nsID attrName = keepingTheValue (findAttr nsID attrName) - >>§ right.(==) + >>% right.(==) -- | Lookup value in a dictionary, fail if no attribute found or value -- not in dictionary @@ -669,7 +669,7 @@ findAttr' :: (NameSpaceID nsID) -> XMLConverter nsID extraState x (Maybe AttributeValue) findAttr' nsID attrName = elemName nsID attrName &&& getCurrentElement - >>§ XML.findAttr + >>% XML.findAttr -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -787,7 +787,7 @@ prepareIteration :: (NameSpaceID nsID) -> XMLConverter nsID extraState b [(b, XML.Element)] prepareIteration nsID name = keepingTheValue (findChildren nsID name) - >>§ distributeValue + >>% distributeValue -- | Applies a converter to every child element of a specific type. -- Collects results in a 'Monoid'. @@ -877,9 +877,9 @@ makeMatcherE nsID name c = ( second ( elemNameIs nsID name >>^ bool Nothing (Just tryC) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd - where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd + where tryC = (fst ^&&& executeThere c >>% recover) &&&^ snd -- Helper function: The @c@ is actually a converter that is to be selected by -- matching XML content to the first two parameters. @@ -899,14 +899,14 @@ makeMatcherC nsID name c = ( second ( contentToElem >>^ bool Nothing (Just cWithJump) ) ) - >>§ (<|>) + >>% (<|>) ) &&&^ snd where cWithJump = ( fst ^&&& ( second contentToElem >>> spreadChoice ^>>? executeThere c ) - >>§ recover) + >>% recover) &&&^ snd contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element contentToElem = arr $ \e -> case e of diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index e28056814..deb009998 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -107,4 +107,4 @@ nsIDs = [ ("http://www.w3.org/1999/xhtml" , NsXHtml ), ("http://www.w3.org/2002/xforms" , NsXForms ), ("http://www.w3.org/1999/xlink" , NsXLink ) - ]
\ No newline at end of file + ] diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index e403424f6..96cfed0b3 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -174,7 +174,7 @@ findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) findPitch = ( lookupAttr NsStyle "font-pitch" `ifFailedDo` findAttr NsStyle "font-name" >>? ( keepingTheValue getExtraState - >>§ M.lookup + >>% M.lookup >>^ maybeToChoice ) ) @@ -447,7 +447,7 @@ readAllStyles :: StyleReader _x Styles readAllStyles = ( readFontPitches >>?! ( readAutomaticStyles &&& readStyles )) - >>?§? chooseMax + >>?%? chooseMax -- all top elements are always on the same hierarchy level -- diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 27a8fe957..2585ace21 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -2,7 +2,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- -Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de> +Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Copyright : Copyright (C) 2014 Albert Krewinkel License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de> + Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Conversion of org-mode formatted plain text to 'Pandoc' document. -} @@ -140,6 +140,7 @@ data OrgParserState = OrgParserState , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateParserContext :: ParserContext , orgStateIdentifiers :: [String] , orgStateHeaderMap :: M.Map Inlines String } @@ -181,6 +182,7 @@ defaultOrgParserState = OrgParserState , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] + , orgStateParserContext = NullState , orgStateIdentifiers = [] , orgStateHeaderMap = M.empty } @@ -291,6 +293,23 @@ blanklines = <* updateLastPreCharPos <* updateLastForbiddenCharPos +-- | Succeeds when we're in list context. +inList :: OrgParser () +inList = do + ctx <- orgStateParserContext <$> getState + guard (ctx == ListItemState) + +-- | Parse in different context +withContext :: ParserContext -- ^ New parser context + -> OrgParser a -- ^ Parser to run in that context + -> OrgParser a +withContext context parser = do + oldContext <- orgStateParserContext <$> getState + updateState $ \s -> s{ orgStateParserContext = context } + result <- parser + updateState $ \s -> s{ orgStateParserContext = oldContext } + return result + -- -- parsing blocks -- @@ -513,10 +532,16 @@ rundocBlockClass :: String rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgParamValue +blockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgParamValue + return (argKey, paramValue) inlineBlockOption :: OrgParser (String, String) -inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue +inlineBlockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgInlineParamValue + return (argKey, paramValue) orgArgKey :: OrgParser String orgArgKey = try $ @@ -525,11 +550,17 @@ orgArgKey = try $ orgParamValue :: OrgParser String orgParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':' ) + *> many1 (noneOf "\t\n\r ") + <* skipSpaces orgInlineParamValue :: OrgParser String orgInlineParamValue = try $ - skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces + skipSpaces + *> notFollowedBy (char ':') + *> many1 (noneOf "\t\n\r ]") + <* skipSpaces orgArgWordChar :: OrgParser Char orgArgWordChar = alphaNum <|> oneOf "-_" @@ -891,9 +922,13 @@ noteBlock = try $ do paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ do ils <- parseInlines - nl <- option False (newline >> return True) - try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >> - return (B.para <$> ils)) + nl <- option False (newline *> return True) + -- Read block as paragraph, except if we are in a list context and the block + -- is directly followed by a list item, in which case the block is read as + -- plain text. + try (guard nl + *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart)) + *> return (B.para <$> ils)) <|> (return (B.plain <$> ils)) inlinesTillNewline :: OrgParser (F Inlines) @@ -970,7 +1005,7 @@ definitionListItem parseMarkerGetLength = try $ do -- parse raw text for one list item, excluding start marker and continuations listItem :: OrgParser Int -> OrgParser (F Blocks) -listItem start = try $ do +listItem start = try . withContext ListItemState $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 83280aa2e..43aaa3f9a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -79,11 +79,12 @@ readTextile opts s = -- | Generate a Pandoc ADT from a textile document parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do - -- textile allows raw HTML and does smart punctuation by default + -- textile allows raw HTML and does smart punctuation by default, + -- but we do not enable smart punctuation unless it is explicitly + -- asked for, for better conversion to other light markup formats oldOpts <- stateOptions `fmap` getState updateState $ \state -> state{ stateOptions = - oldOpts{ readerSmart = True - , readerParseRaw = True + oldOpts{ readerParseRaw = True , readerOldDashes = True } } many blankline |