diff options
-rw-r--r-- | README | 10 | ||||
-rw-r--r-- | src/Text/Pandoc/Options.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 16 |
3 files changed, 26 insertions, 2 deletions
@@ -3224,7 +3224,15 @@ treated as spaces or as hard line breaks. This option is intended for use with East Asian languages where spaces are not used between words, but text is divided into lines for readability. -#### Extension: `emoji` #### +#### Extension: `east_asian_line_breaks` #### + +Causes newlines within a paragraph to be ignored, rather than +being treated as spaces or as hard line breaks, when they occur +between two East Asian wide characters. This is a better choice +than `ignore_line_breaks` for texts that include a mix of East +Asian wide characters and other characters. + +##### Extension: `emoji` #### Parses textual emojis like `:smile:` as Unicode emoticons. diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 9f27f46f9..7dd47cd59 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -106,6 +106,8 @@ data Extension = | Ext_subscript -- ^ Subscript using ~this~ syntax | Ext_hard_line_breaks -- ^ All newlines become hard line breaks | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored + | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between + -- East Asian wide characters | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions | Ext_emoji -- ^ Support emoji like :smile: diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0b7faadb7..999ab11de 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -40,6 +40,7 @@ import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Generic (bottomUp) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -51,6 +52,7 @@ import qualified Data.Vector as V import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) @@ -356,7 +358,19 @@ parseMarkdown = do st <- getState let meta = runF (stateMeta' st) st let Pandoc _ bs = B.doc $ runF blocks st - return $ Pandoc meta bs + eastAsianLineBreaks <- option False $ + True <$ guardEnabled Ext_east_asian_line_breaks + return $ (if eastAsianLineBreaks + then bottomUp softBreakFilter + else id) $ Pandoc meta bs + +softBreakFilter :: [Inline] -> [Inline] +softBreakFilter (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), (c:_)) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs +softBreakFilter xs = xs referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do |