From 44120ea7165546152af88fd442c52ab0f201052e Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sat, 12 Dec 2015 17:28:52 -0800
Subject: Implemented `east_asian_line_breaks` extension.

Text.Pandoc.Options: Added `Ext_east_asian_line_breaks` constructor
to `Extension` (API change).

This extension is like `ignore_line_breaks`, but smarter -- it
only ignores line breaks between two East Asian wide characters.
This makes it better suited for writing with a mix of East Asian
and non-East Asian scripts.

Closes #2586.
---
 src/Text/Pandoc/Options.hs          |  2 ++
 src/Text/Pandoc/Readers/Markdown.hs | 16 +++++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 9f27f46f9..7dd47cd59 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -106,6 +106,8 @@ data Extension =
     | Ext_subscript           -- ^ Subscript using ~this~ syntax
     | Ext_hard_line_breaks    -- ^ All newlines become hard line breaks
     | Ext_ignore_line_breaks  -- ^ Newlines in paragraphs are ignored
+    | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
+                              -- East Asian wide characters
     | Ext_literate_haskell    -- ^ Enable literate Haskell conventions
     | Ext_abbreviations       -- ^ PHP markdown extra abbreviation definitions
     | Ext_emoji               -- ^ Support emoji like :smile:
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0b7faadb7..999ab11de 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -40,6 +40,7 @@ import Data.Char ( isSpace, isAlphaNum, toLower )
 import Data.Maybe
 import Text.Pandoc.Definition
 import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Generic (bottomUp)
 import qualified Data.Text as T
 import Data.Text (Text)
 import qualified Data.Yaml as Yaml
@@ -51,6 +52,7 @@ import qualified Data.Vector as V
 import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
 import Text.Pandoc.Options
 import Text.Pandoc.Shared
+import Text.Pandoc.Pretty (charWidth)
 import Text.Pandoc.XML (fromEntities)
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
@@ -356,7 +358,19 @@ parseMarkdown = do
   st <- getState
   let meta = runF (stateMeta' st) st
   let Pandoc _ bs = B.doc $ runF blocks st
-  return $ Pandoc meta bs
+  eastAsianLineBreaks <- option False $
+                    True <$ guardEnabled Ext_east_asian_line_breaks
+  return $ (if eastAsianLineBreaks
+               then bottomUp softBreakFilter
+               else id) $ Pandoc meta bs
+
+softBreakFilter :: [Inline] -> [Inline]
+softBreakFilter (x:SoftBreak:y:zs) =
+  case (stringify x, stringify y) of
+        (xs@(_:_), (c:_))
+          | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
+        _ -> x:SoftBreak:y:zs
+softBreakFilter xs = xs
 
 referenceKey :: MarkdownParser (F Blocks)
 referenceKey = try $ do
--
cgit v1.2.3