aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README10
-rw-r--r--src/Text/Pandoc/Options.hs2
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs16
3 files changed, 26 insertions, 2 deletions
diff --git a/README b/README
index 8f00ac3a8..ea6a25871 100644
--- a/README
+++ b/README
@@ -3224,7 +3224,15 @@ treated as spaces or as hard line breaks. This option is intended for
use with East Asian languages where spaces are not used between words,
but text is divided into lines for readability.
-#### Extension: `emoji` ####
+#### Extension: `east_asian_line_breaks` ####
+
+Causes newlines within a paragraph to be ignored, rather than
+being treated as spaces or as hard line breaks, when they occur
+between two East Asian wide characters. This is a better choice
+than `ignore_line_breaks` for texts that include a mix of East
+Asian wide characters and other characters.
+
+##### Extension: `emoji` ####
Parses textual emojis like `:smile:` as Unicode emoticons.
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 9f27f46f9..7dd47cd59 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -106,6 +106,8 @@ data Extension =
| Ext_subscript -- ^ Subscript using ~this~ syntax
| Ext_hard_line_breaks -- ^ All newlines become hard line breaks
| Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored
+ | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between
+ -- East Asian wide characters
| Ext_literate_haskell -- ^ Enable literate Haskell conventions
| Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions
| Ext_emoji -- ^ Support emoji like :smile:
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0b7faadb7..999ab11de 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -40,6 +40,7 @@ import Data.Char ( isSpace, isAlphaNum, toLower )
import Data.Maybe
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojis)
+import Text.Pandoc.Generic (bottomUp)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Yaml as Yaml
@@ -51,6 +52,7 @@ import qualified Data.Vector as V
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import Text.Pandoc.Options
import Text.Pandoc.Shared
+import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
@@ -356,7 +358,19 @@ parseMarkdown = do
st <- getState
let meta = runF (stateMeta' st) st
let Pandoc _ bs = B.doc $ runF blocks st
- return $ Pandoc meta bs
+ eastAsianLineBreaks <- option False $
+ True <$ guardEnabled Ext_east_asian_line_breaks
+ return $ (if eastAsianLineBreaks
+ then bottomUp softBreakFilter
+ else id) $ Pandoc meta bs
+
+softBreakFilter :: [Inline] -> [Inline]
+softBreakFilter (x:SoftBreak:y:zs) =
+ case (stringify x, stringify y) of
+ (xs@(_:_), (c:_))
+ | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
+ _ -> x:SoftBreak:y:zs
+softBreakFilter xs = xs
referenceKey :: MarkdownParser (F Blocks)
referenceKey = try $ do