diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-12-12 17:28:52 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-12-12 17:28:52 -0800 |
commit | 44120ea7165546152af88fd442c52ab0f201052e (patch) | |
tree | 34324b8b450b33f74437621dcf7087665dc0b78c /src | |
parent | 60d383e27e84022142ab666d4648a666cc5a026a (diff) | |
download | pandoc-44120ea7165546152af88fd442c52ab0f201052e.tar.gz |
Implemented `east_asian_line_breaks` extension.
Text.Pandoc.Options: Added `Ext_east_asian_line_breaks` constructor to
`Extension` (API change).
This extension is like `ignore_line_breaks`, but smarter -- it
only ignores line breaks between two East Asian wide characters.
This makes it better suited for writing with a mix of East Asian
and non-East Asian scripts.
Closes #2586.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Options.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 16 |
2 files changed, 17 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 9f27f46f9..7dd47cd59 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -106,6 +106,8 @@ data Extension = | Ext_subscript -- ^ Subscript using ~this~ syntax | Ext_hard_line_breaks -- ^ All newlines become hard line breaks | Ext_ignore_line_breaks -- ^ Newlines in paragraphs are ignored + | Ext_east_asian_line_breaks -- ^ Newlines in paragraphs are ignored between + -- East Asian wide characters | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions | Ext_emoji -- ^ Support emoji like :smile: diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0b7faadb7..999ab11de 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -40,6 +40,7 @@ import Data.Char ( isSpace, isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojis) +import Text.Pandoc.Generic (bottomUp) import qualified Data.Text as T import Data.Text (Text) import qualified Data.Yaml as Yaml @@ -51,6 +52,7 @@ import qualified Data.Vector as V import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.Pretty (charWidth) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) @@ -356,7 +358,19 @@ parseMarkdown = do st <- getState let meta = runF (stateMeta' st) st let Pandoc _ bs = B.doc $ runF blocks st - return $ Pandoc meta bs + eastAsianLineBreaks <- option False $ + True <$ guardEnabled Ext_east_asian_line_breaks + return $ (if eastAsianLineBreaks + then bottomUp softBreakFilter + else id) $ Pandoc meta bs + +softBreakFilter :: [Inline] -> [Inline] +softBreakFilter (x:SoftBreak:y:zs) = + case (stringify x, stringify y) of + (xs@(_:_), (c:_)) + | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs + _ -> x:SoftBreak:y:zs +softBreakFilter xs = xs referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do |