Skip to content
8 changes: 8 additions & 0 deletions Doc/library/textwrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,14 @@ hyphenated words; only then will long words be broken if necessary, unless
.. versionadded:: 3.4


.. attribute:: text_len

(default: ``len``) Callable used to measure the length of a string when
wrapping. You can provide a custom function, e.g. one that counts wide
(East Asian) characters as two columns.
Comment thread
merwok marked this conversation as resolved.

.. versionadded:: 3.11


.. index:: single: ...; placeholder

.. attribute:: placeholder
Expand Down
17 changes: 17 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#

import unittest
import unicodedata

from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten

Expand Down Expand Up @@ -1076,5 +1077,21 @@ def test_first_word_too_long_but_placeholder_fits(self):
self.check_shorten("Helloo", 5, "[...]")


class WideCharacterTestCase(BaseTestCase):
    """Wrapping driven by a caller-supplied ``text_len`` width function."""

    def test_wide_character(self):
        """A double-width character is charged two columns when wrapping."""
        double_width = frozenset('FW')

        def display_width(s):
            # East Asian Fullwidth ('F') and Wide ('W') characters count as
            # two columns; every other character counts as one.
            return sum(
                2 if unicodedata.east_asian_width(ch) in double_width else 1
                for ch in s
            )

        # "123 🔧" is 5 code points but 6 columns under display_width, so it
        # does not fit in a width of 5 and the emoji lands on its own line.
        self.check_wrap("123 🔧", 5, ["123", "🔧"], text_len=display_width)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel that more tests are needed; text_len is used at multiple places in the code.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding tests! Could some of them use different texts?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to avoid adding examples for languages that I do not speak myself (e.g. Chinese). Are there ways to reach out to the wider community to provide meaningful examples?



if __name__ == '__main__':
unittest.main()
26 changes: 15 additions & 11 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ def __init__(self,
tabsize=8,
*,
max_lines=None,
placeholder=' [...]'):
placeholder=' [...]',
text_len=len):
Comment thread
merwok marked this conversation as resolved.
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
Expand All @@ -138,6 +139,7 @@ def __init__(self,
self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
self.text_len = text_len


# -- Private methods -----------------------------------------------
Expand Down Expand Up @@ -217,7 +219,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
if self.break_long_words:
end = space_left
chunk = reversed_chunks[-1]
if self.break_on_hyphens and len(chunk) > space_left:
if self.break_on_hyphens and self.text_len(chunk) > space_left:
# break after last hyphen, but only if there are
# non-hyphens before it
hyphen = chunk.rfind('-', 0, space_left)
Expand Down Expand Up @@ -259,7 +261,8 @@ def _wrap_chunks(self, chunks):
indent = self.subsequent_indent
else:
indent = self.initial_indent
if len(indent) + len(self.placeholder.lstrip()) > self.width:
if (self.text_len(indent) +
self.text_len(self.placeholder.lstrip()) > self.width):
raise ValueError("placeholder too large for max width")

# Arrange in reverse order so items can be efficiently popped
Expand All @@ -280,15 +283,15 @@ def _wrap_chunks(self, chunks):
indent = self.initial_indent

# Maximum width for this line.
width = self.width - len(indent)
width = self.width - self.text_len(indent)

# First chunk on line is whitespace -- drop it, unless this
# is the very beginning of the text (ie. no lines started yet).
if self.drop_whitespace and chunks[-1].strip() == '' and lines:
del chunks[-1]

while chunks:
l = len(chunks[-1])
l = self.text_len(chunks[-1])

# Can at least squeeze this chunk onto the current line.
if cur_len + l <= width:
Expand All @@ -301,13 +304,13 @@ def _wrap_chunks(self, chunks):

# The current line is full, and the next chunk is too big to
# fit on *any* line (not just this one).
if chunks and len(chunks[-1]) > width:
if chunks and self.text_len(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
cur_len = sum(map(len, cur_line))
cur_len = sum(map(self.text_len, cur_line))

# If the last chunk on this line is all whitespace, drop it.
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]

if cur_line:
Expand All @@ -323,16 +326,17 @@ def _wrap_chunks(self, chunks):
else:
while cur_line:
if (cur_line[-1].strip() and
cur_len + len(self.placeholder) <= width):
cur_len + self.text_len(self.placeholder) <= width):
cur_line.append(self.placeholder)
lines.append(indent + ''.join(cur_line))
break
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]
else:
if lines:
prev_line = lines[-1].rstrip()
if (len(prev_line) + len(self.placeholder) <=
if (self.text_len(prev_line) +
self.text_len(self.placeholder) <=
self.width):
lines[-1] = prev_line + self.placeholder
break
Expand Down