Unverified Commit fed6ce17 authored by Gary Qian's avatar Gary Qian Committed by GitHub

Directionality formatter to move whitespace bidi handling into framework (#51964)

parent edd03a1a
......@@ -602,6 +602,14 @@ class TextPainter {
return value & 0xF800 == 0xD800;
}
// Checks if the glyph is either [Unicode.RLM] or [Unicode.LRM]. These values take
// up zero space and do not have valid bounding boxes around them.
//
// We do not directly use the [Unicode] constants since they are strings.
bool _isUnicodeDirectionality(int value) {
return value == 0x200F || value == 0x200E;
}
/// Returns the closest offset after `offset` at which the input cursor can be
/// positioned.
int getOffsetAfter(int offset) {
......@@ -636,7 +644,7 @@ class TextPainter {
return null;
// Check for multi-code-unit glyphs such as emojis or zero width joiner
final bool needsSearch = _isUtf16Surrogate(prevCodeUnit) || _text.codeUnitAt(offset) == _zwjUtf16;
final bool needsSearch = _isUtf16Surrogate(prevCodeUnit) || _text.codeUnitAt(offset) == _zwjUtf16 || _isUnicodeDirectionality(prevCodeUnit);
int graphemeClusterLength = needsSearch ? 2 : 1;
List<TextBox> boxes = <TextBox>[];
while (boxes.isEmpty && flattenedText != null) {
......@@ -687,7 +695,7 @@ class TextPainter {
if (nextCodeUnit == null)
return null;
// Check for multi-code-unit glyphs such as emojis or zero width joiner
final bool needsSearch = _isUtf16Surrogate(nextCodeUnit) || nextCodeUnit == _zwjUtf16;
final bool needsSearch = _isUtf16Surrogate(nextCodeUnit) || nextCodeUnit == _zwjUtf16 || _isUnicodeDirectionality(nextCodeUnit);
int graphemeClusterLength = needsSearch ? 2 : 1;
List<TextBox> boxes = <TextBox>[];
while (boxes.isEmpty && flattenedText != null) {
......
......@@ -1655,7 +1655,11 @@ class EditableTextState extends State<EditableText> with AutomaticKeepAliveClien
_lastBottomViewInset = WidgetsBinding.instance.window.viewInsets.bottom;
}
_WhitespaceDirectionalityFormatter _whitespaceFormatter;
void _formatAndSetValue(TextEditingValue value) {
_whitespaceFormatter ??= _WhitespaceDirectionalityFormatter(textDirection: _textDirection);
// Check if the new value is the same as the current local value, or is the same
// as the post-formatting value of the previous pass.
final bool textChanged = _value?.text != value?.text;
......@@ -1666,6 +1670,9 @@ class EditableTextState extends State<EditableText> with AutomaticKeepAliveClien
for (final TextInputFormatter formatter in widget.inputFormatters) {
value = formatter.formatEditUpdate(_value, value);
}
// Always pass the text through the whitespace directionality formatter to
// maintain expected behavior with carets on trailing whitespace.
value = _whitespaceFormatter.formatEditUpdate(_value, value);
_lastFormattedValue = value;
}
// If the text has changed or the selection has changed, we should update the
......@@ -2145,3 +2152,163 @@ class _Editable extends LeafRenderObjectWidget {
..paintCursorAboveText = paintCursorAboveText;
}
}
// This formatter inserts [Unicode.RLM] and [Unicode.LRM] into the
// string in order to preserve expected caret behavior when trailing
// whitespace is inserted.
//
// When typing in a direction that opposes the base direction
// of the paragraph, un-enclosed whitespace gets the directionality
// of the paragraph. This is often at odds with what is immeditely
// being typed causing the caret to jump to the wrong side of the text.
// This formatter makes use of the RLM and LRM to cause the text
// shaper to inherently treat the whitespace as being surrounded
// by the directionality of the previous non-whitespace codepoint.
class _WhitespaceDirectionalityFormatter extends TextInputFormatter {
// The [textDirection] should be the base directionality of the
// paragraph/editable.
_WhitespaceDirectionalityFormatter({TextDirection textDirection})
: _baseDirection = textDirection,
_previousNonWhitespaceDirection = textDirection;
// Using regex here instead of ICU is suboptimal, but is enough
// to produce the correct results for any reasonable input where this
// is even relevant. Using full ICU would be a much heavier change,
// requiring exposure of the C++ ICU API.
//
// LTR covers most scripts and symbols, including but not limited to Latin,
// ideographic scripts (Chinese, Japanese, etc), Cyrilic, Indic, and
// SE Asian scripts.
final RegExp _ltrRegExp = RegExp(r'[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF]');
// RTL covers Arabic, Hebrew, and other RTL languages such as Urdu,
// Aramic, Farsi, Dhivehi.
final RegExp _rtlRegExp = RegExp(r'[\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC]');
// Although whitespaces are not the only codepoints that have weak directionality,
// these are the primary cause of the caret being misplaced.
final RegExp _whitespaceRegExp = RegExp(r'\s');
final TextDirection _baseDirection;
// Tracks the directionality of the most recently encountered
// codepoint that was not whitespace. This becomes the direction of
// marker inserted to fully surround ambiguous whitespace.
TextDirection _previousNonWhitespaceDirection;
// Prevents the formatter from attempting more expensive formatting
// operations mixed directionality is found.
bool _hasOpposingDirection = false;
// See [Unicode.RLM] and [Unicode.LRM].
//
// We do not directly use the [Unicode] constants since they are strings.
static const int _rlm = 0x200F;
static const int _lrm = 0x200E;
@override
TextEditingValue formatEditUpdate(
TextEditingValue oldValue,
TextEditingValue newValue,
) {
// Skip formatting (which can be more expensive) if there are no cases of
// mixing directionality. Once a case of mixed directionality is found,
// always perform the formatting.
if (!_hasOpposingDirection) {
_hasOpposingDirection = _baseDirection == TextDirection.ltr ?
_rtlRegExp.hasMatch(newValue.text) : _ltrRegExp.hasMatch(newValue.text);
}
if (_hasOpposingDirection) {
_previousNonWhitespaceDirection = _baseDirection;
final List<int> outputCodepoints = <int>[];
// We add/subtract from these as we insert/remove markers.
int selectionBase = newValue.selection.baseOffset;
int selectionExtent = newValue.selection.extentOffset;
void addToSelection() {
selectionBase += outputCodepoints.length <= selectionBase ? 1 : 0;
selectionExtent += outputCodepoints.length <= selectionExtent ? 1 : 0;
}
void subtractFromSelection() {
selectionBase -= outputCodepoints.length < selectionBase ? 1 : 0;
selectionExtent -= outputCodepoints.length < selectionExtent ? 1 : 0;
}
bool previousWasWhitespace = false;
bool previousWasDirectionalityMarker = false;
int previousNonWhitespaceCodepoint;
for (final int codepoint in newValue.text.runes) {
if (isWhitespace(codepoint)) {
// Only compute the directionality of the non-whitespace
// when the value is needed.
if (!previousWasWhitespace && previousNonWhitespaceCodepoint != null) {
_previousNonWhitespaceDirection = getDirection(previousNonWhitespaceCodepoint);
}
// If we already added directionality for this run of whitespace,
// "shift" the marker added to the end of the whitespace run.
if (previousWasWhitespace) {
subtractFromSelection();
outputCodepoints.removeLast();
}
outputCodepoints.add(codepoint);
addToSelection();
outputCodepoints.add(_previousNonWhitespaceDirection == TextDirection.rtl ? _rlm : _lrm);
previousWasWhitespace = true;
previousWasDirectionalityMarker = false;
} else if (isDirectionalityMarker(codepoint)) {
// Handle pre-existing directionality markers. Use pre-existing marker
// instead of the one we add.
if (previousWasWhitespace) {
subtractFromSelection();
outputCodepoints.removeLast();
}
outputCodepoints.add(codepoint);
previousWasWhitespace = false;
previousWasDirectionalityMarker = true;
} else {
// If the whitespace was already enclosed by the same directionality,
// we can remove the artifically added marker.
if (!previousWasDirectionalityMarker &&
previousWasWhitespace &&
getDirection(codepoint) == _previousNonWhitespaceDirection) {
subtractFromSelection();
outputCodepoints.removeLast();
}
// Normal character, track its codepoint add it to the string.
previousNonWhitespaceCodepoint = codepoint;
outputCodepoints.add(codepoint);
previousWasWhitespace = false;
previousWasDirectionalityMarker = false;
}
}
final String formatted = String.fromCharCodes(outputCodepoints);
return TextEditingValue(
text: formatted,
selection: TextSelection(
baseOffset: selectionBase,
extentOffset: selectionExtent,
affinity: newValue.selection.affinity,
isDirectional: newValue.selection.isDirectional
),
);
}
return newValue;
}
bool isWhitespace(int value) {
return _whitespaceRegExp.hasMatch(String.fromCharCode(value));
}
bool isDirectionalityMarker(int value) {
return value == _rlm || value == _lrm;
}
TextDirection getDirection(int value) {
// Use the LTR version as short-circuiting will be more efficient since
// there are more LTR codepoints.
return _ltrRegExp.hasMatch(String.fromCharCode(value)) ? TextDirection.ltr : TextDirection.rtl;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment