Skip to content

Commit ac29fa7

Browse files
committed
Make whitespace filters Unicode-aware
1 parent 1954a26 commit ac29fa7

2 files changed

Lines changed: 57 additions & 4 deletions

File tree

lib/liquid/standardfilters.rb

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ module StandardFilters
3636
%r{<style.*?</style>}m,
3737
)
3838
# Non-greedy match of everything between a `<` and the next `>`; the `m`
# flag lets `.` span newlines.
STRIP_HTML_TAGS = /<.*?>/m

# Whitespace patterns built on the `[[:space:]]` character class, so that
# non-ASCII spaces (e.g. U+00A0, U+202F) are treated as whitespace too.
UNICODE_WHITESPACE_RUNS = /[[:space:]]+/
UNICODE_WHITESPACE_LEFT = /\A[[:space:]]+/
UNICODE_WHITESPACE_RIGHT = /[[:space:]]+\z/
# Either edge of the string: a leading run or a trailing run.
UNICODE_WHITESPACE_EDGES = Regexp.union(UNICODE_WHITESPACE_LEFT, UNICODE_WHITESPACE_RIGHT)

# Implementation details of the whitespace filters; keep them out of the
# module's public constant namespace.
private_constant(
  *%i[
    UNICODE_WHITESPACE_EDGES
    UNICODE_WHITESPACE_LEFT
    UNICODE_WHITESPACE_RIGHT
    UNICODE_WHITESPACE_RUNS
  ],
)
3949

4050
class << self
4151
def try_coerce_encoding(input, encoding:)
@@ -307,48 +317,56 @@ def split(input, pattern)
307317
# @liquid_category string
308318
# @liquid_summary
309319
# Removes leading and trailing whitespace and collapses consecutive whitespace to a single space.
320+
# @liquid_description
321+
# Whitespace is identified using the `[[:space:]]` regular expression character class.
310322
# @liquid_syntax string | squish
311323
# @liquid_return [string]
312324
def squish(input)
  # nil is passed through as nil instead of being coerced to a string.
  return if input.nil?

  # Collapse every whitespace run to a single ASCII space first, then trim
  # whatever single space is left at either edge.
  collapsed = Utils.to_s(input).gsub(UNICODE_WHITESPACE_RUNS, ' ')
  collapsed.strip
end
317329

318330
# @liquid_public_docs
319331
# @liquid_type filter
320332
# @liquid_category string
321333
# @liquid_summary
322334
# Strips all whitespace from the left and right of a string.
335+
# @liquid_description
336+
# Whitespace is identified using the `[[:space:]]` regular expression character class.
323337
# @liquid_syntax string | strip
324338
# @liquid_return [string]
325339
def strip(input)
  input = Utils.to_s(input)
  # Delete the leading and trailing `[[:space:]]` runs outright rather than
  # swapping each for a placeholder space that is stripped again right after.
  # The final String#strip pass is kept so anything the built-in trimmed
  # before the Unicode-aware pattern was added is still trimmed.
  input.gsub(UNICODE_WHITESPACE_EDGES, '').strip
end
329343

330344
# @liquid_public_docs
331345
# @liquid_type filter
332346
# @liquid_category string
333347
# @liquid_summary
334348
# Strips all whitespace from the left of a string.
349+
# @liquid_description
350+
# Whitespace is identified using the `[[:space:]]` regular expression character class.
335351
# @liquid_syntax string | lstrip
336352
# @liquid_return [string]
337353
def lstrip(input)
  input = Utils.to_s(input)
  # The pattern is anchored with \A, so it can match at most once: a single
  # `sub` that deletes the run replaces the old "substitute a space, then
  # strip it" round trip. String#lstrip still runs as the final pass.
  input.sub(UNICODE_WHITESPACE_LEFT, '').lstrip
end
341357

342358
# @liquid_public_docs
343359
# @liquid_type filter
344360
# @liquid_category string
345361
# @liquid_summary
346362
# Strips all whitespace from the right of a string.
363+
# @liquid_description
364+
# Whitespace is identified using the `[[:space:]]` regular expression character class.
347365
# @liquid_syntax string | rstrip
348366
# @liquid_return [string]
349367
def rstrip(input)
  input = Utils.to_s(input)
  # The pattern is anchored with \z, so only the trailing run can match: a
  # single `sub` that deletes it replaces the old "substitute a space, then
  # strip it" round trip. String#rstrip still runs as the final pass.
  input.sub(UNICODE_WHITESPACE_RIGHT, '').rstrip
end
353371

354372
# @liquid_public_docs

test/integration/standard_filter_test.rb

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,15 @@ def test_squish_filter
169169
\t boo " | squish }})).render)
170170
assert_equal("", Liquid::Template.parse('{{ nil | squish }}').render)
171171
assert_equal("", Liquid::Template.parse('{{ " " | squish }}').render)
172+
173+
unicode_spaces = "\u00A0\u202F\u2009\u2007"
174+
175+
assert_template_result(
176+
"foo bar boo",
177+
"{{ source | squish }}",
178+
{ 'source' => "#{unicode_spaces}foo\u202F\u2009bar\t\n\u2007boo#{unicode_spaces}" },
179+
)
180+
assert_template_result("\u200Bfoo\u200B", "{{ source | squish }}", { 'source' => "\u200Bfoo\u200B" })
172181
end
173182

174183
def test_escape
@@ -703,16 +712,42 @@ def test_pipes_in_string_arguments
703712
def test_strip
  template = "{{ source | strip }}"
  unicode_spaces = "\u00A0\u202F\u2009\u2007"

  # Each pair is [expected output, source value]. The last two rows check
  # that interior Unicode spaces and zero-width spaces are left untouched.
  cases = [
    ['ab c', " ab c "],
    ['ab c', " \tab c \n \t"],
    ['ab c', "#{unicode_spaces}ab c#{unicode_spaces}"],
    ["a\u00A0b\u202Fc", "a\u00A0b\u202Fc"],
    ["\u200Bfoo\u200B", "\u200Bfoo\u200B"],
  ]

  cases.each do |expected, source|
    assert_template_result(expected, template, { 'source' => source })
  end
end
707726

708727
def test_lstrip
  assert_template_result('ab c ', "{{ source | lstrip }}", { 'source' => " ab c " })
  assert_template_result("ab c \n \t", "{{ source | lstrip }}", { 'source' => " \tab c \n \t" })

  unicode_spaces = "\u00A0\u202F\u2009\u2007"

  # Leading Unicode whitespace is removed; the trailing run is preserved.
  assert_template_result(
    "ab c#{unicode_spaces}",
    "{{ source | lstrip }}",
    { 'source' => "#{unicode_spaces}ab c#{unicode_spaces}" },
  )
  # Same negative cases as test_strip: interior Unicode spaces and zero-width
  # spaces (which are not in `[[:space:]]`) must pass through unchanged.
  assert_template_result("a\u00A0b\u202Fc", "{{ source | lstrip }}", { 'source' => "a\u00A0b\u202Fc" })
  assert_template_result("\u200Bfoo\u200B", "{{ source | lstrip }}", { 'source' => "\u200Bfoo\u200B" })
end
712739

713740
def test_rstrip
  assert_template_result(" ab c", "{{ source | rstrip }}", { 'source' => " ab c " })
  assert_template_result(" \tab c", "{{ source | rstrip }}", { 'source' => " \tab c \n \t" })

  unicode_spaces = "\u00A0\u202F\u2009\u2007"

  # Trailing Unicode whitespace is removed; the leading run is preserved.
  assert_template_result(
    "#{unicode_spaces}ab c",
    "{{ source | rstrip }}",
    { 'source' => "#{unicode_spaces}ab c#{unicode_spaces}" },
  )
  # Same negative cases as test_strip: interior Unicode spaces and zero-width
  # spaces (which are not in `[[:space:]]`) must pass through unchanged.
  assert_template_result("a\u00A0b\u202Fc", "{{ source | rstrip }}", { 'source' => "a\u00A0b\u202Fc" })
  assert_template_result("\u200Bfoo\u200B", "{{ source | rstrip }}", { 'source' => "\u200Bfoo\u200B" })
end
717752

718753
def test_strip_newlines

0 commit comments

Comments
 (0)