diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 6d7487ded0e2..6964f369ffdc 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -2630,24 +2630,41 @@ describe "String" do "foo\nbar\r\nbaz\r\n".lines(chomp: false).should eq(["foo\n", "bar\r\n", "baz\r\n"]) end - it "gets each_line" do - lines = [] of String - "foo\n\nbar\r\nbaz\n".each_line do |line| - lines << line - end.should be_nil - lines.should eq(["foo", "", "bar", "baz"]) - end + describe "#each_line" do + it "gets each_line" do + lines = [] of String + "foo\n\nbar\r\nbaz\n".each_line do |line| + lines << line + end.should be_nil + lines.should eq(["foo", "", "bar", "baz"]) + end - it "gets each_line with chomp = false" do - lines = [] of String - "foo\n\nbar\r\nbaz\r\n".each_line(chomp: false) do |line| - lines << line - end.should be_nil - lines.should eq(["foo\n", "\n", "bar\r\n", "baz\r\n"]) - end + it "gets each_line with chomp = false" do + lines = [] of String + "foo\n\nbar\r\nbaz\r\n".each_line(chomp: false) do |line| + lines << line + end.should be_nil + lines.should eq(["foo\n", "\n", "bar\r\n", "baz\r\n"]) + end - it_iterates "#each_line", ["foo", "bar", "baz"], "foo\nbar\r\nbaz\r\n".each_line - it_iterates "#each_line(chomp: false)", ["foo\n", "bar\r\n", "baz\r\n"], "foo\nbar\r\nbaz\r\n".each_line(chomp: false) + it_iterates "#each_line", ["foo", "bar", "baz"], "foo\nbar\r\nbaz\r\n".each_line + it_iterates "#each_line(chomp: false)", ["foo\n", "bar\r\n", "baz\r\n"], "foo\nbar\r\nbaz\r\n".each_line(chomp: false) + + it_iterates "#each_line(chomp: false, remove_empty: true)", + ["foo\n", "bar\r\n", "baz\r\n"], "foo\n\nbar\r\n\nbaz\r\n\r\n".each_line(chomp: false, remove_empty: true) + it_iterates "#each_line(remove_empty: true)", + ["foo", "bar", "baz"], "\n\nfoo\n\nbar\r\n\nbaz\r\n\r\n".each_line(remove_empty: true) + + it_iterates "#each_line keeps empty lines by default", ["foo", "", "bar"], "foo\n\nbar\n".each_line + + it "gets each_line with remove_empty = true" do + lines = [] of String + "\n\nfoo\n\nbar\r\n\nbaz\r\n\r\n".each_line(remove_empty: true) do |line| + lines << line + end.should be_nil + lines.should eq(["foo", "bar", "baz"]) + end + end it_iterates "#each_codepoint", [97, 98, 9731], "ab☃".each_codepoint diff --git a/src/string.cr b/src/string.cr index 
7507e3b7249e..43df17e3be47 100644 --- a/src/string.cr +++ b/src/string.cr @@ -4358,21 +4358,25 @@ class String # # even the monkey seems to want # # a little coat of straw # ``` - def each_line(chomp = true, &block : String ->) : Nil + def each_line(chomp = true, *, remove_empty : Bool = false, &block : String ->) : Nil return if empty? offset = 0 while byte_index = byte_index('\n'.ord.to_u8, offset) count = byte_index - offset + 1 - if chomp - count -= 1 - if offset + count > 0 && to_unsafe[offset + count - 1] === '\r' - count -= 1 + chomped_count = count + if chomp || remove_empty + chomped_count -= 1 + if offset + chomped_count > 0 && to_unsafe[offset + chomped_count - 1] === '\r' + chomped_count -= 1 end end + if chomp + count = chomped_count + end - yield unsafe_byte_slice_string(offset, count) + yield unsafe_byte_slice_string(offset, count) unless remove_empty && chomped_count.zero? offset = byte_index + 1 end @@ -4382,8 +4386,8 @@ class String end # Returns an `Iterator` which yields each line of this string (see `String#each_line`). - def each_line(chomp = true) - LineIterator.new(self, chomp) + def each_line(chomp = true, *, remove_empty : Bool = false) + LineIterator.new(self, chomp, remove_empty: remove_empty) end # Converts camelcase boundaries to underscores. 
@@ -5622,7 +5626,7 @@ class String private class LineIterator include Iterator(String) - def initialize(@string : String, @chomp : Bool) + def initialize(@string : String, @chomp : Bool, *, @remove_empty : Bool) @offset = 0 @end = false end @@ -5633,13 +5637,23 @@ class String byte_index = @string.byte_index('\n'.ord.to_u8, @offset) if byte_index count = byte_index - @offset + 1 - if @chomp - count -= 1 - if @offset + count > 0 && @string.to_unsafe[@offset + count - 1] === '\r' - count -= 1 + chomped_count = count + if @chomp || @remove_empty + chomped_count -= 1 + if @offset + chomped_count > 0 && @string.to_unsafe[@offset + chomped_count - 1] === '\r' + chomped_count -= 1 end end + if @remove_empty && chomped_count.zero? + @offset = byte_index + 1 + return self.next + end + + if @chomp + count = chomped_count + end + value = @string.unsafe_byte_slice_string(@offset, count) @offset = byte_index + 1 else