Skip to content

Commit

Permalink
fix: text length not computed properly (#1303)
Browse files Browse the repository at this point in the history
## Description

This PR fixes how the text lengths related to a `Post` or a
`FreeTextReactionValue` are computed. Previously the count would only
consider the byte size of the text. Instead, it's much better to count
graphemes.

As per [this
comment](https://stackoverflow.com/questions/36928185/counting-characters-in-golang-string#comment128023757_55148379):

> There is a difference between bytes, runes, and graphemes, and it
seems many people confuse the three. (In most use cases, it doesn't
matter anyway.) For example, 🏳️‍🌈 (rainbow flag emoji) is 1
grapheme, 4 runes, and 14 bytes. The Go stdlib only has built-in
functions for bytes and runes but not for graphemes.

By counting graphemes instead of runes or bytes, we can assure a
consistent user experience with any application that implementes
client-based char count.

<!-- Add a description of the changes that this PR introduces and the
files that
are the most critical to review. -->

---

### Author Checklist

*All items are required. Please add a note to the item if the item is
not applicable and
please add links to any relevant follow up issues.*

I have...

- [x] included the correct [type
prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json)
in the PR title
- [ ] added `!` to the type prefix if API or client breaking change
- [x] targeted the correct branch (see [PR
Targeting](https://github.com/desmos-labs/desmos/blob/master/CONTRIBUTING.md#pr-targeting))
- [x] provided a link to the relevant issue or specification
- [x] followed the guidelines for [building
modules](https://docs.cosmos.network/v0.44/building-modules/intro.html)
- [x] included the necessary unit and integration
[tests](https://github.com/desmos-labs/desmos/blob/master/CONTRIBUTING.md#testing)
- [x] added a changelog entry to `CHANGELOG.md`
- [ ] included comments for [documenting Go
code](https://blog.golang.org/godoc)
- [x] updated the relevant documentation or specification
- [x] reviewed "Files changed" and left comments if necessary
- [x] confirmed all CI checks have passed

### Reviewers Checklist

*All items are required. Please add a note if the item is not applicable
and please add
your handle next to the items reviewed if you only reviewed selected
items.*

I have...

- [ ] confirmed the correct [type
prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json)
in the PR title
- [ ] confirmed `!` in the type prefix if API or client breaking change
- [ ] confirmed all author checklist items have been addressed
- [ ] reviewed state machine logic
- [ ] reviewed API design and naming
- [ ] reviewed documentation is accurate
- [ ] reviewed tests and test coverage
- [ ] manually tested (if applicable)

---------

Signed-off-by: Riccardo Montagnin <[email protected]>
  • Loading branch information
RiccardoM authored Feb 13, 2024
1 parent 3596aa8 commit e0742c8
Show file tree
Hide file tree
Showing 9 changed files with 168 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
type: fix
module: other
pull_request: 1303
description: Replaced graphemes count instead of bytes count to determine text length
backward_compatible: true
date: 2024-02-13T10:55:14.7783395Z
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ require (
github.com/osmosis-labs/go-mutesting v0.0.0-20221219192234-827e6d6b9d4e
github.com/prometheus/client_golang v1.18.0
github.com/rakyll/statik v0.1.7
github.com/rivo/uniseg v0.4.7
github.com/spf13/cast v1.6.0
github.com/spf13/cobra v1.8.0
github.com/spf13/pflag v1.0.5
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1288,6 +1288,8 @@ github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqn
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM=
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/retailnext/hllpp v1.0.1-0.20180308014038-101a6d2f8b52/go.mod h1:RDpi1RftBQPUCDRw6SmxeaREsAaRKnOclghuzp/WRzc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rjeczalik/notify v0.9.1/go.mod h1:rKwnCoCGeuQnwBtTSPL9Dad03Vh2n40ePRrjvIXnJho=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
Expand Down
2 changes: 1 addition & 1 deletion x/posts/keeper/posts.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func (k Keeper) ValidatePost(ctx sdk.Context, post types.Post) error {
}

// Check the post text length to make sure it's not exceeding the max length
if uint32(len(post.Text)) > params.MaxTextLength {
if uint32(post.GetTextLength()) > params.MaxTextLength {
return errors.Wrapf(types.ErrInvalidPost, "text exceed max length allowed")
}

Expand Down
10 changes: 10 additions & 0 deletions x/posts/types/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/gogo/protobuf/proto"

sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/rivo/uniseg"
)

// ParsePostID parses the given value as a post id, returning an error if it's invalid
Expand Down Expand Up @@ -155,6 +156,15 @@ func (p Post) GetMentionedUsers() []string {
return mentions
}

// GetTextLength returns the length of the post text
func (p Post) GetTextLength() int {
// Counting graphemes instead of runes of bytes can provide a more accurate length of the text.
// This will also ensure that emojis are counted as a single character, which will grant a more consistent
// user experience with clients as well.
// Example: 🏳️‍🌈 (rainbow flag emoji) is 1 grapheme, 4 runes, and 14 bytes.
return uniseg.GraphemeClusterCount(p.Text)
}

// NewPostReference returns a new PostReference instance
func NewPostReference(referenceType PostReferenceType, postID uint64, position uint64) PostReference {
return PostReference{
Expand Down
97 changes: 97 additions & 0 deletions x/posts/types/models_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,103 @@ func TestPost_Validate(t *testing.T) {
}
}

func TestPost_GetTextLength(t *testing.T) {
testCases := []struct {
name string
post types.Post
expLength int
}{
{
name: "latin text returns correct length",
post: types.NewPost(
1,
0,
2,
"External id",
"Hello",
"cosmos1eqpa6mv2jgevukaqtjmx5535vhc3mm3cf458zg",
1,
nil,
nil,
nil,
types.REPLY_SETTING_EVERYONE,
time.Date(2020, 1, 1, 12, 00, 00, 000, time.UTC),
nil,
"cosmos13t6y2nnugtshwuy0zkrq287a95lyy8vzleaxmd",
),
expLength: 5,
},
{
name: "non latin text returns correct length",
post: types.NewPost(
1,
0,
2,
"External id",
"世界",
"cosmos1eqpa6mv2jgevukaqtjmx5535vhc3mm3cf458zg",
1,
nil,
nil,
nil,
types.REPLY_SETTING_EVERYONE,
time.Date(2020, 1, 1, 12, 00, 00, 000, time.UTC),
nil,
"cosmos13t6y2nnugtshwuy0zkrq287a95lyy8vzleaxmd",
),
expLength: 2,
},
{
name: "emoji text returns correct length",
post: types.NewPost(
1,
0,
2,
"External id",
"😃😃😃",
"cosmos1eqpa6mv2jgevukaqtjmx5535vhc3mm3cf458zg",
1,
nil,
nil,
nil,
types.REPLY_SETTING_EVERYONE,
time.Date(2020, 1, 1, 12, 00, 00, 000, time.UTC),
nil,
"cosmos13t6y2nnugtshwuy0zkrq287a95lyy8vzleaxmd",
),
expLength: 3,
},
{
name: "mixed text returns correct length",
post: types.NewPost(
1,
0,
2,
"External id",
"Hello 世界! 😃",
"cosmos1eqpa6mv2jgevukaqtjmx5535vhc3mm3cf458zg",
1,
nil,
nil,
nil,
types.REPLY_SETTING_EVERYONE,
time.Date(2020, 1, 1, 12, 00, 00, 000, time.UTC),
nil,
"cosmos13t6y2nnugtshwuy0zkrq287a95lyy8vzleaxmd",
),
expLength: 11,
},
}

for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
require.Equal(t, tc.expLength, tc.post.GetTextLength())
})
}
}

func TestPostReference_Validate(t *testing.T) {
testCases := []struct {
name string
Expand Down
2 changes: 1 addition & 1 deletion x/reactions/keeper/reactions.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func (k Keeper) validateFreeTextValue(ctx sdk.Context, reaction types.Reaction,
}

// Make sure the value respected the max length
if uint32(len(value.Text)) > params.MaxLength {
if uint32(value.GetLength()) > params.MaxLength {
return errors.Wrapf(sdkerrors.ErrInvalidRequest, "value exceed max length allowed")
}

Expand Down
12 changes: 12 additions & 0 deletions x/reactions/types/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"strconv"
"strings"

"github.com/rivo/uniseg"

codectypes "github.com/cosmos/cosmos-sdk/codec/types"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/gogo/protobuf/proto"
Expand Down Expand Up @@ -121,6 +123,16 @@ func NewFreeTextValue(text string) *FreeTextValue {
// isReactionValue implements ReactionValue
func (v *FreeTextValue) isReactionValue() {}

// GetLength returns the length of the reaction value
func (v *FreeTextValue) GetLength() int {
// Counting graphemes instead of runes of bytes can provide a more accurate length of the text.
// This will also ensure that emojis are counted as a single character, which will grant a more consistent
// user experience with clients as well.
// Example: 🏳️‍🌈 (rainbow flag emoji) is 1 grapheme, 4 runes, and 14 bytes.
return uniseg.GraphemeClusterCount(v.Text)

}

// Validate implements ReactionValue
func (v *FreeTextValue) Validate() error {
if strings.TrimSpace(v.Text) == "" {
Expand Down
38 changes: 38 additions & 0 deletions x/reactions/types/models_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,44 @@ func TestRegisteredReactionValue_Validate(t *testing.T) {

// --------------------------------------------------------------------------------------------------------------------

func TestFreeTextValue_Length(t *testing.T) {

testCases := []struct {
name string
reaction *types.FreeTextValue
expLength int
}{
{
name: "latin text returns correct length",
reaction: types.NewFreeTextValue("Hello"),
expLength: 5,
},
{
name: "non latin text returns correct length",
reaction: types.NewFreeTextValue("世界"),
expLength: 2,
},
{
name: "emoji text returns correct length",
reaction: types.NewFreeTextValue("😃😃😃"),
expLength: 3,
},
{
name: "mixed text returns correct length",
reaction: types.NewFreeTextValue("Hello 世界! 😃"),
expLength: 11,
},
}

for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
require.Equal(t, tc.expLength, tc.reaction.GetLength())
})
}
}

func TestFreeTextValue_Validate(t *testing.T) {
testCases := []struct {
name string
Expand Down

0 comments on commit e0742c8

Please sign in to comment.