Faster `lex_identifier` #1746

haldun · 2023-10-28T13:19:58Z

I have introduced some caching in lex_identifier to make it a bit faster. I tested the changes on parsing a 10M lines of Ruby file. I see a %6 decrease in time.

This is the program that I used to measure

#include <stdio.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/mman.h>
#include <time.h>

#include "prism.h"

int main(int argc, char const *argv[])
{
    __auto_type fd = open("/tmp/b.rb", O_RDONLY);
    struct stat st;
    __auto_type rc = fstat(fd, &st);
    size_t size = st.st_size;
    const uint8_t * region = mmap(
        0, size,
        PROT_READ, MAP_FILE|MAP_PRIVATE,
        fd, 0
    );
    __auto_type parser = malloc(sizeof(pm_parser_t));
    __auto_type start = (float)clock()/CLOCKS_PER_SEC;
    pm_parser_init(parser, region, size, NULL);
    __auto_type node = pm_parse(parser);
    __auto_type end = (float)clock()/CLOCKS_PER_SEC;
    __auto_type timeElapsed = end - start;
    printf("%f\n", timeElapsed);
    __auto_type unmap_result = munmap((void *)region, size);
    close(fd);
    return 0;
}

eregon · 2023-10-28T18:22:11Z

src/prism.c

+    if (parser->current.end < end) {
+        if (((parser->current.end + 1 >= end) || (parser->current.end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {


Could this use current_end too? It seems more consistent to always use that variable rather than a mix of it & not.

eregon

Interesting, so the extra field accesses were causing the overhead here?
I wonder why the C compiler doesn't/cannot optimize that.

haldun · 2023-10-28T18:39:40Z

I will dig deeper to this. I am not totally sure about the difference.

haldun · 2023-10-30T09:56:59Z

It looks like the difference comes from accessing parser->end and accessing/mutating parser->current.end in the while loop.

Before

lex_identifier:
        push    r14
        push    r13
        mov     r13d, esi
        push    r12
        push    rbp
        mov     rbp, rdi
        push    rbx
        mov     rbx, QWORD PTR [rdi+320]
        mov     r12, QWORD PTR [rdi+272]
        cmp     rbx, r12
        jnb     .L1358
.L1356:
        cmp     BYTE PTR [rbp+666], 0
        jne     .L1549
        movzx   eax, BYTE PTR [rbx]
        test    al, al
        js      .L1363
        movzx   edx, al
        test    BYTE PTR pm_encoding_unicode_table[rdx], 2
        jne     .L1361
        cmp     al, 95
        je      .L1361
.L1362:
        mov     r14, QWORD PTR [rbp+312]
        mov     rdx, rbx
        mov     eax, DWORD PTR [rbp+0]
        sub     rdx, r14
        cmp     rbx, r12
        jnb     .L1472
        lea     rcx, [rbx+1]
        cmp     rcx, r12
        jb      .L1550
.L1365:
        movzx   esi, BYTE PTR [rbx]
        cmp     sil, 33
        je      .L1368
        cmp     sil, 63
        je      .L1368
        test    al, -128
        je      .L1367
        cmp     rcx, r12
        jb      .L1551
.L1378:
        cmp     sil, 61
        je      .L1552
.L1367:
        test    eax, 1032
        je      .L1493
        test    r13b, r13b
        je      .L1380
.L1493:
        test    al, 48
        je      .L1472
.L1380:
        cmp     BYTE PTR [rbx], 58
        jne     .L1472
        cmp     rcx, r12
        jb      .L1553
.L1382:
        mov     DWORD PTR [rbp+0], 2064
        cmp     BYTE PTR [rbx], 58
        jne     .L1375
.L1542:
        mov     QWORD PTR [rbp+320], rcx
        jmp     .L1375
.L1360:
        mov     rbx, QWORD PTR [rbp+320]
        mov     r12, QWORD PTR [rbp+272]
.L1361:
        add     rbx, 1
        mov     QWORD PTR [rbp+320], rbx
        cmp     rbx, r12
        jb      .L1356
.L1358:
        mov     r14, QWORD PTR [rbp+312]
        mov     rdx, rbx
        mov     eax, DWORD PTR [rbp+0]
        sub     rdx, r14
.L1472:
        cmp     eax, 256
        je      .L1385
        cmp     rdx, 12
        ja      .L1385
        jmp     [QWORD PTR .L1387[0+rdx*8]]
.L1387:
        .quad   .L1385
        .quad   .L1385
        .quad   .L1393
        .quad   .L1392
        .quad   .L1391
        .quad   .L1390
        .quad   .L1389
        .quad   .L1385
        .quad   .L1388
        .quad   .L1385
        .quad   .L1385
        .quad   .L1385
        .quad   .L1386
.L1549:
        mov     rsi, r12
        mov     rdi, rbx
        sub     rsi, rbx
        call    [QWORD PTR [rbp+472]]
        test    rax, rax
        jne     .L1360
        movzx   eax, BYTE PTR [rbx]
        mov     r12, QWORD PTR [rbp+272]
        mov     rbx, QWORD PTR [rbp+320]
        cmp     al, 95
        je      .L1361
        test    al, al
        jns     .L1362
        add     rbx, 1
        mov     QWORD PTR [rbp+320], rbx
        cmp     rbx, r12
        jb      .L1356
        jmp     .L1358
.L1363:
        mov     rsi, r12
        mov     rdi, rbx
        sub     rsi, rbx
        call    pm_encoding_utf_8_alnum_char
        mov     rbx, QWORD PTR [rbp+320]
        mov     r12, QWORD PTR [rbp+272]
        add     rbx, 1
        mov     QWORD PTR [rbp+320], rbx
        cmp     rbx, r12
        jb      .L1356
        jmp     .L1358
.L1550:
        cmp     BYTE PTR [rbx+1], 61
        jne     .L1365
        test    al, -128
        je      .L1367
.L1366:
        lea     rsi, [rbx+2]
        cmp     rsi, r12
        jnb     .L1367
        cmp     BYTE PTR [rbx+2], 62
        jne     .L1367
        movzx   esi, BYTE PTR [rbx]
        jmp     .L1378
.L1393:
        lea     rdx, [r14+2]
        cmp     r12, rdx
        jnb     .L1554
.L1385:
        mov     rsi, r12
        mov     rdi, r14
        sub     rsi, r14
        call    [QWORD PTR [rbp+480]]
        mov     edx, 29
        test    al, al
        je      .L1379
.L1355:
        pop     rbx
        mov     eax, edx
        pop     rbp
        pop     r12
        pop     r13
        pop     r14
        ret
.L1552:
        mov     QWORD PTR [rbp+320], rcx
.L1379:
        mov     edx, 55
        pop     rbx
        pop     rbp
        mov     eax, edx
        pop     r12
        pop     r13
        pop     r14
        ret

After

lex_identifier:
        push    r14
        push    r13
        mov     r13d, esi
        push    r12
        push    rbp
        mov     rbp, rdi
        push    rbx
        mov     r12, QWORD PTR [rdi+272]
        mov     rbx, QWORD PTR [rdi+320]
        mov     r14, QWORD PTR [rdi+312]
        cmp     rbx, r12
        jnb     .L1463
.L1272:
        cmp     BYTE PTR [rbp+666], 0
        jne     .L1464
        movzx   eax, BYTE PTR [rbx]
        test    al, al
        js      .L1278
        movzx   edx, al
        test    BYTE PTR pm_encoding_unicode_table[rdx], 2
        jne     .L1279
        cmp     al, 95
        je      .L1279
.L1277:
        mov     rdx, rbx
        mov     QWORD PTR [rbp+320], rbx
        mov     eax, DWORD PTR [rbp+0]
        sub     rdx, r14
        cmp     rbx, r12
        jnb     .L1390
        lea     rcx, [rbx+1]
        cmp     rcx, r12
        jb      .L1465
.L1281:
        mov     rdi, QWORD PTR [rbp+272]
        cmp     rbx, rdi
        jnb     .L1282
        movzx   esi, BYTE PTR [rbx]
        cmp     sil, 33
        je      .L1283
        cmp     sil, 63
        je      .L1283
.L1282:
        test    al, -128
        je      .L1292
        mov     rdi, QWORD PTR [rbp+272]
        mov     r8, rdi
        cmp     rcx, rdi
        jnb     .L1293
        movzx   esi, BYTE PTR [rbx+1]
        mov     r9d, esi
        and     r9d, -65
        cmp     r9b, 62
        je      .L1292
        cmp     sil, 61
        je      .L1466
.L1293:
        cmp     rbx, rdi
        jnb     .L1390
        movzx   esi, BYTE PTR [rbx]
        cmp     sil, 61
        je      .L1467
        test    eax, 1032
        je      .L1388
        test    r13b, r13b
        je      .L1389
.L1388:
        mov     r8, rdi
        test    al, 48
        jne     .L1389
.L1390:
        cmp     eax, 256
        je      .L1301
.L1468:
        cmp     rdx, 12
        ja      .L1301
        jmp     [QWORD PTR .L1303[0+rdx*8]]
.L1303:
        .quad   .L1301
        .quad   .L1301
        .quad   .L1309
        .quad   .L1308
        .quad   .L1307
        .quad   .L1306
        .quad   .L1305
        .quad   .L1301
        .quad   .L1304
        .quad   .L1301
        .quad   .L1301
        .quad   .L1301
        .quad   .L1302
.L1278:
        mov     rsi, QWORD PTR [rbp+272]
        mov     rdi, rbx
        sub     rsi, rbx
        call    pm_encoding_utf_8_alnum_char
.L1279:
        add     rbx, 1
        cmp     r12, rbx
        jne     .L1272
.L1274:
        mov     eax, DWORD PTR [rbp+0]
        mov     rdx, r12
        mov     QWORD PTR [rbp+320], r12
        sub     rdx, r14
        cmp     eax, 256
        jne     .L1468
.L1301:
        mov     rsi, r12
        mov     rdi, r14
        sub     rsi, r14
        call    [QWORD PTR [rbp+480]]
        mov     edx, 29
        test    al, al
        jne     .L1271
.L1295:
        mov     edx, 55
.L1271:
        pop     rbx
        mov     eax, edx
        pop     rbp
        pop     r12
        pop     r13
        pop     r14
        ret

https://godbolt.org/z/cEx7qh5sf

eregon · 2023-10-30T10:15:49Z

Interesting, maybe the C compiler thinks assigning parser->current.end can change parser->end.

prism/include/prism/parser.h

Line 351 in 4ff8fe2

const uint8_t *end; // the pointer to the end of the source

Would const uint8_t *const end; help there? (not sure if C compilers trust const though)

Or potentially some use of restrict if that's not enough.

In any case I think this is fine to merge given the speedup and little changes needed.

kddnewton

Great work! One additional small request please

kddnewton · 2023-10-30T14:36:49Z

src/prism.c

@@ -6163,7 +6168,7 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
        }
    }

-    return parser->encoding.isupper_char(parser->current.start, parser->end - parser->current.start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
+    return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;


One more change that should help - instead of going through the function pointer can you check if the encoding is changed and otherwise use the default encoding function?

Suggested change

return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;

if (parser->encoding_changed) {

return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;

} else {

return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;

}

Note this will require making pm_encoding_utf_8_isupper_char non-static.

eregon reviewed Oct 28, 2023

View reviewed changes

kddnewton requested changes Oct 30, 2023

View reviewed changes

haldun force-pushed the haldun/faster-lex-identifier branch from 17898a4 to c44e349 Compare October 30, 2023 14:45

Faster lex_identifier

e44a9ae

haldun force-pushed the haldun/faster-lex-identifier branch from c44e349 to e44a9ae Compare October 30, 2023 14:47

kddnewton approved these changes Oct 30, 2023

View reviewed changes

kddnewton merged commit 4bdafcc into ruby:main Oct 30, 2023
45 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Faster `lex_identifier` #1746

Faster `lex_identifier` #1746

haldun commented Oct 28, 2023

eregon Oct 28, 2023

eregon left a comment

haldun commented Oct 28, 2023

haldun commented Oct 30, 2023

eregon commented Oct 30, 2023 •

edited

Loading

kddnewton left a comment

kddnewton Oct 30, 2023

		if (parser->current.end < end) {
		if (((parser->current.end + 1 >= end) \|\| (parser->current.end[1] != '=')) && (match(parser, '!') \|\| match(parser, '?'))) {

-    return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
+    if (parser->encoding_changed) {
+        return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
+    } else {
+        return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
+    }

Faster lex_identifier #1746

Faster lex_identifier #1746

Conversation

haldun commented Oct 28, 2023

eregon Oct 28, 2023

Choose a reason for hiding this comment

eregon left a comment

Choose a reason for hiding this comment

haldun commented Oct 28, 2023

haldun commented Oct 30, 2023

eregon commented Oct 30, 2023 • edited Loading

kddnewton left a comment

Choose a reason for hiding this comment

kddnewton Oct 30, 2023

Choose a reason for hiding this comment

Faster `lex_identifier` #1746

Faster `lex_identifier` #1746

eregon commented Oct 30, 2023 •

edited

Loading