-
Notifications
You must be signed in to change notification settings - Fork 144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Faster lex_identifier
#1746
Faster lex_identifier
#1746
Conversation
src/prism.c
Outdated
if (parser->current.end < end) { | ||
if (((parser->current.end + 1 >= end) || (parser->current.end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this use current_end
too? It seems more consistent to always use that variable rather than a mix of it & not.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting, so the extra field accesses were causing the overhead here?
I wonder why the C compiler doesn't/cannot optimize that.
I will dig deeper to this. I am not totally sure about the difference. |
It looks like the difference comes from accessing Before lex_identifier:
push r14
push r13
mov r13d, esi
push r12
push rbp
mov rbp, rdi
push rbx
mov rbx, QWORD PTR [rdi+320]
mov r12, QWORD PTR [rdi+272]
cmp rbx, r12
jnb .L1358
.L1356:
cmp BYTE PTR [rbp+666], 0
jne .L1549
movzx eax, BYTE PTR [rbx]
test al, al
js .L1363
movzx edx, al
test BYTE PTR pm_encoding_unicode_table[rdx], 2
jne .L1361
cmp al, 95
je .L1361
.L1362:
mov r14, QWORD PTR [rbp+312]
mov rdx, rbx
mov eax, DWORD PTR [rbp+0]
sub rdx, r14
cmp rbx, r12
jnb .L1472
lea rcx, [rbx+1]
cmp rcx, r12
jb .L1550
.L1365:
movzx esi, BYTE PTR [rbx]
cmp sil, 33
je .L1368
cmp sil, 63
je .L1368
test al, -128
je .L1367
cmp rcx, r12
jb .L1551
.L1378:
cmp sil, 61
je .L1552
.L1367:
test eax, 1032
je .L1493
test r13b, r13b
je .L1380
.L1493:
test al, 48
je .L1472
.L1380:
cmp BYTE PTR [rbx], 58
jne .L1472
cmp rcx, r12
jb .L1553
.L1382:
mov DWORD PTR [rbp+0], 2064
cmp BYTE PTR [rbx], 58
jne .L1375
.L1542:
mov QWORD PTR [rbp+320], rcx
jmp .L1375
.L1360:
mov rbx, QWORD PTR [rbp+320]
mov r12, QWORD PTR [rbp+272]
.L1361:
add rbx, 1
mov QWORD PTR [rbp+320], rbx
cmp rbx, r12
jb .L1356
.L1358:
mov r14, QWORD PTR [rbp+312]
mov rdx, rbx
mov eax, DWORD PTR [rbp+0]
sub rdx, r14
.L1472:
cmp eax, 256
je .L1385
cmp rdx, 12
ja .L1385
jmp [QWORD PTR .L1387[0+rdx*8]]
.L1387:
.quad .L1385
.quad .L1385
.quad .L1393
.quad .L1392
.quad .L1391
.quad .L1390
.quad .L1389
.quad .L1385
.quad .L1388
.quad .L1385
.quad .L1385
.quad .L1385
.quad .L1386
.L1549:
mov rsi, r12
mov rdi, rbx
sub rsi, rbx
call [QWORD PTR [rbp+472]]
test rax, rax
jne .L1360
movzx eax, BYTE PTR [rbx]
mov r12, QWORD PTR [rbp+272]
mov rbx, QWORD PTR [rbp+320]
cmp al, 95
je .L1361
test al, al
jns .L1362
add rbx, 1
mov QWORD PTR [rbp+320], rbx
cmp rbx, r12
jb .L1356
jmp .L1358
.L1363:
mov rsi, r12
mov rdi, rbx
sub rsi, rbx
call pm_encoding_utf_8_alnum_char
mov rbx, QWORD PTR [rbp+320]
mov r12, QWORD PTR [rbp+272]
add rbx, 1
mov QWORD PTR [rbp+320], rbx
cmp rbx, r12
jb .L1356
jmp .L1358
.L1550:
cmp BYTE PTR [rbx+1], 61
jne .L1365
test al, -128
je .L1367
.L1366:
lea rsi, [rbx+2]
cmp rsi, r12
jnb .L1367
cmp BYTE PTR [rbx+2], 62
jne .L1367
movzx esi, BYTE PTR [rbx]
jmp .L1378
.L1393:
lea rdx, [r14+2]
cmp r12, rdx
jnb .L1554
.L1385:
mov rsi, r12
mov rdi, r14
sub rsi, r14
call [QWORD PTR [rbp+480]]
mov edx, 29
test al, al
je .L1379
.L1355:
pop rbx
mov eax, edx
pop rbp
pop r12
pop r13
pop r14
ret
.L1552:
mov QWORD PTR [rbp+320], rcx
.L1379:
mov edx, 55
pop rbx
pop rbp
mov eax, edx
pop r12
pop r13
pop r14
ret
After lex_identifier:
push r14
push r13
mov r13d, esi
push r12
push rbp
mov rbp, rdi
push rbx
mov r12, QWORD PTR [rdi+272]
mov rbx, QWORD PTR [rdi+320]
mov r14, QWORD PTR [rdi+312]
cmp rbx, r12
jnb .L1463
.L1272:
cmp BYTE PTR [rbp+666], 0
jne .L1464
movzx eax, BYTE PTR [rbx]
test al, al
js .L1278
movzx edx, al
test BYTE PTR pm_encoding_unicode_table[rdx], 2
jne .L1279
cmp al, 95
je .L1279
.L1277:
mov rdx, rbx
mov QWORD PTR [rbp+320], rbx
mov eax, DWORD PTR [rbp+0]
sub rdx, r14
cmp rbx, r12
jnb .L1390
lea rcx, [rbx+1]
cmp rcx, r12
jb .L1465
.L1281:
mov rdi, QWORD PTR [rbp+272]
cmp rbx, rdi
jnb .L1282
movzx esi, BYTE PTR [rbx]
cmp sil, 33
je .L1283
cmp sil, 63
je .L1283
.L1282:
test al, -128
je .L1292
mov rdi, QWORD PTR [rbp+272]
mov r8, rdi
cmp rcx, rdi
jnb .L1293
movzx esi, BYTE PTR [rbx+1]
mov r9d, esi
and r9d, -65
cmp r9b, 62
je .L1292
cmp sil, 61
je .L1466
.L1293:
cmp rbx, rdi
jnb .L1390
movzx esi, BYTE PTR [rbx]
cmp sil, 61
je .L1467
test eax, 1032
je .L1388
test r13b, r13b
je .L1389
.L1388:
mov r8, rdi
test al, 48
jne .L1389
.L1390:
cmp eax, 256
je .L1301
.L1468:
cmp rdx, 12
ja .L1301
jmp [QWORD PTR .L1303[0+rdx*8]]
.L1303:
.quad .L1301
.quad .L1301
.quad .L1309
.quad .L1308
.quad .L1307
.quad .L1306
.quad .L1305
.quad .L1301
.quad .L1304
.quad .L1301
.quad .L1301
.quad .L1301
.quad .L1302
.L1278:
mov rsi, QWORD PTR [rbp+272]
mov rdi, rbx
sub rsi, rbx
call pm_encoding_utf_8_alnum_char
.L1279:
add rbx, 1
cmp r12, rbx
jne .L1272
.L1274:
mov eax, DWORD PTR [rbp+0]
mov rdx, r12
mov QWORD PTR [rbp+320], r12
sub rdx, r14
cmp eax, 256
jne .L1468
.L1301:
mov rsi, r12
mov rdi, r14
sub rsi, r14
call [QWORD PTR [rbp+480]]
mov edx, 29
test al, al
jne .L1271
.L1295:
mov edx, 55
.L1271:
pop rbx
mov eax, edx
pop rbp
pop r12
pop r13
pop r14
ret |
Interesting, maybe the C compiler thinks assigning Line 351 in 4ff8fe2
Would Or potentially some use of In any case I think this is fine to merge given the speedup and little changes needed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great work! One additional small request please
src/prism.c
Outdated
@@ -6163,7 +6168,7 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) { | |||
} | |||
} | |||
|
|||
return parser->encoding.isupper_char(parser->current.start, parser->end - parser->current.start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER; | |||
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One more change that should help - instead of going through the function pointer can you check if the encoding is changed and otherwise use the default encoding function?
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER; | |
if (parser->encoding_changed) { | |
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER; | |
} else { | |
return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER; | |
} |
Note this will require making pm_encoding_utf_8_isupper_char non-static.
17898a4
to
c44e349
Compare
c44e349
to
e44a9ae
Compare
I have introduced some caching in
lex_identifier
to make it a bit faster. I tested the changes on parsing a 10M lines of Ruby file. I see a %6 decrease in time.This is the program that I used to measure