From 6b0c0e16072837d7e5b784ef46686fdf56966ca2 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Tue, 19 Nov 2024 23:02:37 +0000 Subject: [PATCH] OP_SUBSTR_LEFT - a specialised OP_SUBSTR variant This commit adds OP_SUBSTR_LEFT and associated machinery for fast handling of the constructions: substr EXPR,0,LENGTH,'' and substr EXPR,0,LENGTH Where EXPR is a scalar lexical, the OFFSET is zero, and either there is no REPLACEMENT or it is the empty string. LENGTH can be anything that OP_SUBSTR supports. These constraints allow for a very stripped back and optimised version of pp_substr. The primary motivation was for situations where a scalar, containing some network packets or other binary data structure, is being parsed piecemeal. Nibbling away at the scalar can be useful when you don't know how exactly it will be parsed and unpacked until you get started. It also means that you don't need to worry about correctly updating a separate offset variable. This operator also turns out to be an efficient way to (destructively) break an expression up into fixed size chunks. For example, given: my $x = ''; my $str = "A"x100_000_000; This code: $x = substr($str, 0, 5, "") while ($str); is twice as fast as doing: for ($pos = 0; $pos < length($str); $pos += 5) { $x = substr($str, $pos, 5); } Compared with blead, `$y = substr($x, 0, 5)` runs 40% faster and `$y = substr($x, 0, 5, '')` runs 45% faster. --- MANIFEST | 1 + ext/Opcode/Opcode.pm | 5 +- lib/B/Deparse.pm | 21 +- lib/B/Deparse.t | 8 + lib/B/Op_private.pm | 5 +- opcode.h | 200 ++++++++------- opnames.h | 589 ++++++++++++++++++++++--------------------- peep.c | 92 +++++++ pp.c | 106 ++++++++ pp_proto.h | 1 + regen/op_private | 2 +- regen/opcodes | 1 + t/op/substr_left.t | 108 ++++++++ t/perf/benchmarks | 16 ++ t/perf/opcount.t | 78 ++++++ 15 files changed, 837 insertions(+), 396 deletions(-) create mode 100644 t/op/substr_left.t diff --git a/MANIFEST b/MANIFEST index 543f63784f586..dd89f05c4dec6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6438,6 +6438,7 @@ t/op/studytied.t See if study works with tied scalars t/op/sub.t See if subroutines work t/op/sub_lval.t See if lvalue subroutines work t/op/substr.t See if substr works +t/op/substr_left.t See if substr($x, 0, $l, '') optimisation works t/op/substr_thr.t See if substr works in another thread t/op/svflags.t See if POK is set as expected. t/op/svleak.pl Test file for svleak.t diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm index baacf3c677ea9..b5db319dd838d 100644 --- a/ext/Opcode/Opcode.pm +++ b/ext/Opcode/Opcode.pm @@ -1,4 +1,4 @@ -package Opcode 1.66; +package Opcode 1.67; use strict; @@ -322,7 +322,8 @@ invert_opset function. slt sgt sle sge seq sne scmp isa - substr vec stringify study pos length index rindex ord chr + substr substr_left vec stringify study pos length index + rindex ord chr ucfirst lcfirst uc lc fc quotemeta trans transr chop schop chomp schomp diff --git a/lib/B/Deparse.pm b/lib/B/Deparse.pm index 6e33893736958..11057624ecaeb 100644 --- a/lib/B/Deparse.pm +++ b/lib/B/Deparse.pm @@ -7,7 +7,7 @@ # This is based on the module of the same name by Malcolm Beattie, # but essentially none of his code remains. -package B::Deparse 1.80; +package B::Deparse 1.81; use strict; use Carp; use B qw(class main_root main_start main_cv svref_2object opnumber perlstring @@ -3419,6 +3419,25 @@ sub pp_substr { maybe_local(@_, listop(@_, "substr")) } +sub pp_substr_left { + my ($self,$op,$cx) = @_; + + my $lex = ($op->private & OPpTARGET_MY); + + my $val = 'substr(' . $self->deparse($op->first->sibling, $cx) + . ', 0, ' . $self->deparse($op->first->sibling->sibling->sibling, $cx) + . ( (($op->private & 7) == 3) ? '' : ", '')" ); + + if ($lex) { + my $targ = $op->targ; + my $var = $self->maybe_my($op, $cx, $self->padname($op->targ), + $self->padname_sv($targ), + 0); + $val = $self->maybe_parens("$var = $val", $cx, 7); + } + $val; +} + sub pp_index { # Also handles pp_rindex. # diff --git a/lib/B/Deparse.t b/lib/B/Deparse.t index 20adbca4743b7..73063982fbbce 100644 --- a/lib/B/Deparse.t +++ b/lib/B/Deparse.t @@ -1757,6 +1757,14 @@ print sort(foo('bar')); substr(my $a, 0, 0) = (foo(), bar()); $a++; #### +# 4-arg substr (non-chop) +my $str = 'ABCD'; +my $bbb = substr($str, 1, 1, ''); +#### +# 4-arg substr (chop) +my $str = 'ABCD'; +my $aaa = substr($str, 0, 1, ''); +#### # This following line works around an unfixed bug that we are not trying to # test for here: # CONTEXT BEGIN { $^H{a} = "b"; delete $^H{a} } # make %^H localised diff --git a/lib/B/Op_private.pm b/lib/B/Op_private.pm index 9270a1f3ee015..7618a7862dd39 100644 --- a/lib/B/Op_private.pm +++ b/lib/B/Op_private.pm @@ -150,7 +150,7 @@ $bits{$_}{6} = 'OPpPAD_STATE' for qw(emptyavhv lvavref lvref padav padhv padsv p $bits{$_}{7} = 'OPpPV_IS_UTF8' for qw(dump goto last next redo); $bits{$_}{6} = 'OPpREFCOUNTED' for qw(leave leaveeval leavesub leavesublv leavewrite); $bits{$_}{2} = 'OPpSLICEWARNING' for qw(aslice hslice padav padhv rv2av rv2hv); -$bits{$_}{4} = 'OPpTARGET_MY' for qw(abs add atan2 ceil chmod chomp chown chr chroot concat cos crypt divide emptyavhv exec exp flock floor getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_negate i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement negate oct ord pow push rand refaddr reftype rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time undef unlink unshift utime wait waitpid); +$bits{$_}{4} = 'OPpTARGET_MY' for qw(abs add atan2 ceil chmod chomp chown chr chroot concat cos crypt divide emptyavhv exec exp flock floor getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_negate i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement negate oct ord pow push rand refaddr reftype rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify substr_left subtract symlink system time undef unlink unshift utime wait waitpid); $bits{$_}{5} = 'OPpTRANS_COMPLEMENT' for qw(trans transr); $bits{$_}{7} = 'OPpTRANS_DELETE' for qw(trans transr); $bits{$_}{2} = 'OPpTRANS_IDENTICAL' for qw(trans transr); @@ -585,6 +585,7 @@ $bits{stat}{0} = $bf[0]; $bits{study}{0} = $bf[0]; $bits{substcont}{0} = $bf[0]; @{$bits{substr}}{4,2,1,0} = ('OPpSUBSTR_REPL_FIRST', $bf[4], $bf[4], $bf[4]); +@{$bits{substr_left}}{2,1,0} = ($bf[4], $bf[4], $bf[4]); @{$bits{subtract}}{1,0} = ($bf[1], $bf[1]); @{$bits{symlink}}{3,2,1,0} = ($bf[5], $bf[5], $bf[5], $bf[5]); @{$bits{syscall}}{3,2,1,0} = ($bf[5], $bf[5], $bf[5], $bf[5]); @@ -925,7 +926,7 @@ our %ops_using = ( OPpSORT_DESCEND => [qw(sort)], OPpSPLIT_ASSIGN => [qw(split)], OPpSUBSTR_REPL_FIRST => [qw(substr)], - OPpTARGET_MY => [qw(abs add atan2 ceil chmod chomp chown chr chroot concat cos crypt divide emptyavhv exec exp flock floor getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_negate i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement negate oct ord pow push rand refaddr reftype rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time undef unlink unshift utime wait waitpid)], + OPpTARGET_MY => [qw(abs add atan2 ceil chmod chomp chown chr chroot concat cos crypt divide emptyavhv exec exp flock floor getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_negate i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement negate oct ord pow push rand refaddr reftype rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify substr_left subtract symlink system time undef unlink unshift utime wait waitpid)], OPpTRANS_COMPLEMENT => [qw(trans transr)], OPpTRUEBOOL => [qw(blessed grepwhile index length padav padhv pos ref rindex rv2av rv2hv subst)], OPpUNDEF_KEEP_PV => [qw(undef)], diff --git a/opcode.h b/opcode.h index ab62fa16ccc63..35a8aa7b3d748 100644 --- a/opcode.h +++ b/opcode.h @@ -268,6 +268,7 @@ EXTCONST char* const PL_op_name[] INIT({ "abs", "length", "substr", + "substr_left", "vec", "index", "rindex", @@ -687,6 +688,7 @@ EXTCONST char* const PL_op_desc[] INIT({ "abs", "length", "substr", + "substr left", "vec", "index", "rindex", @@ -1111,6 +1113,7 @@ INIT({ Perl_pp_abs, Perl_pp_length, Perl_pp_substr, + Perl_pp_substr_left, Perl_pp_vec, Perl_pp_index, Perl_pp_rindex, /* implemented by Perl_pp_index */ @@ -1530,6 +1533,7 @@ INIT({ Perl_ck_fun, /* abs */ Perl_ck_length, /* length */ Perl_ck_substr, /* substr */ + Perl_ck_substr, /* substr_left */ Perl_ck_fun, /* vec */ Perl_ck_index, /* index */ Perl_ck_index, /* rindex */ @@ -1948,6 +1952,7 @@ EXTCONST U32 PL_opargs[] INIT({ 0x00009b9e, /* abs */ 0x00009b9e, /* length */ 0x0991140c, /* substr */ + 0x0991141c, /* substr_left */ 0x0011140c, /* vec */ 0x0091141c, /* index */ 0x0091141c, /* rindex */ @@ -2668,9 +2673,10 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 78, /* abs */ 106, /* length */ 109, /* substr */ - 112, /* vec */ - 114, /* index */ - 114, /* rindex */ + 112, /* substr_left */ + 114, /* vec */ + 116, /* index */ + 116, /* rindex */ 56, /* sprintf */ 56, /* formline */ 78, /* ord */ @@ -2681,91 +2687,91 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* uc */ 0, /* lc */ 0, /* quotemeta */ - 118, /* rv2av */ - 125, /* aelemfast */ - 125, /* aelemfast_lex */ - 125, /* aelemfastlex_store */ - 126, /* aelem */ - 131, /* aslice */ - 134, /* kvaslice */ + 120, /* rv2av */ + 127, /* aelemfast */ + 127, /* aelemfast_lex */ + 127, /* aelemfastlex_store */ + 128, /* aelem */ + 133, /* aslice */ + 136, /* kvaslice */ 0, /* aeach */ 0, /* avalues */ 44, /* akeys */ 0, /* each */ 44, /* values */ 44, /* keys */ - 135, /* delete */ - 139, /* exists */ - 141, /* rv2hv */ - 126, /* helem */ - 131, /* hslice */ - 134, /* kvhslice */ - 149, /* multideref */ + 137, /* delete */ + 141, /* exists */ + 143, /* rv2hv */ + 128, /* helem */ + 133, /* hslice */ + 136, /* kvhslice */ + 151, /* multideref */ 56, /* unpack */ 56, /* pack */ - 156, /* split */ + 158, /* split */ 56, /* join */ - 161, /* list */ + 163, /* list */ 13, /* lslice */ 56, /* anonlist */ 56, /* anonhash */ - 163, /* emptyavhv */ + 165, /* emptyavhv */ 56, /* splice */ 101, /* push */ 0, /* pop */ 0, /* shift */ 101, /* unshift */ - 168, /* sort */ - 173, /* reverse */ + 170, /* sort */ + 175, /* reverse */ 0, /* grepstart */ - 175, /* grepwhile */ + 177, /* grepwhile */ 0, /* mapstart */ 0, /* mapwhile */ 0, /* range */ - 177, /* flip */ - 177, /* flop */ + 179, /* flip */ + 179, /* flop */ 0, /* and */ 0, /* or */ 13, /* xor */ 0, /* dor */ - 179, /* cond_expr */ + 181, /* cond_expr */ 0, /* andassign */ 0, /* orassign */ 0, /* dorassign */ - 181, /* entersub */ - 188, /* leavesub */ - 188, /* leavesublv */ + 183, /* entersub */ + 190, /* leavesub */ + 190, /* leavesublv */ 0, /* argcheck */ - 190, /* argelem */ - 192, /* argdefelem */ - 195, /* caller */ + 192, /* argelem */ + 194, /* argdefelem */ + 197, /* caller */ 56, /* warn */ 56, /* die */ 56, /* reset */ -1, /* lineseq */ - 197, /* nextstate */ - 197, /* dbstate */ + 199, /* nextstate */ + 199, /* dbstate */ -1, /* unstack */ -1, /* enter */ - 198, /* leave */ + 200, /* leave */ -1, /* scope */ - 200, /* enteriter */ - 204, /* iter */ + 202, /* enteriter */ + 206, /* iter */ -1, /* enterloop */ - 206, /* leaveloop */ + 208, /* leaveloop */ -1, /* return */ - 208, /* last */ - 208, /* next */ - 208, /* redo */ - 208, /* dump */ - 210, /* goto */ + 210, /* last */ + 210, /* next */ + 210, /* redo */ + 210, /* dump */ + 212, /* goto */ 56, /* exit */ - 213, /* method */ - 213, /* method_named */ - 213, /* method_super */ - 213, /* method_redir */ - 213, /* method_redir_super */ - 215, /* open */ + 215, /* method */ + 215, /* method_named */ + 215, /* method_super */ + 215, /* method_redir */ + 215, /* method_redir_super */ + 217, /* open */ 56, /* close */ 56, /* pipe_op */ 56, /* fileno */ @@ -2781,7 +2787,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 56, /* getc */ 56, /* read */ 56, /* enterwrite */ - 188, /* leavewrite */ + 190, /* leavewrite */ -1, /* prtf */ -1, /* print */ -1, /* say */ @@ -2811,33 +2817,33 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* getpeername */ 0, /* lstat */ 0, /* stat */ - 220, /* ftrread */ - 220, /* ftrwrite */ - 220, /* ftrexec */ - 220, /* fteread */ - 220, /* ftewrite */ - 220, /* fteexec */ - 225, /* ftis */ - 225, /* ftsize */ - 225, /* ftmtime */ - 225, /* ftatime */ - 225, /* ftctime */ - 225, /* ftrowned */ - 225, /* fteowned */ - 225, /* ftzero */ - 225, /* ftsock */ - 225, /* ftchr */ - 225, /* ftblk */ - 225, /* ftfile */ - 225, /* ftdir */ - 225, /* ftpipe */ - 225, /* ftsuid */ - 225, /* ftsgid */ - 225, /* ftsvtx */ - 225, /* ftlink */ - 225, /* fttty */ - 225, /* fttext */ - 225, /* ftbinary */ + 222, /* ftrread */ + 222, /* ftrwrite */ + 222, /* ftrexec */ + 222, /* fteread */ + 222, /* ftewrite */ + 222, /* fteexec */ + 227, /* ftis */ + 227, /* ftsize */ + 227, /* ftmtime */ + 227, /* ftatime */ + 227, /* ftctime */ + 227, /* ftrowned */ + 227, /* fteowned */ + 227, /* ftzero */ + 227, /* ftsock */ + 227, /* ftchr */ + 227, /* ftblk */ + 227, /* ftfile */ + 227, /* ftdir */ + 227, /* ftpipe */ + 227, /* ftsuid */ + 227, /* ftsgid */ + 227, /* ftsvtx */ + 227, /* ftlink */ + 227, /* fttty */ + 227, /* fttext */ + 227, /* ftbinary */ 56, /* chdir */ 101, /* chown */ 78, /* chroot */ @@ -2857,17 +2863,17 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* rewinddir */ 0, /* closedir */ -1, /* fork */ - 229, /* wait */ + 231, /* wait */ 101, /* waitpid */ 101, /* system */ 101, /* exec */ 101, /* kill */ - 229, /* getppid */ + 231, /* getppid */ 101, /* getpgrp */ 101, /* setpgrp */ 101, /* getpriority */ 101, /* setpriority */ - 229, /* time */ + 231, /* time */ -1, /* tms */ 0, /* localtime */ 56, /* gmtime */ @@ -2887,8 +2893,8 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* require */ 0, /* dofile */ -1, /* hintseval */ - 230, /* entereval */ - 188, /* leaveeval */ + 232, /* entereval */ + 190, /* leaveeval */ 0, /* entertry */ -1, /* leavetry */ 0, /* ghbyname */ @@ -2926,17 +2932,17 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* lock */ 0, /* once */ -1, /* custom */ - 237, /* coreargs */ - 241, /* avhvswitch */ + 239, /* coreargs */ + 243, /* avhvswitch */ 3, /* runcv */ 0, /* fc */ -1, /* padcv */ -1, /* introcv */ -1, /* clonecv */ - 243, /* padrange */ - 245, /* refassign */ - 251, /* lvref */ - 257, /* lvrefslice */ + 245, /* padrange */ + 247, /* refassign */ + 253, /* lvref */ + 259, /* lvrefslice */ 17, /* lvavref */ 0, /* anonconst */ 13, /* isa */ @@ -2946,20 +2952,20 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { -1, /* leavetrycatch */ -1, /* poptry */ 0, /* catch */ - 258, /* pushdefer */ + 260, /* pushdefer */ 0, /* is_bool */ 0, /* is_weak */ 0, /* weaken */ 0, /* unweaken */ 53, /* blessed */ - 260, /* refaddr */ - 260, /* reftype */ - 260, /* ceil */ - 260, /* floor */ + 262, /* refaddr */ + 262, /* reftype */ + 262, /* ceil */ + 262, /* floor */ 0, /* is_tainted */ - 263, /* helemexistsor */ - 265, /* methstart */ - 267, /* initfield */ + 265, /* helemexistsor */ + 267, /* methstart */ + 269, /* initfield */ -1, /* classname */ }; @@ -3013,6 +3019,7 @@ EXTCONST U16 PL_op_private_bitdefs[] = { 0x5aa9, /* bit_and, bit_xor, bit_or, sbit_and, sbit_xor, sbit_or, complement */ 0x06d4, 0x57d0, 0x0003, /* length */ 0x4d30, 0x3dec, 0x024b, /* substr */ + 0x57d0, 0x024b, /* substr_left */ 0x3dec, 0x0067, /* vec */ 0x3f58, 0x06d4, 0x57d0, 0x02af, /* index, rindex */ 0x3cfc, 0x47f8, 0x06d4, 0x3dec, 0x5148, 0x5424, 0x0003, /* rv2av */ @@ -3191,6 +3198,7 @@ EXTCONST U8 PL_op_private_valid[] = { /* ABS */ (OPpARG1_MASK|OPpTARGET_MY), /* LENGTH */ (OPpARG1_MASK|OPpTARGET_MY|OPpTRUEBOOL), /* SUBSTR */ (OPpARG3_MASK|OPpMAYBE_LVSUB|OPpSUBSTR_REPL_FIRST), + /* SUBSTR_LEFT */ (OPpARG3_MASK|OPpTARGET_MY), /* VEC */ (OPpARG2_MASK|OPpMAYBE_LVSUB), /* INDEX */ (OPpARG4_MASK|OPpTARGET_MY|OPpTRUEBOOL|OPpINDEX_BOOLNEG), /* RINDEX */ (OPpARG4_MASK|OPpTARGET_MY|OPpTRUEBOOL|OPpINDEX_BOOLNEG), diff --git a/opnames.h b/opnames.h index 4fb9f5d12d7d4..262ff212f9a69 100644 --- a/opnames.h +++ b/opnames.h @@ -136,299 +136,300 @@ typedef enum opcode { OP_ABS = 119, OP_LENGTH = 120, OP_SUBSTR = 121, - OP_VEC = 122, - OP_INDEX = 123, - OP_RINDEX = 124, - OP_SPRINTF = 125, - OP_FORMLINE = 126, - OP_ORD = 127, - OP_CHR = 128, - OP_CRYPT = 129, - OP_UCFIRST = 130, - OP_LCFIRST = 131, - OP_UC = 132, - OP_LC = 133, - OP_QUOTEMETA = 134, - OP_RV2AV = 135, - OP_AELEMFAST = 136, - OP_AELEMFAST_LEX = 137, - OP_AELEMFASTLEX_STORE = 138, - OP_AELEM = 139, - OP_ASLICE = 140, - OP_KVASLICE = 141, - OP_AEACH = 142, - OP_AVALUES = 143, - OP_AKEYS = 144, - OP_EACH = 145, - OP_VALUES = 146, - OP_KEYS = 147, - OP_DELETE = 148, - OP_EXISTS = 149, - OP_RV2HV = 150, - OP_HELEM = 151, - OP_HSLICE = 152, - OP_KVHSLICE = 153, - OP_MULTIDEREF = 154, - OP_UNPACK = 155, - OP_PACK = 156, - OP_SPLIT = 157, - OP_JOIN = 158, - OP_LIST = 159, - OP_LSLICE = 160, - OP_ANONLIST = 161, - OP_ANONHASH = 162, - OP_EMPTYAVHV = 163, - OP_SPLICE = 164, - OP_PUSH = 165, - OP_POP = 166, - OP_SHIFT = 167, - OP_UNSHIFT = 168, - OP_SORT = 169, - OP_REVERSE = 170, - OP_GREPSTART = 171, - OP_GREPWHILE = 172, - OP_MAPSTART = 173, - OP_MAPWHILE = 174, - OP_RANGE = 175, - OP_FLIP = 176, - OP_FLOP = 177, - OP_AND = 178, - OP_OR = 179, - OP_XOR = 180, - OP_DOR = 181, - OP_COND_EXPR = 182, - OP_ANDASSIGN = 183, - OP_ORASSIGN = 184, - OP_DORASSIGN = 185, - OP_ENTERSUB = 186, - OP_LEAVESUB = 187, - OP_LEAVESUBLV = 188, - OP_ARGCHECK = 189, - OP_ARGELEM = 190, - OP_ARGDEFELEM = 191, - OP_CALLER = 192, - OP_WARN = 193, - OP_DIE = 194, - OP_RESET = 195, - OP_LINESEQ = 196, - OP_NEXTSTATE = 197, - OP_DBSTATE = 198, - OP_UNSTACK = 199, - OP_ENTER = 200, - OP_LEAVE = 201, - OP_SCOPE = 202, - OP_ENTERITER = 203, - OP_ITER = 204, - OP_ENTERLOOP = 205, - OP_LEAVELOOP = 206, - OP_RETURN = 207, - OP_LAST = 208, - OP_NEXT = 209, - OP_REDO = 210, - OP_DUMP = 211, - OP_GOTO = 212, - OP_EXIT = 213, - OP_METHOD = 214, - OP_METHOD_NAMED = 215, - OP_METHOD_SUPER = 216, - OP_METHOD_REDIR = 217, - OP_METHOD_REDIR_SUPER = 218, - OP_OPEN = 219, - OP_CLOSE = 220, - OP_PIPE_OP = 221, - OP_FILENO = 222, - OP_UMASK = 223, - OP_BINMODE = 224, - OP_TIE = 225, - OP_UNTIE = 226, - OP_TIED = 227, - OP_DBMOPEN = 228, - OP_DBMCLOSE = 229, - OP_SSELECT = 230, - OP_SELECT = 231, - OP_GETC = 232, - OP_READ = 233, - OP_ENTERWRITE = 234, - OP_LEAVEWRITE = 235, - OP_PRTF = 236, - OP_PRINT = 237, - OP_SAY = 238, - OP_SYSOPEN = 239, - OP_SYSSEEK = 240, - OP_SYSREAD = 241, - OP_SYSWRITE = 242, - OP_EOF = 243, - OP_TELL = 244, - OP_SEEK = 245, - OP_TRUNCATE = 246, - OP_FCNTL = 247, - OP_IOCTL = 248, - OP_FLOCK = 249, - OP_SEND = 250, - OP_RECV = 251, - OP_SOCKET = 252, - OP_SOCKPAIR = 253, - OP_BIND = 254, - OP_CONNECT = 255, - OP_LISTEN = 256, - OP_ACCEPT = 257, - OP_SHUTDOWN = 258, - OP_GSOCKOPT = 259, - OP_SSOCKOPT = 260, - OP_GETSOCKNAME = 261, - OP_GETPEERNAME = 262, - OP_LSTAT = 263, - OP_STAT = 264, - OP_FTRREAD = 265, - OP_FTRWRITE = 266, - OP_FTREXEC = 267, - OP_FTEREAD = 268, - OP_FTEWRITE = 269, - OP_FTEEXEC = 270, - OP_FTIS = 271, - OP_FTSIZE = 272, - OP_FTMTIME = 273, - OP_FTATIME = 274, - OP_FTCTIME = 275, - OP_FTROWNED = 276, - OP_FTEOWNED = 277, - OP_FTZERO = 278, - OP_FTSOCK = 279, - OP_FTCHR = 280, - OP_FTBLK = 281, - OP_FTFILE = 282, - OP_FTDIR = 283, - OP_FTPIPE = 284, - OP_FTSUID = 285, - OP_FTSGID = 286, - OP_FTSVTX = 287, - OP_FTLINK = 288, - OP_FTTTY = 289, - OP_FTTEXT = 290, - OP_FTBINARY = 291, - OP_CHDIR = 292, - OP_CHOWN = 293, - OP_CHROOT = 294, - OP_UNLINK = 295, - OP_CHMOD = 296, - OP_UTIME = 297, - OP_RENAME = 298, - OP_LINK = 299, - OP_SYMLINK = 300, - OP_READLINK = 301, - OP_MKDIR = 302, - OP_RMDIR = 303, - OP_OPEN_DIR = 304, - OP_READDIR = 305, - OP_TELLDIR = 306, - OP_SEEKDIR = 307, - OP_REWINDDIR = 308, - OP_CLOSEDIR = 309, - OP_FORK = 310, - OP_WAIT = 311, - OP_WAITPID = 312, - OP_SYSTEM = 313, - OP_EXEC = 314, - OP_KILL = 315, - OP_GETPPID = 316, - OP_GETPGRP = 317, - OP_SETPGRP = 318, - OP_GETPRIORITY = 319, - OP_SETPRIORITY = 320, - OP_TIME = 321, - OP_TMS = 322, - OP_LOCALTIME = 323, - OP_GMTIME = 324, - OP_ALARM = 325, - OP_SLEEP = 326, - OP_SHMGET = 327, - OP_SHMCTL = 328, - OP_SHMREAD = 329, - OP_SHMWRITE = 330, - OP_MSGGET = 331, - OP_MSGCTL = 332, - OP_MSGSND = 333, - OP_MSGRCV = 334, - OP_SEMOP = 335, - OP_SEMGET = 336, - OP_SEMCTL = 337, - OP_REQUIRE = 338, - OP_DOFILE = 339, - OP_HINTSEVAL = 340, - OP_ENTEREVAL = 341, - OP_LEAVEEVAL = 342, - OP_ENTERTRY = 343, - OP_LEAVETRY = 344, - OP_GHBYNAME = 345, - OP_GHBYADDR = 346, - OP_GHOSTENT = 347, - OP_GNBYNAME = 348, - OP_GNBYADDR = 349, - OP_GNETENT = 350, - OP_GPBYNAME = 351, - OP_GPBYNUMBER = 352, - OP_GPROTOENT = 353, - OP_GSBYNAME = 354, - OP_GSBYPORT = 355, - OP_GSERVENT = 356, - OP_SHOSTENT = 357, - OP_SNETENT = 358, - OP_SPROTOENT = 359, - OP_SSERVENT = 360, - OP_EHOSTENT = 361, - OP_ENETENT = 362, - OP_EPROTOENT = 363, - OP_ESERVENT = 364, - OP_GPWNAM = 365, - OP_GPWUID = 366, - OP_GPWENT = 367, - OP_SPWENT = 368, - OP_EPWENT = 369, - OP_GGRNAM = 370, - OP_GGRGID = 371, - OP_GGRENT = 372, - OP_SGRENT = 373, - OP_EGRENT = 374, - OP_GETLOGIN = 375, - OP_SYSCALL = 376, - OP_LOCK = 377, - OP_ONCE = 378, - OP_CUSTOM = 379, - OP_COREARGS = 380, - OP_AVHVSWITCH = 381, - OP_RUNCV = 382, - OP_FC = 383, - OP_PADCV = 384, - OP_INTROCV = 385, - OP_CLONECV = 386, - OP_PADRANGE = 387, - OP_REFASSIGN = 388, - OP_LVREF = 389, - OP_LVREFSLICE = 390, - OP_LVAVREF = 391, - OP_ANONCONST = 392, - OP_ISA = 393, - OP_CMPCHAIN_AND = 394, - OP_CMPCHAIN_DUP = 395, - OP_ENTERTRYCATCH = 396, - OP_LEAVETRYCATCH = 397, - OP_POPTRY = 398, - OP_CATCH = 399, - OP_PUSHDEFER = 400, - OP_IS_BOOL = 401, - OP_IS_WEAK = 402, - OP_WEAKEN = 403, - OP_UNWEAKEN = 404, - OP_BLESSED = 405, - OP_REFADDR = 406, - OP_REFTYPE = 407, - OP_CEIL = 408, - OP_FLOOR = 409, - OP_IS_TAINTED = 410, - OP_HELEMEXISTSOR = 411, - OP_METHSTART = 412, - OP_INITFIELD = 413, - OP_CLASSNAME = 414, + OP_SUBSTR_LEFT = 122, + OP_VEC = 123, + OP_INDEX = 124, + OP_RINDEX = 125, + OP_SPRINTF = 126, + OP_FORMLINE = 127, + OP_ORD = 128, + OP_CHR = 129, + OP_CRYPT = 130, + OP_UCFIRST = 131, + OP_LCFIRST = 132, + OP_UC = 133, + OP_LC = 134, + OP_QUOTEMETA = 135, + OP_RV2AV = 136, + OP_AELEMFAST = 137, + OP_AELEMFAST_LEX = 138, + OP_AELEMFASTLEX_STORE = 139, + OP_AELEM = 140, + OP_ASLICE = 141, + OP_KVASLICE = 142, + OP_AEACH = 143, + OP_AVALUES = 144, + OP_AKEYS = 145, + OP_EACH = 146, + OP_VALUES = 147, + OP_KEYS = 148, + OP_DELETE = 149, + OP_EXISTS = 150, + OP_RV2HV = 151, + OP_HELEM = 152, + OP_HSLICE = 153, + OP_KVHSLICE = 154, + OP_MULTIDEREF = 155, + OP_UNPACK = 156, + OP_PACK = 157, + OP_SPLIT = 158, + OP_JOIN = 159, + OP_LIST = 160, + OP_LSLICE = 161, + OP_ANONLIST = 162, + OP_ANONHASH = 163, + OP_EMPTYAVHV = 164, + OP_SPLICE = 165, + OP_PUSH = 166, + OP_POP = 167, + OP_SHIFT = 168, + OP_UNSHIFT = 169, + OP_SORT = 170, + OP_REVERSE = 171, + OP_GREPSTART = 172, + OP_GREPWHILE = 173, + OP_MAPSTART = 174, + OP_MAPWHILE = 175, + OP_RANGE = 176, + OP_FLIP = 177, + OP_FLOP = 178, + OP_AND = 179, + OP_OR = 180, + OP_XOR = 181, + OP_DOR = 182, + OP_COND_EXPR = 183, + OP_ANDASSIGN = 184, + OP_ORASSIGN = 185, + OP_DORASSIGN = 186, + OP_ENTERSUB = 187, + OP_LEAVESUB = 188, + OP_LEAVESUBLV = 189, + OP_ARGCHECK = 190, + OP_ARGELEM = 191, + OP_ARGDEFELEM = 192, + OP_CALLER = 193, + OP_WARN = 194, + OP_DIE = 195, + OP_RESET = 196, + OP_LINESEQ = 197, + OP_NEXTSTATE = 198, + OP_DBSTATE = 199, + OP_UNSTACK = 200, + OP_ENTER = 201, + OP_LEAVE = 202, + OP_SCOPE = 203, + OP_ENTERITER = 204, + OP_ITER = 205, + OP_ENTERLOOP = 206, + OP_LEAVELOOP = 207, + OP_RETURN = 208, + OP_LAST = 209, + OP_NEXT = 210, + OP_REDO = 211, + OP_DUMP = 212, + OP_GOTO = 213, + OP_EXIT = 214, + OP_METHOD = 215, + OP_METHOD_NAMED = 216, + OP_METHOD_SUPER = 217, + OP_METHOD_REDIR = 218, + OP_METHOD_REDIR_SUPER = 219, + OP_OPEN = 220, + OP_CLOSE = 221, + OP_PIPE_OP = 222, + OP_FILENO = 223, + OP_UMASK = 224, + OP_BINMODE = 225, + OP_TIE = 226, + OP_UNTIE = 227, + OP_TIED = 228, + OP_DBMOPEN = 229, + OP_DBMCLOSE = 230, + OP_SSELECT = 231, + OP_SELECT = 232, + OP_GETC = 233, + OP_READ = 234, + OP_ENTERWRITE = 235, + OP_LEAVEWRITE = 236, + OP_PRTF = 237, + OP_PRINT = 238, + OP_SAY = 239, + OP_SYSOPEN = 240, + OP_SYSSEEK = 241, + OP_SYSREAD = 242, + OP_SYSWRITE = 243, + OP_EOF = 244, + OP_TELL = 245, + OP_SEEK = 246, + OP_TRUNCATE = 247, + OP_FCNTL = 248, + OP_IOCTL = 249, + OP_FLOCK = 250, + OP_SEND = 251, + OP_RECV = 252, + OP_SOCKET = 253, + OP_SOCKPAIR = 254, + OP_BIND = 255, + OP_CONNECT = 256, + OP_LISTEN = 257, + OP_ACCEPT = 258, + OP_SHUTDOWN = 259, + OP_GSOCKOPT = 260, + OP_SSOCKOPT = 261, + OP_GETSOCKNAME = 262, + OP_GETPEERNAME = 263, + OP_LSTAT = 264, + OP_STAT = 265, + OP_FTRREAD = 266, + OP_FTRWRITE = 267, + OP_FTREXEC = 268, + OP_FTEREAD = 269, + OP_FTEWRITE = 270, + OP_FTEEXEC = 271, + OP_FTIS = 272, + OP_FTSIZE = 273, + OP_FTMTIME = 274, + OP_FTATIME = 275, + OP_FTCTIME = 276, + OP_FTROWNED = 277, + OP_FTEOWNED = 278, + OP_FTZERO = 279, + OP_FTSOCK = 280, + OP_FTCHR = 281, + OP_FTBLK = 282, + OP_FTFILE = 283, + OP_FTDIR = 284, + OP_FTPIPE = 285, + OP_FTSUID = 286, + OP_FTSGID = 287, + OP_FTSVTX = 288, + OP_FTLINK = 289, + OP_FTTTY = 290, + OP_FTTEXT = 291, + OP_FTBINARY = 292, + OP_CHDIR = 293, + OP_CHOWN = 294, + OP_CHROOT = 295, + OP_UNLINK = 296, + OP_CHMOD = 297, + OP_UTIME = 298, + OP_RENAME = 299, + OP_LINK = 300, + OP_SYMLINK = 301, + OP_READLINK = 302, + OP_MKDIR = 303, + OP_RMDIR = 304, + OP_OPEN_DIR = 305, + OP_READDIR = 306, + OP_TELLDIR = 307, + OP_SEEKDIR = 308, + OP_REWINDDIR = 309, + OP_CLOSEDIR = 310, + OP_FORK = 311, + OP_WAIT = 312, + OP_WAITPID = 313, + OP_SYSTEM = 314, + OP_EXEC = 315, + OP_KILL = 316, + OP_GETPPID = 317, + OP_GETPGRP = 318, + OP_SETPGRP = 319, + OP_GETPRIORITY = 320, + OP_SETPRIORITY = 321, + OP_TIME = 322, + OP_TMS = 323, + OP_LOCALTIME = 324, + OP_GMTIME = 325, + OP_ALARM = 326, + OP_SLEEP = 327, + OP_SHMGET = 328, + OP_SHMCTL = 329, + OP_SHMREAD = 330, + OP_SHMWRITE = 331, + OP_MSGGET = 332, + OP_MSGCTL = 333, + OP_MSGSND = 334, + OP_MSGRCV = 335, + OP_SEMOP = 336, + OP_SEMGET = 337, + OP_SEMCTL = 338, + OP_REQUIRE = 339, + OP_DOFILE = 340, + OP_HINTSEVAL = 341, + OP_ENTEREVAL = 342, + OP_LEAVEEVAL = 343, + OP_ENTERTRY = 344, + OP_LEAVETRY = 345, + OP_GHBYNAME = 346, + OP_GHBYADDR = 347, + OP_GHOSTENT = 348, + OP_GNBYNAME = 349, + OP_GNBYADDR = 350, + OP_GNETENT = 351, + OP_GPBYNAME = 352, + OP_GPBYNUMBER = 353, + OP_GPROTOENT = 354, + OP_GSBYNAME = 355, + OP_GSBYPORT = 356, + OP_GSERVENT = 357, + OP_SHOSTENT = 358, + OP_SNETENT = 359, + OP_SPROTOENT = 360, + OP_SSERVENT = 361, + OP_EHOSTENT = 362, + OP_ENETENT = 363, + OP_EPROTOENT = 364, + OP_ESERVENT = 365, + OP_GPWNAM = 366, + OP_GPWUID = 367, + OP_GPWENT = 368, + OP_SPWENT = 369, + OP_EPWENT = 370, + OP_GGRNAM = 371, + OP_GGRGID = 372, + OP_GGRENT = 373, + OP_SGRENT = 374, + OP_EGRENT = 375, + OP_GETLOGIN = 376, + OP_SYSCALL = 377, + OP_LOCK = 378, + OP_ONCE = 379, + OP_CUSTOM = 380, + OP_COREARGS = 381, + OP_AVHVSWITCH = 382, + OP_RUNCV = 383, + OP_FC = 384, + OP_PADCV = 385, + OP_INTROCV = 386, + OP_CLONECV = 387, + OP_PADRANGE = 388, + OP_REFASSIGN = 389, + OP_LVREF = 390, + OP_LVREFSLICE = 391, + OP_LVAVREF = 392, + OP_ANONCONST = 393, + OP_ISA = 394, + OP_CMPCHAIN_AND = 395, + OP_CMPCHAIN_DUP = 396, + OP_ENTERTRYCATCH = 397, + OP_LEAVETRYCATCH = 398, + OP_POPTRY = 399, + OP_CATCH = 400, + OP_PUSHDEFER = 401, + OP_IS_BOOL = 402, + OP_IS_WEAK = 403, + OP_WEAKEN = 404, + OP_UNWEAKEN = 405, + OP_BLESSED = 406, + OP_REFADDR = 407, + OP_REFTYPE = 408, + OP_CEIL = 409, + OP_FLOOR = 410, + OP_IS_TAINTED = 411, + OP_HELEMEXISTSOR = 412, + OP_METHSTART = 413, + OP_INITFIELD = 414, + OP_CLASSNAME = 415, OP_max } opcode; @@ -439,7 +440,7 @@ An enum of all the legal Perl opcodes, defined in F =cut */ -#define MAXO 415 +#define MAXO 416 #define OP_FREED MAXO /* the OP_IS_* macros are optimized to a simple range check because diff --git a/peep.c b/peep.c index 4d507fb88c1de..ea2f7f18eeed1 100644 --- a/peep.c +++ b/peep.c @@ -3868,6 +3868,98 @@ Perl_rpeep(pTHX_ OP *o) } break; + case OP_SUBSTR: { + OP *expr, *offs, *len; + /* Specialize substr($x, 0, $y) and substr($x,0,$y,"") */ + /* Does this substr have 3-4 args and amiable flags? */ + if ( + ((cMAXARG3x(o) == 4) || (cMAXARG3x(o) == 3)) + /* No lvalue cases, no OPpSUBSTR_REPL_FIRST*/ + && !(o->op_private & (OPpSUBSTR_REPL_FIRST|OPpMAYBE_LVSUB)) + && !(o->op_flags & OPf_MOD) + ){ + /* Should be a leading ex-pushmark */ + OP *pushmark = cBINOPx(o)->op_first; + assert(pushmark->op_type == OP_NULL); + expr = OpSIBLING(pushmark); + offs = OpSIBLING(expr); + + /* Gets complicated fast if the expr isn't simple*/ + if (expr->op_type != OP_PADSV) + break; + /* Is the offset CONST zero? */ + if (offs->op_type != OP_CONST) + break; + SV *offs_sv = cSVOPx_sv(offs); + if (!(SvIOK(offs_sv) && SvIVX(offs_sv) == 0)) + break; + len = OpSIBLING(offs); + + if (cMAXARG3x(o) == 4) {/* replacement */ + /* Is the replacement string CONST ""? */ + OP *repl = OpSIBLING(len); + if (repl->op_type != OP_CONST) + break; + SV *repl_sv = cSVOPx_sv(repl); + if(!(SvPOK(repl_sv) && SvCUR(repl_sv) == 0)) + break; + } + } else { + break; + } + /* It's on! */ + /* Take out the static LENGTH & REPLACMENT OPs */ + /* (The finalizer does not seem to change op_next here) */ + expr->op_next = offs->op_next; + o->op_private = cMAXARG3x(o); + if (cMAXARG3x(o) == 4) + len->op_next = o; + + /* We have a problem if padrange pushes EXRP for us and + * then jumps straight to the offs CONST OP. For example: + * push @{$pref{ substr($key, 0, 1) }}, $key; + * We don't want to hit that OP, but cannot easily figure + * out if that is going to happen and adjust for it. + * So we have to null out the OP, and then do a fixup in + * B::Deparse. :/ */ + op_null(offs); + + /* repl status unchanged because it makes Deparsing easier. */ + + /* Upgrade the SUBSTR to a SUBSTR_LEFT */ + OpTYPE_set(o, OP_SUBSTR_LEFT); + + /* oldop will be the OP_CONST associated with "" */ + /* oldoldop is more unpredictable */ + oldoldop = oldop = NULL; + + /* pp_substr may be unsuitable for TARGMY optimization + * because of its potential RETPUSHUNDEF, and use of + * bit 4 for OPpSUBSTR_REPL_FIRST, but no such + * problems with pp_substr_left. Must just avoid + * sv == TARG.*/ + if (OP_TYPE_IS(o->op_next, OP_PADSV) && + !(o->op_next->op_private) && + OP_TYPE_IS(o->op_next->op_next, OP_SASSIGN) && + (o->op_next->op_targ != expr->op_targ) + ) { + OP * padsv = o->op_next; + OP * sassign = padsv->op_next; + /* Carry over some flags */ + o->op_flags = OPf_KIDS | (o->op_flags & OPf_PARENS) | + (sassign->op_flags & (OPf_WANT|OPf_PARENS)); + o->op_private |= OPpTARGET_MY; + /* Steal the TARG, set op_next pointers*/ + o->op_targ = padsv->op_targ; + padsv->op_targ = 0; + o->op_next = sassign->op_next; + /* Null the replaced OPs*/ + op_null(padsv); + op_null(sassign); + } + } + break; + case OP_SASSIGN: { if (OP_GIMME(o,0) == G_VOID || ( o->op_next->op_type == OP_LINESEQ diff --git a/pp.c b/pp.c index beac21e761d1c..059fe0ec249c4 100644 --- a/pp.c +++ b/pp.c @@ -3722,6 +3722,112 @@ PP_wrapped(pp_substr, RETPUSHUNDEF; } +/* OP_SUBSTR_LEFT is a specialized version of OP_SUBSTR, where: + * the EXPR is a PADSV + * the OFFSET is a CONST zero + * the replacement pattern is a CONST "" + * it's definitely not in lvalue context (see the check in pp_substr) + * it definitely doesn't have OPpSUBSTR_REPL_FIRST set + * it may be an rvalue or in void context (may support TARGMY later) + */ +PP(pp_substr_left) +{ + dTARGET; + STRLEN curlen; + STRLEN utf8_curlen = 0; + STRLEN byte_len = 0; + SV *sv = PL_stack_sp[-1]; + const bool rvalue = (GIMME_V != G_VOID) || (PL_op->op_private & OPpTARGET_MY); + const bool do_chop = (MAXARG3 == 4); + const char *tmps; + + if (SvROK(sv) && do_chop) { + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), + "Attempt to use reference as lvalue in substr" + ); + } + + if (do_chop) { + SvGETMAGIC(sv); + if (!SvTHINKFIRST(sv) && SvPOK(sv)) { + curlen = SvCUR(sv); + tmps = SvPVX_mutable(sv); + (void)SvPOK_only_UTF8(sv); + } else { + tmps = SvPV_force_nomg(sv, curlen); + } + } else { + if (!SvGMAGICAL(sv) && !SvROK(sv) && SvPOK(sv)) { + curlen = SvCUR(sv); + tmps = (char *)SvPVX_const(sv); + } else { + tmps = SvPV_const(sv, curlen); + } + } + + if (DO_UTF8(sv)) { + utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen); + if (utf8_curlen == curlen) + utf8_curlen = 0; + else + curlen = utf8_curlen; + } + + /* Inlined, simplified Perl_translate_substr_offsets */ + if (curlen) { + const IV len_iv = SvIV(PL_stack_sp[0]); + const int len_is_uv = len_iv ? SvIOK_UV(PL_stack_sp[0]) : 1; + + if (!len_is_uv && len_iv < 0) { /* Negative length supplied */ + const IV pos2_iv = curlen + len_iv; + if (!(curlen-1 > ~(UV)len_iv) && pos2_iv < 0) { + byte_len = 0; + } else if ((UV)pos2_iv > curlen) { + byte_len = (STRLEN)( (UV)curlen); + } else { + byte_len = (STRLEN)( (UV)pos2_iv ); + } + } else if ((UV)len_iv <= curlen) { /* Non-negative length supplied */ + byte_len = (STRLEN)( (UV)len_iv); + } else { + byte_len = curlen; + } + } + /* End of inlined, simplified Perl_translate_substr_offsets */ + + if (utf8_curlen) { + /* This could update byte_len, but the return value + will always be zero, which subsequent code has + assumed to be the case. */ + sv_or_pv_pos_u2b(sv, tmps, 0, &byte_len); + } + + if (rvalue) { + SvTAINTED_off(TARG); /* decontaminate */ + SvUTF8_off(TARG); /* decontaminate */ + sv_setpvn(TARG, tmps, byte_len); +#ifdef USE_LOCALE_COLLATE + sv_unmagic(TARG, PERL_MAGIC_collxfrm); +#endif + if (utf8_curlen) + SvUTF8_on(TARG); + } + + if (do_chop) { + SvTAINT(sv); + sv_chop(sv, SvPVX(sv) + byte_len); + SvSETMAGIC(sv); + } + + if (rvalue) { + SvSETMAGIC(TARG); + rpp_replace_2_1(TARG); + } else { + rpp_popfree_2(); + } + return NORMAL; +} + PP_wrapped(pp_vec, 3, 0) { dSP; diff --git a/pp_proto.h b/pp_proto.h index 02754995f4f27..8e094c22a2fe2 100644 --- a/pp_proto.h +++ b/pp_proto.h @@ -282,6 +282,7 @@ PERL_CALLCONV PP(pp_study) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_subst) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_substcont) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_substr) __attribute__visibility__("hidden"); +PERL_CALLCONV PP(pp_substr_left) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_subtract) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_syscall) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_sysopen) __attribute__visibility__("hidden"); diff --git a/regen/op_private b/regen/op_private index 409db837faf0f..302710c897abb 100644 --- a/regen/op_private +++ b/regen/op_private @@ -234,7 +234,7 @@ use strict; # substr starts off with 4 bits set in # ck_fun(), but since it never has more than 7 # args, bit 3 is later stolen - qw(substr); + qw(substr substr_left); $args4{$_} = 1 for keys %maxarg, grep !$args0{$_} && !$args1{$_} diff --git a/regen/opcodes b/regen/opcodes index f93a7a0635b33..6b902f36f2591 100644 --- a/regen/opcodes +++ b/regen/opcodes @@ -200,6 +200,7 @@ abs abs ck_fun fsTu% S? length length ck_length ifsTu% S? substr substr ck_substr st@ S S S? S? +substr_left substr left ck_substr sT@ S S S? S? vec vec ck_fun ist@ S S S index index ck_index isT@ S S S? diff --git a/t/op/substr_left.t b/t/op/substr_left.t new file mode 100644 index 0000000000000..a9e37037fc0e3 --- /dev/null +++ b/t/op/substr_left.t @@ -0,0 +1,108 @@ +BEGIN { + chdir 't'; + require './test.pl'; + @INC = '../lib'; +} + +use warnings; +use strict; + +my $str; my $result; my $len; + +# In each of the tests below, the OP_SUBSTR should have been replaced +# with a specialised OP_SUBSTR_LEFT OP. The tests in this file are +# intended as sanity tests for pp_substr_left's string position +# calculations and treatment of the input `sv` and output TARG. + +# Basic functionality with a simple string +$str = "Hello, World!"; +$result = substr($str, 0, 5, ""); +is($result, "Hello", 'simple case: correct extraction'); +is($str, ", World!", 'simple case: remainder is correct'); +# LENGTH is zero +$result = substr($str, 0, 0, ""); +is($result, "", 'zero length: returns empty string'); +is($str, ", World!", 'zero length: EXPR remains unchanged'); +# LENGTH is larger than the string +$result = substr($str, 0, 10, ""); +is($result, ", World!", 'LENGTH: returns entire string'); +is($str, "", 'LENGTH: EXPR is empty'); +# EXPR is an empty string +$result = substr($str, 0, 4, ""); +is($result, "", 'empty EXPR: returns empty string'); +is($str, "", 'empty EXPR: EXPR remains empty'); +# EXPR lexical is undef +{ +no warnings 'uninitialized'; +$str = undef; +$result = substr($str, 0, 2, ""); +is($result, "", 'undef EXPR: returns empty string'); +is($str, "", 'undef EXPR: EXPR becomes empty string'); +# LENGTH is undef +$str = "Hello"; +$result = substr($str, 0, undef, ""); +is($result, "", 'undef LENGTH: returns empty string'); +is($str, "Hello", 'undef LENGTH: EXPR is unchanged'); +} +# LENGTH is negative +$result = substr($str, 0, -2, ""); +is($result, "Hel", 'negative LENGTH: returns characters 0..length-2'); +is($str, "lo", 'negative LENGTH: 2 chars remaining'); +# EXPR is numeric (non-string) +$str = 12345678; +$result = substr($str, 0, 6, ""); +is($result, "123456", 'IV EXPR: returns stringified characters'); +is($str, "78", 'IV EXPR: stringified EXPR'); +# LENGTH IS A NV +$str = "Hello, again"; +$len = 2.5; +$result = substr($str, 0, $len, ""); +is($result, "He", 'NV LENGTH: returns floor() characters'); +is($str, "llo, again", 'NV LENGTH: EXPR retains length-floor() characters'); + +use Tie::Scalar; +{ + package TiedScalar; + use base 'Tie::StdScalar'; + sub STORE { + my ($self, $value) = @_; + $$self = $value; + } + sub FETCH { + my ($self) = @_; + return $$self; + } +} +# EXPR is a tied variable +my $str2; +tie $str2, 'TiedScalar'; +$str2 = "Hello World"; +$result = substr($str2, 0, 5, ""); +is($result, "Hello", 'tied EXPR: returns correct characters'); +is($str2, " World", 'tied EXPR: tied EXPR variable updated correctly'); +# TARG is a tied variable +my $result2; +tie $result2, 'TiedScalar'; +$result2 = substr($str2, 0, 2, ""); +is($result2, " W", 'tied TARG: returns correct characters'); +is($str2, "orld", 'tied TARG: tied EXPR variable updated correctly'); +# EXPR is a scalar containing UTF-8 string +use utf8; +$str = "Привет мир"; # "Hello world" in Russian +$result = substr($str, 0, 7, ""); +is($result, "Привет ", 'UTF-8 EXPR: returns correct UTF-8 characters'); +is($str, "мир", 'UTF-8 EXPR: UTF-8 string updated correctly'); +# LENGTH is outside of IV range +use Config; +$str = "Hello, Bernard"; +my $max_iv = $Config{ivsize} == 8 ? 9_223_372_036_854_775_807 : 2_147_483_647; +$result = substr($str, 0, $max_iv + 1, ""); +is($result, "Hello, Bernard", 'UV LENGTH: returns entire string'); +is($str, "", 'UV LENGTH: EXPR is emptied'); +# EXPR contains binary data +$str = "\x00\x01\x02\x03\x04\x05"; +$result = substr($str, 0, 3, ""); +is($result, "\x00\x01\x02", 'hex EXPR: returns correct characters'); +is($str, "\x03\x04\x05", 'hex EXPR: retains correct characters'); + +done_testing(); diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 59da49cafcadf..9d0e00cf50249 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -1779,6 +1779,22 @@ code => 'index $x, "b"', }, + # SUBSTR + 'func::substr::nibble_void' => { + desc => 'substr with a zero offset, empty replacement pattern (void)', + setup => 'my $z = "JAPH"x1000', + code => 'substr($z, 0, 2, "")', + }, + 'func::substr::nibble_lex' => { + desc => 'substr with a zero offset, empty replacement pattern (lex assign)', + setup => 'my $x; my $z = "JAPH"x1000', + code => '$x = substr($z, 0, 2, "")', + }, + 'func::substr::nibble_gvsv' => { + desc => 'substr with a zero offset, empty replacement pattern (global assign)', + setup => 'our $x; my $z = "JAPH"x1000', + code => '$x = substr($z, 0, 2, "")', + }, # JOIN diff --git a/t/perf/opcount.t b/t/perf/opcount.t index f904764c14096..0cac902a95fbb 100644 --- a/t/perf/opcount.t +++ b/t/perf/opcount.t @@ -1028,4 +1028,82 @@ test_opcount(0, "foreach 2 lexicals on builtin::indexed LIST", iter => 1, }); +# substr with const zero offset and "" replacements +test_opcount(0, "substr with const zero offset and '' repl (void)", + sub { my $z; substr($z, 0, 2, "") }, + { + substr => 0, + substr_left => 1, + const => 2, + }); + +test_opcount(0, "substr with const zero offset and '' repl (lexical)", + sub { my $z; my $x = substr($z, 0, 2, "") }, + { + substr => 0, + substr_left => 1, + const => 2, + padsv => 3, + sassign => 1 + }); + +test_opcount(0, "substr with const zero offset and '' repl (lexical TARGMY)", + sub { my ($z, $x); $x = substr($z, 0, 2, "") }, + { + substr => 0, + substr_left => 1, + const => 2, + padsv => 3, + padsv_store => 0, + sassign => 0 + }); + +test_opcount(0, "substr with const zero offset and '' repl (gv)", + sub { my $z; our $x = substr($z, 0, 2, "") }, + { + substr => 0, + substr_left => 1, + const => 2, + gvsv => 1, + sassign => 1 + }); + +test_opcount(0, "substr with const zero offset (void)", + sub { my $z; substr($z, 0, 2) }, + { + substr => 0, + substr_left => 1, + const => 1, + }); + +test_opcount(0, "substr with const zero offset (lexical)", + sub { my $z; my $x = substr($z, 0, 2) }, + { + substr => 0, + substr_left => 1, + const => 1, + padsv => 3, + sassign => 1 + }); + +test_opcount(0, "substr with const zero offset (lexical TARGMY)", + sub { my ($z, $x); $x = substr($z, 0, 2) }, + { + substr => 0, + substr_left => 1, + const => 1, + padsv => 3, + sassign => 0 + }); + +test_opcount(0, "substr with const zero offset (gv)", + sub { my $z; our $x = substr($z, 0, 2) }, + { + substr => 0, + substr_left => 1, + const => 1, + gvsv => 1, + sassign => 1 + }); + done_testing();