From f258d4338ddd51f6fcc155b2f8852c0c96b5028e Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Tue, 19 Nov 2024 23:02:37 +0000 Subject: [PATCH] OP_SUBSTR_NIBBLE - a specialised OP_SUBSTR variant This commit adds OP_SUBSTR_NIBBLE and associated machinery for fast handling of the constructions: substr EXPR,0,LENGTH,'' and substr EXPR,0,LENGTH Where EXPR is a scalar lexical, the OFFSET is zero, and either there is no REPLACEMENT or it is the empty string. LENGTH can be anything that OP_SUBSTR supports. These constraints allow for a very stripped back and optimised version of pp_substr. The primary motivation was for situations where a scalar, containing some network packets or other binary data structure, is being parsed piecemeal. Nibbling away at the scalar can be useful when you don't know how exactly it will be parsed and unpacked until you get started. It also means that you don't need to worry about correctly updating a separate offset variable. This operator also turns out to be an efficient way to (destructively) break an expression up into fixed size chunks. For example, given: my $x = ''; my $str = "A"x100_000_000; This code: $x = substr($str, 0, 5, "") while ($str); is twice as fast as doing: for ($pos = 0; $pos < length($str); $pos += 5) { $x = substr($str, $pos, 5); } Compared with blead, `$y = substr($x, 0, 5)` runs 40% faster and `$y = substr($x, 0, 5, '')` runs 45% faster. --- MANIFEST | 1 + ext/Opcode/Opcode.pm | 5 +- lib/B/Deparse.pm | 21 +- lib/B/Deparse.t | 8 + lib/B/Op_private.pm | 1 + opcode.h | 200 ++++++++------- opnames.h | 589 ++++++++++++++++++++++--------------------- peep.c | 93 +++++++ pp.c | 106 ++++++++ pp_proto.h | 1 + regen/op_private | 2 +- regen/opcodes | 1 + t/op/substr_nibble.t | 108 ++++++++ t/perf/benchmarks | 16 ++ t/perf/opcount.t | 32 +++ 15 files changed, 790 insertions(+), 394 deletions(-) create mode 100644 t/op/substr_nibble.t diff --git a/MANIFEST b/MANIFEST index edc0d6e5bbb72..0363d449b35e7 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6436,6 +6436,7 @@ t/op/studytied.t See if study works with tied scalars t/op/sub.t See if subroutines work t/op/sub_lval.t See if lvalue subroutines work t/op/substr.t See if substr works +t/op/substr_nibble.t See if substr($x, 0, $l, '') optimisation works t/op/substr_thr.t See if substr works in another thread t/op/svflags.t See if POK is set as expected. t/op/svleak.pl Test file for svleak.t diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm index baacf3c677ea9..e2c2991a7001c 100644 --- a/ext/Opcode/Opcode.pm +++ b/ext/Opcode/Opcode.pm @@ -1,4 +1,4 @@ -package Opcode 1.66; +package Opcode 1.67; use strict; @@ -322,7 +322,8 @@ invert_opset function. slt sgt sle sge seq sne scmp isa - substr vec stringify study pos length index rindex ord chr + substr substr_nibble vec stringify study pos length index + rindex ord chr ucfirst lcfirst uc lc fc quotemeta trans transr chop schop chomp schomp diff --git a/lib/B/Deparse.pm b/lib/B/Deparse.pm index 6e33893736958..dc5cb6ee9e092 100644 --- a/lib/B/Deparse.pm +++ b/lib/B/Deparse.pm @@ -7,7 +7,7 @@ # This is based on the module of the same name by Malcolm Beattie, # but essentially none of his code remains. -package B::Deparse 1.80; +package B::Deparse 1.81; use strict; use Carp; use B qw(class main_root main_start main_cv svref_2object opnumber perlstring @@ -3419,6 +3419,25 @@ sub pp_substr { maybe_local(@_, listop(@_, "substr")) } +sub pp_substr_nibble { + my ($self,$op,$cx) = @_; + + my $lex = ($op->private & OPpTARGET_MY); + + my $val = 'substr(' . $self->deparse($op->first->sibling, $cx) + . ', 0, ' . $self->deparse($op->first->sibling->sibling->sibling, $cx) + . ( (($op->private & 7) == 3) ? '' : ", '')" ); + + if ($lex) { + my $targ = $op->targ; + my $var = $self->maybe_my($op, $cx, $self->padname($op->targ), + $self->padname_sv($targ), + 0); + $val = $self->maybe_parens("$var = $val", $cx, 7); + } + $val; +} + sub pp_index { # Also handles pp_rindex. # diff --git a/lib/B/Deparse.t b/lib/B/Deparse.t index 59f937dcc52de..198566af878f6 100644 --- a/lib/B/Deparse.t +++ b/lib/B/Deparse.t @@ -1757,6 +1757,14 @@ print sort(foo('bar')); substr(my $a, 0, 0) = (foo(), bar()); $a++; #### +# 4-arg substr (non-nibble) +my $str = 'ABCD'; +my $bbb = substr($str, 1, 1, ''); +#### +# 4-arg substr (nibble) +my $str = 'ABCD'; +my $aaa = substr($str, 0, 1, ''); +#### # This following line works around an unfixed bug that we are not trying to # test for here: # CONTEXT BEGIN { $^H{a} = "b"; delete $^H{a} } # make %^H localised diff --git a/lib/B/Op_private.pm b/lib/B/Op_private.pm index 255817c85fc11..db085d9bed887 100644 --- a/lib/B/Op_private.pm +++ b/lib/B/Op_private.pm @@ -585,6 +585,7 @@ $bits{stat}{0} = $bf[0]; $bits{study}{0} = $bf[0]; $bits{substcont}{0} = $bf[0]; @{$bits{substr}}{4,2,1,0} = ('OPpSUBSTR_REPL_FIRST', $bf[4], $bf[4], $bf[4]); +@{$bits{substr_nibble}}{2,1,0} = ($bf[4], $bf[4], $bf[4]); @{$bits{subtract}}{1,0} = ($bf[1], $bf[1]); @{$bits{symlink}}{3,2,1,0} = ($bf[5], $bf[5], $bf[5], $bf[5]); @{$bits{syscall}}{3,2,1,0} = ($bf[5], $bf[5], $bf[5], $bf[5]); diff --git a/opcode.h b/opcode.h index ab62fa16ccc63..fafd9a4516ce3 100644 --- a/opcode.h +++ b/opcode.h @@ -268,6 +268,7 @@ EXTCONST char* const PL_op_name[] INIT({ "abs", "length", "substr", + "substr_nibble", "vec", "index", "rindex", @@ -687,6 +688,7 @@ EXTCONST char* const PL_op_desc[] INIT({ "abs", "length", "substr", + "substr nibble", "vec", "index", "rindex", @@ -1111,6 +1113,7 @@ INIT({ Perl_pp_abs, Perl_pp_length, Perl_pp_substr, + Perl_pp_substr_nibble, Perl_pp_vec, Perl_pp_index, Perl_pp_rindex, /* implemented by Perl_pp_index */ @@ -1530,6 +1533,7 @@ INIT({ Perl_ck_fun, /* abs */ Perl_ck_length, /* length */ Perl_ck_substr, /* substr */ + Perl_ck_substr, /* substr_nibble */ Perl_ck_fun, /* vec */ Perl_ck_index, /* index */ Perl_ck_index, /* rindex */ @@ -1948,6 +1952,7 @@ EXTCONST U32 PL_opargs[] INIT({ 0x00009b9e, /* abs */ 0x00009b9e, /* length */ 0x0991140c, /* substr */ + 0x0991140c, /* substr_nibble */ 0x0011140c, /* vec */ 0x0091141c, /* index */ 0x0091141c, /* rindex */ @@ -2668,9 +2673,10 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 78, /* abs */ 106, /* length */ 109, /* substr */ - 112, /* vec */ - 114, /* index */ - 114, /* rindex */ + 112, /* substr_nibble */ + 113, /* vec */ + 115, /* index */ + 115, /* rindex */ 56, /* sprintf */ 56, /* formline */ 78, /* ord */ @@ -2681,91 +2687,91 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* uc */ 0, /* lc */ 0, /* quotemeta */ - 118, /* rv2av */ - 125, /* aelemfast */ - 125, /* aelemfast_lex */ - 125, /* aelemfastlex_store */ - 126, /* aelem */ - 131, /* aslice */ - 134, /* kvaslice */ + 119, /* rv2av */ + 126, /* aelemfast */ + 126, /* aelemfast_lex */ + 126, /* aelemfastlex_store */ + 127, /* aelem */ + 132, /* aslice */ + 135, /* kvaslice */ 0, /* aeach */ 0, /* avalues */ 44, /* akeys */ 0, /* each */ 44, /* values */ 44, /* keys */ - 135, /* delete */ - 139, /* exists */ - 141, /* rv2hv */ - 126, /* helem */ - 131, /* hslice */ - 134, /* kvhslice */ - 149, /* multideref */ + 136, /* delete */ + 140, /* exists */ + 142, /* rv2hv */ + 127, /* helem */ + 132, /* hslice */ + 135, /* kvhslice */ + 150, /* multideref */ 56, /* unpack */ 56, /* pack */ - 156, /* split */ + 157, /* split */ 56, /* join */ - 161, /* list */ + 162, /* list */ 13, /* lslice */ 56, /* anonlist */ 56, /* anonhash */ - 163, /* emptyavhv */ + 164, /* emptyavhv */ 56, /* splice */ 101, /* push */ 0, /* pop */ 0, /* shift */ 101, /* unshift */ - 168, /* sort */ - 173, /* reverse */ + 169, /* sort */ + 174, /* reverse */ 0, /* grepstart */ - 175, /* grepwhile */ + 176, /* grepwhile */ 0, /* mapstart */ 0, /* mapwhile */ 0, /* range */ - 177, /* flip */ - 177, /* flop */ + 178, /* flip */ + 178, /* flop */ 0, /* and */ 0, /* or */ 13, /* xor */ 0, /* dor */ - 179, /* cond_expr */ + 180, /* cond_expr */ 0, /* andassign */ 0, /* orassign */ 0, /* dorassign */ - 181, /* entersub */ - 188, /* leavesub */ - 188, /* leavesublv */ + 182, /* entersub */ + 189, /* leavesub */ + 189, /* leavesublv */ 0, /* argcheck */ - 190, /* argelem */ - 192, /* argdefelem */ - 195, /* caller */ + 191, /* argelem */ + 193, /* argdefelem */ + 196, /* caller */ 56, /* warn */ 56, /* die */ 56, /* reset */ -1, /* lineseq */ - 197, /* nextstate */ - 197, /* dbstate */ + 198, /* nextstate */ + 198, /* dbstate */ -1, /* unstack */ -1, /* enter */ - 198, /* leave */ + 199, /* leave */ -1, /* scope */ - 200, /* enteriter */ - 204, /* iter */ + 201, /* enteriter */ + 205, /* iter */ -1, /* enterloop */ - 206, /* leaveloop */ + 207, /* leaveloop */ -1, /* return */ - 208, /* last */ - 208, /* next */ - 208, /* redo */ - 208, /* dump */ - 210, /* goto */ + 209, /* last */ + 209, /* next */ + 209, /* redo */ + 209, /* dump */ + 211, /* goto */ 56, /* exit */ - 213, /* method */ - 213, /* method_named */ - 213, /* method_super */ - 213, /* method_redir */ - 213, /* method_redir_super */ - 215, /* open */ + 214, /* method */ + 214, /* method_named */ + 214, /* method_super */ + 214, /* method_redir */ + 214, /* method_redir_super */ + 216, /* open */ 56, /* close */ 56, /* pipe_op */ 56, /* fileno */ @@ -2781,7 +2787,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 56, /* getc */ 56, /* read */ 56, /* enterwrite */ - 188, /* leavewrite */ + 189, /* leavewrite */ -1, /* prtf */ -1, /* print */ -1, /* say */ @@ -2811,33 +2817,33 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* getpeername */ 0, /* lstat */ 0, /* stat */ - 220, /* ftrread */ - 220, /* ftrwrite */ - 220, /* ftrexec */ - 220, /* fteread */ - 220, /* ftewrite */ - 220, /* fteexec */ - 225, /* ftis */ - 225, /* ftsize */ - 225, /* ftmtime */ - 225, /* ftatime */ - 225, /* ftctime */ - 225, /* ftrowned */ - 225, /* fteowned */ - 225, /* ftzero */ - 225, /* ftsock */ - 225, /* ftchr */ - 225, /* ftblk */ - 225, /* ftfile */ - 225, /* ftdir */ - 225, /* ftpipe */ - 225, /* ftsuid */ - 225, /* ftsgid */ - 225, /* ftsvtx */ - 225, /* ftlink */ - 225, /* fttty */ - 225, /* fttext */ - 225, /* ftbinary */ + 221, /* ftrread */ + 221, /* ftrwrite */ + 221, /* ftrexec */ + 221, /* fteread */ + 221, /* ftewrite */ + 221, /* fteexec */ + 226, /* ftis */ + 226, /* ftsize */ + 226, /* ftmtime */ + 226, /* ftatime */ + 226, /* ftctime */ + 226, /* ftrowned */ + 226, /* fteowned */ + 226, /* ftzero */ + 226, /* ftsock */ + 226, /* ftchr */ + 226, /* ftblk */ + 226, /* ftfile */ + 226, /* ftdir */ + 226, /* ftpipe */ + 226, /* ftsuid */ + 226, /* ftsgid */ + 226, /* ftsvtx */ + 226, /* ftlink */ + 226, /* fttty */ + 226, /* fttext */ + 226, /* ftbinary */ 56, /* chdir */ 101, /* chown */ 78, /* chroot */ @@ -2857,17 +2863,17 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* rewinddir */ 0, /* closedir */ -1, /* fork */ - 229, /* wait */ + 230, /* wait */ 101, /* waitpid */ 101, /* system */ 101, /* exec */ 101, /* kill */ - 229, /* getppid */ + 230, /* getppid */ 101, /* getpgrp */ 101, /* setpgrp */ 101, /* getpriority */ 101, /* setpriority */ - 229, /* time */ + 230, /* time */ -1, /* tms */ 0, /* localtime */ 56, /* gmtime */ @@ -2887,8 +2893,8 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* require */ 0, /* dofile */ -1, /* hintseval */ - 230, /* entereval */ - 188, /* leaveeval */ + 231, /* entereval */ + 189, /* leaveeval */ 0, /* entertry */ -1, /* leavetry */ 0, /* ghbyname */ @@ -2926,17 +2932,17 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* lock */ 0, /* once */ -1, /* custom */ - 237, /* coreargs */ - 241, /* avhvswitch */ + 238, /* coreargs */ + 242, /* avhvswitch */ 3, /* runcv */ 0, /* fc */ -1, /* padcv */ -1, /* introcv */ -1, /* clonecv */ - 243, /* padrange */ - 245, /* refassign */ - 251, /* lvref */ - 257, /* lvrefslice */ + 244, /* padrange */ + 246, /* refassign */ + 252, /* lvref */ + 258, /* lvrefslice */ 17, /* lvavref */ 0, /* anonconst */ 13, /* isa */ @@ -2946,20 +2952,20 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { -1, /* leavetrycatch */ -1, /* poptry */ 0, /* catch */ - 258, /* pushdefer */ + 259, /* pushdefer */ 0, /* is_bool */ 0, /* is_weak */ 0, /* weaken */ 0, /* unweaken */ 53, /* blessed */ - 260, /* refaddr */ - 260, /* reftype */ - 260, /* ceil */ - 260, /* floor */ + 261, /* refaddr */ + 261, /* reftype */ + 261, /* ceil */ + 261, /* floor */ 0, /* is_tainted */ - 263, /* helemexistsor */ - 265, /* methstart */ - 267, /* initfield */ + 264, /* helemexistsor */ + 266, /* methstart */ + 268, /* initfield */ -1, /* classname */ }; @@ -3013,6 +3019,7 @@ EXTCONST U16 PL_op_private_bitdefs[] = { 0x5aa9, /* bit_and, bit_xor, bit_or, sbit_and, sbit_xor, sbit_or, complement */ 0x06d4, 0x57d0, 0x0003, /* length */ 0x4d30, 0x3dec, 0x024b, /* substr */ + 0x024b, /* substr_nibble */ 0x3dec, 0x0067, /* vec */ 0x3f58, 0x06d4, 0x57d0, 0x02af, /* index, rindex */ 0x3cfc, 0x47f8, 0x06d4, 0x3dec, 0x5148, 0x5424, 0x0003, /* rv2av */ @@ -3191,6 +3198,7 @@ EXTCONST U8 PL_op_private_valid[] = { /* ABS */ (OPpARG1_MASK|OPpTARGET_MY), /* LENGTH */ (OPpARG1_MASK|OPpTARGET_MY|OPpTRUEBOOL), /* SUBSTR */ (OPpARG3_MASK|OPpMAYBE_LVSUB|OPpSUBSTR_REPL_FIRST), + /* SUBSTR_NIBBLE */ (OPpARG3_MASK), /* VEC */ (OPpARG2_MASK|OPpMAYBE_LVSUB), /* INDEX */ (OPpARG4_MASK|OPpTARGET_MY|OPpTRUEBOOL|OPpINDEX_BOOLNEG), /* RINDEX */ (OPpARG4_MASK|OPpTARGET_MY|OPpTRUEBOOL|OPpINDEX_BOOLNEG), diff --git a/opnames.h b/opnames.h index 4fb9f5d12d7d4..847317c726169 100644 --- a/opnames.h +++ b/opnames.h @@ -136,299 +136,300 @@ typedef enum opcode { OP_ABS = 119, OP_LENGTH = 120, OP_SUBSTR = 121, - OP_VEC = 122, - OP_INDEX = 123, - OP_RINDEX = 124, - OP_SPRINTF = 125, - OP_FORMLINE = 126, - OP_ORD = 127, - OP_CHR = 128, - OP_CRYPT = 129, - OP_UCFIRST = 130, - OP_LCFIRST = 131, - OP_UC = 132, - OP_LC = 133, - OP_QUOTEMETA = 134, - OP_RV2AV = 135, - OP_AELEMFAST = 136, - OP_AELEMFAST_LEX = 137, - OP_AELEMFASTLEX_STORE = 138, - OP_AELEM = 139, - OP_ASLICE = 140, - OP_KVASLICE = 141, - OP_AEACH = 142, - OP_AVALUES = 143, - OP_AKEYS = 144, - OP_EACH = 145, - OP_VALUES = 146, - OP_KEYS = 147, - OP_DELETE = 148, - OP_EXISTS = 149, - OP_RV2HV = 150, - OP_HELEM = 151, - OP_HSLICE = 152, - OP_KVHSLICE = 153, - OP_MULTIDEREF = 154, - OP_UNPACK = 155, - OP_PACK = 156, - OP_SPLIT = 157, - OP_JOIN = 158, - OP_LIST = 159, - OP_LSLICE = 160, - OP_ANONLIST = 161, - OP_ANONHASH = 162, - OP_EMPTYAVHV = 163, - OP_SPLICE = 164, - OP_PUSH = 165, - OP_POP = 166, - OP_SHIFT = 167, - OP_UNSHIFT = 168, - OP_SORT = 169, - OP_REVERSE = 170, - OP_GREPSTART = 171, - OP_GREPWHILE = 172, - OP_MAPSTART = 173, - OP_MAPWHILE = 174, - OP_RANGE = 175, - OP_FLIP = 176, - OP_FLOP = 177, - OP_AND = 178, - OP_OR = 179, - OP_XOR = 180, - OP_DOR = 181, - OP_COND_EXPR = 182, - OP_ANDASSIGN = 183, - OP_ORASSIGN = 184, - OP_DORASSIGN = 185, - OP_ENTERSUB = 186, - OP_LEAVESUB = 187, - OP_LEAVESUBLV = 188, - OP_ARGCHECK = 189, - OP_ARGELEM = 190, - OP_ARGDEFELEM = 191, - OP_CALLER = 192, - OP_WARN = 193, - OP_DIE = 194, - OP_RESET = 195, - OP_LINESEQ = 196, - OP_NEXTSTATE = 197, - OP_DBSTATE = 198, - OP_UNSTACK = 199, - OP_ENTER = 200, - OP_LEAVE = 201, - OP_SCOPE = 202, - OP_ENTERITER = 203, - OP_ITER = 204, - OP_ENTERLOOP = 205, - OP_LEAVELOOP = 206, - OP_RETURN = 207, - OP_LAST = 208, - OP_NEXT = 209, - OP_REDO = 210, - OP_DUMP = 211, - OP_GOTO = 212, - OP_EXIT = 213, - OP_METHOD = 214, - OP_METHOD_NAMED = 215, - OP_METHOD_SUPER = 216, - OP_METHOD_REDIR = 217, - OP_METHOD_REDIR_SUPER = 218, - OP_OPEN = 219, - OP_CLOSE = 220, - OP_PIPE_OP = 221, - OP_FILENO = 222, - OP_UMASK = 223, - OP_BINMODE = 224, - OP_TIE = 225, - OP_UNTIE = 226, - OP_TIED = 227, - OP_DBMOPEN = 228, - OP_DBMCLOSE = 229, - OP_SSELECT = 230, - OP_SELECT = 231, - OP_GETC = 232, - OP_READ = 233, - OP_ENTERWRITE = 234, - OP_LEAVEWRITE = 235, - OP_PRTF = 236, - OP_PRINT = 237, - OP_SAY = 238, - OP_SYSOPEN = 239, - OP_SYSSEEK = 240, - OP_SYSREAD = 241, - OP_SYSWRITE = 242, - OP_EOF = 243, - OP_TELL = 244, - OP_SEEK = 245, - OP_TRUNCATE = 246, - OP_FCNTL = 247, - OP_IOCTL = 248, - OP_FLOCK = 249, - OP_SEND = 250, - OP_RECV = 251, - OP_SOCKET = 252, - OP_SOCKPAIR = 253, - OP_BIND = 254, - OP_CONNECT = 255, - OP_LISTEN = 256, - OP_ACCEPT = 257, - OP_SHUTDOWN = 258, - OP_GSOCKOPT = 259, - OP_SSOCKOPT = 260, - OP_GETSOCKNAME = 261, - OP_GETPEERNAME = 262, - OP_LSTAT = 263, - OP_STAT = 264, - OP_FTRREAD = 265, - OP_FTRWRITE = 266, - OP_FTREXEC = 267, - OP_FTEREAD = 268, - OP_FTEWRITE = 269, - OP_FTEEXEC = 270, - OP_FTIS = 271, - OP_FTSIZE = 272, - OP_FTMTIME = 273, - OP_FTATIME = 274, - OP_FTCTIME = 275, - OP_FTROWNED = 276, - OP_FTEOWNED = 277, - OP_FTZERO = 278, - OP_FTSOCK = 279, - OP_FTCHR = 280, - OP_FTBLK = 281, - OP_FTFILE = 282, - OP_FTDIR = 283, - OP_FTPIPE = 284, - OP_FTSUID = 285, - OP_FTSGID = 286, - OP_FTSVTX = 287, - OP_FTLINK = 288, - OP_FTTTY = 289, - OP_FTTEXT = 290, - OP_FTBINARY = 291, - OP_CHDIR = 292, - OP_CHOWN = 293, - OP_CHROOT = 294, - OP_UNLINK = 295, - OP_CHMOD = 296, - OP_UTIME = 297, - OP_RENAME = 298, - OP_LINK = 299, - OP_SYMLINK = 300, - OP_READLINK = 301, - OP_MKDIR = 302, - OP_RMDIR = 303, - OP_OPEN_DIR = 304, - OP_READDIR = 305, - OP_TELLDIR = 306, - OP_SEEKDIR = 307, - OP_REWINDDIR = 308, - OP_CLOSEDIR = 309, - OP_FORK = 310, - OP_WAIT = 311, - OP_WAITPID = 312, - OP_SYSTEM = 313, - OP_EXEC = 314, - OP_KILL = 315, - OP_GETPPID = 316, - OP_GETPGRP = 317, - OP_SETPGRP = 318, - OP_GETPRIORITY = 319, - OP_SETPRIORITY = 320, - OP_TIME = 321, - OP_TMS = 322, - OP_LOCALTIME = 323, - OP_GMTIME = 324, - OP_ALARM = 325, - OP_SLEEP = 326, - OP_SHMGET = 327, - OP_SHMCTL = 328, - OP_SHMREAD = 329, - OP_SHMWRITE = 330, - OP_MSGGET = 331, - OP_MSGCTL = 332, - OP_MSGSND = 333, - OP_MSGRCV = 334, - OP_SEMOP = 335, - OP_SEMGET = 336, - OP_SEMCTL = 337, - OP_REQUIRE = 338, - OP_DOFILE = 339, - OP_HINTSEVAL = 340, - OP_ENTEREVAL = 341, - OP_LEAVEEVAL = 342, - OP_ENTERTRY = 343, - OP_LEAVETRY = 344, - OP_GHBYNAME = 345, - OP_GHBYADDR = 346, - OP_GHOSTENT = 347, - OP_GNBYNAME = 348, - OP_GNBYADDR = 349, - OP_GNETENT = 350, - OP_GPBYNAME = 351, - OP_GPBYNUMBER = 352, - OP_GPROTOENT = 353, - OP_GSBYNAME = 354, - OP_GSBYPORT = 355, - OP_GSERVENT = 356, - OP_SHOSTENT = 357, - OP_SNETENT = 358, - OP_SPROTOENT = 359, - OP_SSERVENT = 360, - OP_EHOSTENT = 361, - OP_ENETENT = 362, - OP_EPROTOENT = 363, - OP_ESERVENT = 364, - OP_GPWNAM = 365, - OP_GPWUID = 366, - OP_GPWENT = 367, - OP_SPWENT = 368, - OP_EPWENT = 369, - OP_GGRNAM = 370, - OP_GGRGID = 371, - OP_GGRENT = 372, - OP_SGRENT = 373, - OP_EGRENT = 374, - OP_GETLOGIN = 375, - OP_SYSCALL = 376, - OP_LOCK = 377, - OP_ONCE = 378, - OP_CUSTOM = 379, - OP_COREARGS = 380, - OP_AVHVSWITCH = 381, - OP_RUNCV = 382, - OP_FC = 383, - OP_PADCV = 384, - OP_INTROCV = 385, - OP_CLONECV = 386, - OP_PADRANGE = 387, - OP_REFASSIGN = 388, - OP_LVREF = 389, - OP_LVREFSLICE = 390, - OP_LVAVREF = 391, - OP_ANONCONST = 392, - OP_ISA = 393, - OP_CMPCHAIN_AND = 394, - OP_CMPCHAIN_DUP = 395, - OP_ENTERTRYCATCH = 396, - OP_LEAVETRYCATCH = 397, - OP_POPTRY = 398, - OP_CATCH = 399, - OP_PUSHDEFER = 400, - OP_IS_BOOL = 401, - OP_IS_WEAK = 402, - OP_WEAKEN = 403, - OP_UNWEAKEN = 404, - OP_BLESSED = 405, - OP_REFADDR = 406, - OP_REFTYPE = 407, - OP_CEIL = 408, - OP_FLOOR = 409, - OP_IS_TAINTED = 410, - OP_HELEMEXISTSOR = 411, - OP_METHSTART = 412, - OP_INITFIELD = 413, - OP_CLASSNAME = 414, + OP_SUBSTR_NIBBLE = 122, + OP_VEC = 123, + OP_INDEX = 124, + OP_RINDEX = 125, + OP_SPRINTF = 126, + OP_FORMLINE = 127, + OP_ORD = 128, + OP_CHR = 129, + OP_CRYPT = 130, + OP_UCFIRST = 131, + OP_LCFIRST = 132, + OP_UC = 133, + OP_LC = 134, + OP_QUOTEMETA = 135, + OP_RV2AV = 136, + OP_AELEMFAST = 137, + OP_AELEMFAST_LEX = 138, + OP_AELEMFASTLEX_STORE = 139, + OP_AELEM = 140, + OP_ASLICE = 141, + OP_KVASLICE = 142, + OP_AEACH = 143, + OP_AVALUES = 144, + OP_AKEYS = 145, + OP_EACH = 146, + OP_VALUES = 147, + OP_KEYS = 148, + OP_DELETE = 149, + OP_EXISTS = 150, + OP_RV2HV = 151, + OP_HELEM = 152, + OP_HSLICE = 153, + OP_KVHSLICE = 154, + OP_MULTIDEREF = 155, + OP_UNPACK = 156, + OP_PACK = 157, + OP_SPLIT = 158, + OP_JOIN = 159, + OP_LIST = 160, + OP_LSLICE = 161, + OP_ANONLIST = 162, + OP_ANONHASH = 163, + OP_EMPTYAVHV = 164, + OP_SPLICE = 165, + OP_PUSH = 166, + OP_POP = 167, + OP_SHIFT = 168, + OP_UNSHIFT = 169, + OP_SORT = 170, + OP_REVERSE = 171, + OP_GREPSTART = 172, + OP_GREPWHILE = 173, + OP_MAPSTART = 174, + OP_MAPWHILE = 175, + OP_RANGE = 176, + OP_FLIP = 177, + OP_FLOP = 178, + OP_AND = 179, + OP_OR = 180, + OP_XOR = 181, + OP_DOR = 182, + OP_COND_EXPR = 183, + OP_ANDASSIGN = 184, + OP_ORASSIGN = 185, + OP_DORASSIGN = 186, + OP_ENTERSUB = 187, + OP_LEAVESUB = 188, + OP_LEAVESUBLV = 189, + OP_ARGCHECK = 190, + OP_ARGELEM = 191, + OP_ARGDEFELEM = 192, + OP_CALLER = 193, + OP_WARN = 194, + OP_DIE = 195, + OP_RESET = 196, + OP_LINESEQ = 197, + OP_NEXTSTATE = 198, + OP_DBSTATE = 199, + OP_UNSTACK = 200, + OP_ENTER = 201, + OP_LEAVE = 202, + OP_SCOPE = 203, + OP_ENTERITER = 204, + OP_ITER = 205, + OP_ENTERLOOP = 206, + OP_LEAVELOOP = 207, + OP_RETURN = 208, + OP_LAST = 209, + OP_NEXT = 210, + OP_REDO = 211, + OP_DUMP = 212, + OP_GOTO = 213, + OP_EXIT = 214, + OP_METHOD = 215, + OP_METHOD_NAMED = 216, + OP_METHOD_SUPER = 217, + OP_METHOD_REDIR = 218, + OP_METHOD_REDIR_SUPER = 219, + OP_OPEN = 220, + OP_CLOSE = 221, + OP_PIPE_OP = 222, + OP_FILENO = 223, + OP_UMASK = 224, + OP_BINMODE = 225, + OP_TIE = 226, + OP_UNTIE = 227, + OP_TIED = 228, + OP_DBMOPEN = 229, + OP_DBMCLOSE = 230, + OP_SSELECT = 231, + OP_SELECT = 232, + OP_GETC = 233, + OP_READ = 234, + OP_ENTERWRITE = 235, + OP_LEAVEWRITE = 236, + OP_PRTF = 237, + OP_PRINT = 238, + OP_SAY = 239, + OP_SYSOPEN = 240, + OP_SYSSEEK = 241, + OP_SYSREAD = 242, + OP_SYSWRITE = 243, + OP_EOF = 244, + OP_TELL = 245, + OP_SEEK = 246, + OP_TRUNCATE = 247, + OP_FCNTL = 248, + OP_IOCTL = 249, + OP_FLOCK = 250, + OP_SEND = 251, + OP_RECV = 252, + OP_SOCKET = 253, + OP_SOCKPAIR = 254, + OP_BIND = 255, + OP_CONNECT = 256, + OP_LISTEN = 257, + OP_ACCEPT = 258, + OP_SHUTDOWN = 259, + OP_GSOCKOPT = 260, + OP_SSOCKOPT = 261, + OP_GETSOCKNAME = 262, + OP_GETPEERNAME = 263, + OP_LSTAT = 264, + OP_STAT = 265, + OP_FTRREAD = 266, + OP_FTRWRITE = 267, + OP_FTREXEC = 268, + OP_FTEREAD = 269, + OP_FTEWRITE = 270, + OP_FTEEXEC = 271, + OP_FTIS = 272, + OP_FTSIZE = 273, + OP_FTMTIME = 274, + OP_FTATIME = 275, + OP_FTCTIME = 276, + OP_FTROWNED = 277, + OP_FTEOWNED = 278, + OP_FTZERO = 279, + OP_FTSOCK = 280, + OP_FTCHR = 281, + OP_FTBLK = 282, + OP_FTFILE = 283, + OP_FTDIR = 284, + OP_FTPIPE = 285, + OP_FTSUID = 286, + OP_FTSGID = 287, + OP_FTSVTX = 288, + OP_FTLINK = 289, + OP_FTTTY = 290, + OP_FTTEXT = 291, + OP_FTBINARY = 292, + OP_CHDIR = 293, + OP_CHOWN = 294, + OP_CHROOT = 295, + OP_UNLINK = 296, + OP_CHMOD = 297, + OP_UTIME = 298, + OP_RENAME = 299, + OP_LINK = 300, + OP_SYMLINK = 301, + OP_READLINK = 302, + OP_MKDIR = 303, + OP_RMDIR = 304, + OP_OPEN_DIR = 305, + OP_READDIR = 306, + OP_TELLDIR = 307, + OP_SEEKDIR = 308, + OP_REWINDDIR = 309, + OP_CLOSEDIR = 310, + OP_FORK = 311, + OP_WAIT = 312, + OP_WAITPID = 313, + OP_SYSTEM = 314, + OP_EXEC = 315, + OP_KILL = 316, + OP_GETPPID = 317, + OP_GETPGRP = 318, + OP_SETPGRP = 319, + OP_GETPRIORITY = 320, + OP_SETPRIORITY = 321, + OP_TIME = 322, + OP_TMS = 323, + OP_LOCALTIME = 324, + OP_GMTIME = 325, + OP_ALARM = 326, + OP_SLEEP = 327, + OP_SHMGET = 328, + OP_SHMCTL = 329, + OP_SHMREAD = 330, + OP_SHMWRITE = 331, + OP_MSGGET = 332, + OP_MSGCTL = 333, + OP_MSGSND = 334, + OP_MSGRCV = 335, + OP_SEMOP = 336, + OP_SEMGET = 337, + OP_SEMCTL = 338, + OP_REQUIRE = 339, + OP_DOFILE = 340, + OP_HINTSEVAL = 341, + OP_ENTEREVAL = 342, + OP_LEAVEEVAL = 343, + OP_ENTERTRY = 344, + OP_LEAVETRY = 345, + OP_GHBYNAME = 346, + OP_GHBYADDR = 347, + OP_GHOSTENT = 348, + OP_GNBYNAME = 349, + OP_GNBYADDR = 350, + OP_GNETENT = 351, + OP_GPBYNAME = 352, + OP_GPBYNUMBER = 353, + OP_GPROTOENT = 354, + OP_GSBYNAME = 355, + OP_GSBYPORT = 356, + OP_GSERVENT = 357, + OP_SHOSTENT = 358, + OP_SNETENT = 359, + OP_SPROTOENT = 360, + OP_SSERVENT = 361, + OP_EHOSTENT = 362, + OP_ENETENT = 363, + OP_EPROTOENT = 364, + OP_ESERVENT = 365, + OP_GPWNAM = 366, + OP_GPWUID = 367, + OP_GPWENT = 368, + OP_SPWENT = 369, + OP_EPWENT = 370, + OP_GGRNAM = 371, + OP_GGRGID = 372, + OP_GGRENT = 373, + OP_SGRENT = 374, + OP_EGRENT = 375, + OP_GETLOGIN = 376, + OP_SYSCALL = 377, + OP_LOCK = 378, + OP_ONCE = 379, + OP_CUSTOM = 380, + OP_COREARGS = 381, + OP_AVHVSWITCH = 382, + OP_RUNCV = 383, + OP_FC = 384, + OP_PADCV = 385, + OP_INTROCV = 386, + OP_CLONECV = 387, + OP_PADRANGE = 388, + OP_REFASSIGN = 389, + OP_LVREF = 390, + OP_LVREFSLICE = 391, + OP_LVAVREF = 392, + OP_ANONCONST = 393, + OP_ISA = 394, + OP_CMPCHAIN_AND = 395, + OP_CMPCHAIN_DUP = 396, + OP_ENTERTRYCATCH = 397, + OP_LEAVETRYCATCH = 398, + OP_POPTRY = 399, + OP_CATCH = 400, + OP_PUSHDEFER = 401, + OP_IS_BOOL = 402, + OP_IS_WEAK = 403, + OP_WEAKEN = 404, + OP_UNWEAKEN = 405, + OP_BLESSED = 406, + OP_REFADDR = 407, + OP_REFTYPE = 408, + OP_CEIL = 409, + OP_FLOOR = 410, + OP_IS_TAINTED = 411, + OP_HELEMEXISTSOR = 412, + OP_METHSTART = 413, + OP_INITFIELD = 414, + OP_CLASSNAME = 415, OP_max } opcode; @@ -439,7 +440,7 @@ An enum of all the legal Perl opcodes, defined in F =cut */ -#define MAXO 415 +#define MAXO 416 #define OP_FREED MAXO /* the OP_IS_* macros are optimized to a simple range check because diff --git a/peep.c b/peep.c index 5b5082862ceb3..956731637e729 100644 --- a/peep.c +++ b/peep.c @@ -3868,6 +3868,99 @@ Perl_rpeep(pTHX_ OP *o) } break; + case OP_SUBSTR: { + OP *pushmark, *expr, *offs, *len, *repl; + SV *offs_sv, *repl_sv; + /* Specialize substr($x, 0, $y) and substr($x,0,$y,"") */ + /* Does this substr have 3-4 args and amiable flags? */ + if ( + (((o->op_private & 7) == 4) || ((o->op_private & 7) == 3)) + /* No lvalue cases, no OPpSUBSTR_REPL_FIRST*/ + && !(o->op_private & (OPpSUBSTR_REPL_FIRST|OPpMAYBE_LVSUB)) + && !(o->op_flags & OPf_MOD) + ){ + /* Should be a leading ex-pushmark */ + pushmark = cBINOPx(o)->op_first; + assert(pushmark->op_type == OP_NULL); + expr = OpSIBLING(pushmark); + offs = OpSIBLING(expr); + + /* Gets complicated fast if the expr isn't simple*/ + if (expr->op_type != OP_PADSV) + break; + /* Is the offset CONST zero? */ + if (offs->op_type != OP_CONST) + break; + offs_sv = cSVOPx_sv(offs); + if (!(SvIOK(offs_sv) && SvIVX(offs_sv) == 0)) + break; + len = OpSIBLING(offs); + + if ((o->op_private & 7) == 4) {/* replacement */ + /* Is the replacement string CONST ""? */ + repl = OpSIBLING(len); + if (repl->op_type != OP_CONST) + break; + repl_sv = cSVOPx_sv(repl); + if(!(SvPOK(repl_sv) && SvCUR(repl_sv) == 0)) + break; + } + } else { + break; + } + /* It's on! */ + /* Take out the static LENGTH & REPLACMENT OPs */ + /* (The finalizer does not seem to change op_next here) */ + expr->op_next = offs->op_next; + o->op_private = o->op_private & 7; + if ((o->op_private & 7) == 4) + len->op_next = o; + + /* We have a problem if padrange pushes EXRP for us and + * then jumps straight to the offs CONST OP. For example: + * push @{$pref{ substr($key, 0, 1) }}, $key; + * We don't want to hit that OP, but cannot easily figure + * out if that is going to happen and adjust for it. + * So we have to null out the OP, and then do a fixup in + * B::Deparse. :/ */ + op_null(offs); + + /* repl staus unchanged because it makes Deparsing easier. */ + + /* Upgrade the SUBSTR to a SUBSTR_NIBBLE */ + OpTYPE_set(o, OP_SUBSTR_NIBBLE); + + /* oldop will be the OP_CONST associated with "" */ + /* oldoldop is more unpredictable */ + oldoldop = oldop = NULL; + + /* pp_substr may be unsuitable for TARGMY optimization + * because of its potential RETPUSHUNDEF, and use of + * bit 4 for OPpSUBSTR_REPL_FIRST, but no such + * problems with pp_substr_nibble. Must just avoid + * sv == TARG.*/ + if (OP_TYPE_IS(o->op_next, OP_PADSV) && + !(o->op_next->op_private) && + OP_TYPE_IS(o->op_next->op_next, OP_SASSIGN) && + (o->op_next->op_targ != expr->op_targ) + ) { + OP * padsv = o->op_next; + OP * sassign = padsv->op_next; + /* Carry over some flags */ + o->op_flags = OPf_KIDS | (o->op_flags & OPf_PARENS) | + (sassign->op_flags & (OPf_WANT|OPf_PARENS)); + o->op_private |= OPpTARGET_MY; + /* Steal the TARG, set op_next pointers*/ + o->op_targ = padsv->op_targ; + padsv->op_targ = 0; + o->op_next = sassign->op_next; + /* Null the replaced OPs*/ + op_null(padsv); + op_null(sassign); + } + } + break; + case OP_SASSIGN: { if (OP_GIMME(o,0) == G_VOID || ( o->op_next->op_type == OP_LINESEQ diff --git a/pp.c b/pp.c index 46a0333876724..3145fd42a647e 100644 --- a/pp.c +++ b/pp.c @@ -3722,6 +3722,112 @@ PP_wrapped(pp_substr, RETPUSHUNDEF; } +/* OP_SUBSTR_NIBBLE is a specialized version of OP_SUBSTR, where: + * the EXPR is a PADSV + * the OFFSET is a CONST zero + * the replacement pattern is a CONST "" + * it's definitely not in lvalue context (see the check in pp_substr) + * it definitely doesn't have OPpSUBSTR_REPL_FIRST set + * it may be an rvalue or in void context (may support TARGMY later) + */ +PP(pp_substr_nibble) +{ + dTARGET; + STRLEN curlen; + STRLEN utf8_curlen = 0; + STRLEN byte_len = 0; + SV *sv = PL_stack_sp[-1]; + const bool rvalue = (GIMME_V != G_VOID) || (PL_op->op_private & OPpTARGET_MY); + const bool do_chop = ((PL_op->op_private & 7) == 4) ? true : false; + const char *tmps; + + if (SvROK(sv) && do_chop) { + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), + "Attempt to use reference as lvalue in substr" + ); + } + + if (do_chop) { + SvGETMAGIC(sv); + if (!SvTHINKFIRST(sv) && SvPOK(sv)) { + curlen = SvCUR(sv); + tmps = SvPVX_mutable(sv); + (void)SvPOK_only_UTF8(sv); + } else { + tmps = SvPV_force_nomg(sv, curlen); + } + } else { + if (!SvGMAGICAL(sv) && !SvROK(sv) && SvPOK(sv)) { + curlen = SvCUR(sv); + tmps = (char *)SvPVX_const(sv); + } else { + tmps = SvPV_const(sv, curlen); + } + } + + if (DO_UTF8(sv)) { + utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen); + if (utf8_curlen == curlen) + utf8_curlen = 0; + else + curlen = utf8_curlen; + } + + /* Inlined, simplified Perl_translate_substr_offsets */ + if (curlen) { + const IV len_iv = SvIV(PL_stack_sp[0]); + const int len_is_uv = len_iv ? SvIOK_UV(PL_stack_sp[0]) : 1; + + if (!len_is_uv && len_iv < 0) { /* Negative length supplied */ + const IV pos2_iv = curlen + len_iv; + if (!(curlen-1 > ~(UV)len_iv) && pos2_iv < 0) { + byte_len = 0; + } else if ((UV)pos2_iv > curlen) { + byte_len = (STRLEN)( (UV)curlen); + } else { + byte_len = (STRLEN)( (UV)pos2_iv ); + } + } else if ((UV)len_iv <= curlen) { /* Non-negative length supplied */ + byte_len = (STRLEN)( (UV)len_iv); + } else { + byte_len = curlen; + } + } + /* End of inlined, simplified Perl_translate_substr_offsets */ + + if (utf8_curlen) { + /* This could update byte_len, but the return value + will always be zero, which subsequent code has + assumed to be the case. */ + sv_or_pv_pos_u2b(sv, tmps, 0, &byte_len); + } + + if (rvalue) { + SvTAINTED_off(TARG); /* decontaminate */ + SvUTF8_off(TARG); /* decontaminate */ + sv_setpvn(TARG, tmps, byte_len); +#ifdef USE_LOCALE_COLLATE + sv_unmagic(TARG, PERL_MAGIC_collxfrm); +#endif + if (utf8_curlen) + SvUTF8_on(TARG); + } + + if (do_chop) { + SvTAINT(sv); + sv_chop(sv, SvPVX(sv) + byte_len); + SvSETMAGIC(sv); + } + + if (rvalue) { + SvSETMAGIC(TARG); + rpp_replace_2_1(TARG); + } else { + rpp_popfree_2(); + } + return NORMAL; +} + PP_wrapped(pp_vec, 3, 0) { dSP; diff --git a/pp_proto.h b/pp_proto.h index 02754995f4f27..2b2278ed5e757 100644 --- a/pp_proto.h +++ b/pp_proto.h @@ -282,6 +282,7 @@ PERL_CALLCONV PP(pp_study) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_subst) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_substcont) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_substr) __attribute__visibility__("hidden"); +PERL_CALLCONV PP(pp_substr_nibble) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_subtract) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_syscall) __attribute__visibility__("hidden"); PERL_CALLCONV PP(pp_sysopen) __attribute__visibility__("hidden"); diff --git a/regen/op_private b/regen/op_private index 409db837faf0f..f9cfd1a0de4dd 100644 --- a/regen/op_private +++ b/regen/op_private @@ -234,7 +234,7 @@ use strict; # substr starts off with 4 bits set in # ck_fun(), but since it never has more than 7 # args, bit 3 is later stolen - qw(substr); + qw(substr substr_nibble); $args4{$_} = 1 for keys %maxarg, grep !$args0{$_} && !$args1{$_} diff --git a/regen/opcodes b/regen/opcodes index f93a7a0635b33..231a4a9175f9c 100644 --- a/regen/opcodes +++ b/regen/opcodes @@ -200,6 +200,7 @@ abs abs ck_fun fsTu% S? length length ck_length ifsTu% S? substr substr ck_substr st@ S S S? S? +substr_nibble substr nibble ck_substr st@ S S S? S? vec vec ck_fun ist@ S S S index index ck_index isT@ S S S? diff --git a/t/op/substr_nibble.t b/t/op/substr_nibble.t new file mode 100644 index 0000000000000..b66ebe53e48f1 --- /dev/null +++ b/t/op/substr_nibble.t @@ -0,0 +1,108 @@ +BEGIN { + chdir 't'; + require './test.pl'; + @INC = '../lib'; +} + +use warnings; +use strict; + +my $str; my $result; my $len; + +# In each of the tests below, the OP_SUBSTR should have been replaced +# with a specialised OP_SUBSTR_NIBBLE OP. The tests in this file are +# intended as sanity tests for pp_substr_nibble's string position +# calculations and treatment of the input `sv` and output TARG. + +# Basic functionality with a simple string +$str = "Hello, World!"; +$result = substr($str, 0, 5, ""); +is($result, "Hello", 'simple case: correct extraction'); +is($str, ", World!", 'simple case: remainder is correct'); +# LENGTH is zero +$result = substr($str, 0, 0, ""); +is($result, "", 'zero length: returns empty string'); +is($str, ", World!", 'zero length: EXPR remains unchanged'); +# LENGTH is larger than the string +$result = substr($str, 0, 10, ""); +is($result, ", World!", 'LENGTH: returns entire string'); +is($str, "", 'LENGTH: EXPR is empty'); +# EXPR is an empty string +$result = substr($str, 0, 4, ""); +is($result, "", 'empty EXPR: returns empty string'); +is($str, "", 'empty EXPR: EXPR remains empty'); +# EXPR lexical is undef +{ +no warnings 'uninitialized'; +$str = undef; +$result = substr($str, 0, 2, ""); +is($result, "", 'undef EXPR: returns empty string'); +is($str, "", 'undef EXPR: EXPR becomes empty string'); +# LENGTH is undef +$str = "Hello"; +$result = substr($str, 0, undef, ""); +is($result, "", 'undef LENGTH: returns empty string'); +is($str, "Hello", 'undef LENGTH: EXPR is unchanged'); +} +# LENGTH is negative +$result = substr($str, 0, -2, ""); +is($result, "Hel", 'negative LENGTH: returns characters 0..length-2'); +is($str, "lo", 'negative LENGTH: 2 chars remaining'); +# EXPR is numeric (non-string) +$str = 12345678; +$result = substr($str, 0, 6, ""); +is($result, "123456", 'IV EXPR: returns stringified characters'); +is($str, "78", 'IV EXPR: stringified EXPR'); +# LENGTH IS A NV +$str = "Hello, again"; +$len = 2.5; +$result = substr($str, 0, $len, ""); +is($result, "He", 'NV LENGTH: returns floor() characters'); +is($str, "llo, again", 'NV LENGTH: EXPR retains length-floor() characters'); + +use Tie::Scalar; +{ + package TiedScalar; + use base 'Tie::StdScalar'; + sub STORE { + my ($self, $value) = @_; + $$self = $value; + } + sub FETCH { + my ($self) = @_; + return $$self; + } +} +# EXPR is a tied variable +my $str2; +tie $str2, 'TiedScalar'; +$str2 = "Hello World"; +$result = substr($str2, 0, 5, ""); +is($result, "Hello", 'tied EXPR: returns correct characters'); +is($str2, " World", 'tied EXPR: tied EXPR variable updated correctly'); +# TARG is a tied variable +my $result2; +tie $result2, 'TiedScalar'; +$result2 = substr($str2, 0, 2, ""); +is($result2, " W", 'tied TARG: returns correct characters'); +is($str2, "orld", 'tied TARG: tied EXPR variable updated correctly'); +# EXPR is a scalar containing UTF-8 string +use utf8; +$str = "Привет мир"; # "Hello world" in Russian +$result = substr($str, 0, 7, ""); +is($result, "Привет ", 'UTF-8 EXPR: returns correct UTF-8 characters'); +is($str, "мир", 'UTF-8 EXPR: UTF-8 string updated correctly'); +# LENGTH is outside of IV range +use Config; +$str = "Hello, Bernard"; +my $max_iv = $Config{ivsize} == 8 ? 9_223_372_036_854_775_807 : 2_147_483_647; +$result = substr($str, 0, $max_iv + 1, ""); +is($result, "Hello, Bernard", 'UV LENGTH: returns entire string'); +is($str, "", 'UV LENGTH: EXPR is emptied'); +# EXPR contains binary data +$str = "\x00\x01\x02\x03\x04\x05"; +$result = substr($str, 0, 3, ""); +is($result, "\x00\x01\x02", 'hex EXPR: returns correct characters'); +is($str, "\x03\x04\x05", 'hex EXPR: retains correct characters'); + +done_testing(); diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 59da49cafcadf..9d0e00cf50249 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -1779,6 +1779,22 @@ code => 'index $x, "b"', }, + # SUBSTR + 'func::substr::nibble_void' => { + desc => 'substr with a zero offset, empty replacement pattern (void)', + setup => 'my $z = "JAPH"x1000', + code => 'substr($z, 0, 2, "")', + }, + 'func::substr::nibble_lex' => { + desc => 'substr with a zero offset, empty replacement pattern (lex assign)', + setup => 'my $x; my $z = "JAPH"x1000', + code => '$x = substr($z, 0, 2, "")', + }, + 'func::substr::nibble_gvsv' => { + desc => 'substr with a zero offset, empty replacement pattern (global assign)', + setup => 'our $x; my $z = "JAPH"x1000', + code => '$x = substr($z, 0, 2, "")', + }, # JOIN diff --git a/t/perf/opcount.t b/t/perf/opcount.t index ece5ec8ef9077..663825a85aef0 100644 --- a/t/perf/opcount.t +++ b/t/perf/opcount.t @@ -1020,4 +1020,36 @@ test_opcount(0, "foreach 2 lexicals on builtin::indexed", padav => 2, }); +# substr with const zero offset and "" replacements +test_opcount(0, "substr with const zero offset (void)", + sub { my $z = "JAPH"; substr($z, 0, 2, "") }, + { + substr => 0, + substr_nibble =>1, + const => 3, + padsv_store => 1, + }); + +test_opcount(0, "substr with const zero offset (lexical)", + sub { my $z = "JAPH"; my $x = substr($z, 0, 2, "") }, + { + substr => 0, + substr_nibble =>1, + const => 3, + padsv => 2, + padsv_store => 1, + sassign => 1 + }); + +test_opcount(0, "substr with const zero offset (gv)", + sub { my $z = "JAPH"; our $x = substr($z, 0, 2, "") }, + { + substr => 0, + substr_nibble =>1, + const => 3, + gvsv => 1, + padsv_store => 1, + sassign => 1 + }); + done_testing();