From 8d585c68edfeea13c799c7241f55dd1cf4e5d71b Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 26 Nov 2024 15:18:39 +0000 Subject: [PATCH] op.c: Further optimisations of foreach+indexed to apply to LIST as well This builds on the work of commit a8394b4e8c, expanding it to apply to any `foreach` loop iterating over any kind of list, not just an in-place array. --- lib/B/Deparse.t | 4 +++ lib/builtin.t | 13 +++++-- op.c | 91 +++++++++++++++++++++++++++++++---------------- pod/perldelta.pod | 24 +++++++++++++ pp_hot.c | 10 ++++++ t/perf/opcount.t | 10 +++++- 6 files changed, 118 insertions(+), 34 deletions(-) diff --git a/lib/B/Deparse.t b/lib/B/Deparse.t index 59f937dcc52d..20adbca4743b 100644 --- a/lib/B/Deparse.t +++ b/lib/B/Deparse.t @@ -2615,6 +2615,10 @@ foreach my ($idx, $elem) (builtin::indexed @arr) { die; } #### +foreach my ($idx, $elem) (builtin::indexed 'x', 'y', 'z') { + die; +} +#### my @ducks; foreach my ($tick, $trick, $track) (@ducks) { study $_; diff --git a/lib/builtin.t b/lib/builtin.t index c4b26ee3640b..22a1adf800b7 100644 --- a/lib/builtin.t +++ b/lib/builtin.t @@ -357,7 +357,7 @@ package FetchStoreCounter { } ok(eq_array(\@output, [qw( [0]=zero [1]=one [2]=two [3]=three [4]=four [5]=five )] ), - 'foreach + builtin::indexed' ); + 'foreach + builtin::indexed ARRAY' ); undef @output; @@ -368,7 +368,16 @@ package FetchStoreCounter { } ok(eq_array(\@output, [qw( [0]=zero [1]=one [2]=two [3]=three [4]=four [5]=five )] ), - 'foreach + imported indexed' ); + 'foreach + imported indexed ARRAY' ); + + undef @output; + + foreach my ( $idx, $val ) ( builtin::indexed qw( six seven eight nine ) ) { + push @output, "[$idx]=$val"; + } + + ok(eq_array(\@output, [qw( [0]=six [1]=seven [2]=eight [3]=nine )] ), + 'foreach + builtin::indexed LIST' ); } # Vanilla trim tests diff --git a/op.c b/op.c index 94ccc3080c6b..fcb48b0dd309 100644 --- a/op.c +++ b/op.c @@ -9672,6 +9672,17 @@ S_op_is_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) return CvXSUB(cv) == xsub; } +#define op_is_call_to_cv_xsub(o, xsub) S_op_is_call_to_cv_xsub(aTHX_ o, xsub) +static bool +S_op_is_call_to_cv_xsub(pTHX_ OP *o, XSUBADDR_t xsub) +{ + if(o->op_type != OP_ENTERSUB) + return false; + + OP *cvop = cLISTOPx(cUNOPo->op_first)->op_last; + return op_is_cv_xsub(cvop, xsub); +} + /* =for apidoc newFOROP @@ -9812,45 +9823,64 @@ Perl_newFOROP(pTHX_ I32 flags, OP *sv, OP *expr, OP *block, OP *cont) enteriterpflags |= OPpITER_DEF; } - if (expr->op_type == OP_RV2AV || expr->op_type == OP_PADAV) { - expr = op_lvalue(op_force_list(scalar(ref(expr, OP_ITER))), OP_GREPSTART); - enteriterflags |= OPf_STACKED; - } - else if (padoff != 0 && how_many_more == 1 && /* two lexical vars */ - expr->op_type == OP_ENTERSUB) { + if (padoff != 0 && how_many_more == 1 && /* two lexical vars */ + op_is_call_to_cv_xsub(expr, &Perl_XS_builtin_indexed)) { /* expr is a call to builtin::indexed */ + /* Turn the OP_ENTERSUB into a regular OP_LIST without the final CV, + * and set the OPpITER_INDEXED flag instead */ OP *args = cUNOPx(expr)->op_first; assert(OP_TYPE_IS_OR_WAS(args, OP_LIST)); - OP *pre_firstarg = NULL; - OP *firstarg = cLISTOPx(args)->op_first; - OP *lastarg = cLISTOPx(args)->op_last; + OP *first = cLISTOPx(args)->op_first; + /* OP_PUSHMARK must remain */ + assert(first->op_type == OP_PUSHMARK); + first = OpSIBLING(first); - if(firstarg->op_type == OP_PUSHMARK) - pre_firstarg = firstarg, firstarg = OpSIBLING(firstarg); - if(firstarg == lastarg) - firstarg = NULL; + OP *pre_last = NULL, *last = first; + while(OpHAS_SIBLING(last)) + pre_last = last, last = OpSIBLING(last); + if(pre_last) { + /* splice out the final CV op */ + cLISTOPx(args)->op_last = pre_last; + OpLASTSIB_set(pre_last, args); - if (op_is_cv_xsub(lastarg, &Perl_XS_builtin_indexed) && /* a call to builtin::indexed */ - firstarg && OpSIBLING(firstarg) == lastarg && /* with one arg */ - (firstarg->op_type == OP_RV2AV || firstarg->op_type == OP_PADAV) /* ... which is an array */ - ) { - /* Turn for my ($idx, $val) (indexed @arr) into a similar OPf_STACKED - * loop on the array itself as the case above, plus a flag to tell - * pp_iter to set the index directly - */ + op_free(last); - /* Cut the array arg out of the args list and discard the rest of - * the original expr - */ - op_sibling_splice(args, pre_firstarg, 1, NULL); + last = pre_last; + } + + if(first == last && (first->op_type == OP_PADAV || first->op_type == OP_RV2AV)) { + /* Preserve the ARRAY shortcut */ + OpLASTSIB_set(cLISTOPx(args)->op_first, args); op_free(expr); - expr = op_lvalue(op_force_list(scalar(ref(firstarg, OP_ITER))), OP_GREPSTART); - enteriterflags |= OPf_STACKED; - iterpflags |= OPpITER_INDEXED; + OpLASTSIB_set(first, NULL); + expr = first; } - else - goto expr_not_special; + else { + /* the op_targ slot contained the "was" op_type for an + * OP_NULL; clear it or op_free() will get very confused */ + args->op_targ = 0; + OpTYPE_set(args, OP_LIST); + OpLASTSIB_set(args, NULL); + + expr->op_flags &= ~OPf_KIDS; + cUNOPx(expr)->op_first = NULL; + op_free(expr); + + expr = args; + } + + /* expr's parent has currently been set to NULL, but that's OK. When + * it gets consumed by the LOOP* structure later to make the loop op + * itself this will get set correctly. + */ + + iterpflags |= OPpITER_INDEXED; + } + + if (expr->op_type == OP_RV2AV || expr->op_type == OP_PADAV) { + expr = op_lvalue(op_force_list(scalar(ref(expr, OP_ITER))), OP_GREPSTART); + enteriterflags |= OPf_STACKED; } else if (expr->op_type == OP_NULL && (expr->op_flags & OPf_KIDS) && @@ -9882,7 +9912,6 @@ Perl_newFOROP(pTHX_ I32 flags, OP *sv, OP *expr, OP *block, OP *cont) enteriterflags |= OPf_STACKED; } else { -expr_not_special: expr = op_lvalue(op_force_list(expr), OP_GREPSTART); } diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 6e78d01a6f0b..a70a9d04fadf 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -105,6 +105,30 @@ There may well be none in a stable release. XXX +=item * + +Code that uses the C function from the L module to generate +a list of index/value pairs out of an array or list which is then passed into +a two-variable C list to unpack those again is now optimised to be +more efficient. + + my @array = (...); + + foreach my ($idx, $val) (builtin::indexed @array) { + ... + } + +Z<> + + foreach my ($idx, $val) (builtin::indexed LIST...) { + ... + } + +In particular, a temporary list twice the size of the original is no longer +generated. Instead, the loop iterates down the original array or list +in-place directly, in the same way that C or +C would do. + =back =head1 Modules and Pragmata diff --git a/pp_hot.c b/pp_hot.c index b6042fad0498..641bedc42569 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -5031,6 +5031,16 @@ PP(pp_iter) sv = PL_stack_base[ix]; } + if (UNLIKELY(pflags & OPpITER_INDEXED) && (i == 0)) { + SvREFCNT_dec(*itersvp); + /* here ix is really a stack pointer offset; we have to + * calculate the real index */ + *itersvp = newSViv(ix - cx->blk_loop.state_u.stack.basesp - 1); + + ++i; + ++itersvp; + } + av = NULL; goto loop_ary_common; diff --git a/t/perf/opcount.t b/t/perf/opcount.t index ece5ec8ef907..f904764c1409 100644 --- a/t/perf/opcount.t +++ b/t/perf/opcount.t @@ -1011,7 +1011,7 @@ test_opcount(0, "Empty anonhash ref and direct lexical assignment", srefgen => 1, }); -test_opcount(0, "foreach 2 lexicals on builtin::indexed", +test_opcount(0, "foreach 2 lexicals on builtin::indexed ARRAY", sub { my @input = (); foreach my ($i, $x) (builtin::indexed @input) { } }, { entersub => 0, # no call to builtin::indexed @@ -1020,4 +1020,12 @@ test_opcount(0, "foreach 2 lexicals on builtin::indexed", padav => 2, }); +test_opcount(0, "foreach 2 lexicals on builtin::indexed LIST", + sub { foreach my ($i, $x) (builtin::indexed qw( x y z )) { } }, + { + entersub => 0, # no call to builtin::indexed + enteriter => 1, + iter => 1, + }); + done_testing();