Discussion:
[perl.git] branch blead, updated. v5.17.0-241-g3b6759a
(too old to reply)
Yves Orton
2012-06-07 07:00:31 UTC
Permalink
In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/3b6759a6b10290b6fea26fb98b50fbf5ce4466b5?hp=ae0a0fb2ce70e35111ba07e2a578edd8fcc989f8>

- Log -----------------------------------------------------------------
commit 3b6759a6b10290b6fea26fb98b50fbf5ce4466b5
Author: Yves Orton <***@gmail.com>
Date: Wed Jun 6 08:53:05 2012 +0200

optimise (?:|) and related NOTHING like constructs out of the compiled optree

A pattern like (?:|) causes the regex engine to do extra work even though
it is equivelent to matching nothing. This optimises such sequences into
more efficient opcodes that do less work, and in some cases optimises
them away entirely.
-----------------------------------------------------------------------

Summary of changes:
regcomp.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 116 insertions(+), 25 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 5ec3bc4..906add7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3372,21 +3372,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
* in which case we do some bookkeeping, otherwise we update
* the end pointer. */
if ( !first ) {
+ first = cur;
+ trietype = noper_trietype;
if ( noper_trietype == NOTHING ) {
#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
regnode * const noper_next = regnext( noper );
- U8 noper_next_type = (noper_next && noper_next != tail) ? OP(noper_next) : 0;
+ U8 noper_next_type = (noper_next && noper_next!=tail) ? OP(noper_next) : 0;
U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
#endif

- if ( noper_next_trietype ) {
- first = cur;
+ if ( noper_next_trietype )
trietype = noper_next_trietype;
- }
- } else {
- first = cur;
- trietype = noper_trietype;
- }
+ }
} else {
if ( trietype == NOTHING )
trietype = noper_trietype;
@@ -3436,24 +3433,46 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
"", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));

});
- if ( last && trietype != NOTHING ) {
- /* the last branch of the sequence was part of a trie,
- * so we have to construct it here outside of the loop
- */
- made= make_trie( pRExC_state, startbranch, first, scan, tail, count, trietype, depth+1 );
+ if ( last ) {
+ if ( trietype != NOTHING ) {
+ /* the last branch of the sequence was part of a trie,
+ * so we have to construct it here outside of the loop
+ */
+ made= make_trie( pRExC_state, startbranch, first, scan, tail, count, trietype, depth+1 );
#ifdef TRIE_STUDY_OPT
- if ( ((made == MADE_EXACT_TRIE &&
- startbranch == first)
- || ( first_non_open == first )) &&
- depth==0 ) {
- flags |= SCF_TRIE_RESTUDY;
- if ( startbranch == first
- && scan == tail )
- {
- RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
+ if ( ((made == MADE_EXACT_TRIE &&
+ startbranch == first)
+ || ( first_non_open == first )) &&
+ depth==0 ) {
+ flags |= SCF_TRIE_RESTUDY;
+ if ( startbranch == first
+ && scan == tail )
+ {
+ RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
+ }
}
- }
#endif
+ } else {
+ /* at this point we know whatever we have is a NOTHING sequence/branch
+ * AND if 'startbranch' is 'first' then we can turn the whole thing into a NOTHING
+ */
+ if ( startbranch == first ) {
+ regnode *opt;
+ /* the entire thing is a NOTHING sequence, something like this:
+ * (?:|) So we can turn it into a plain NOTHING op. */
+ DEBUG_TRIE_COMPILE_r({
+ regprop(RExC_rx, mysv, cur);
+ PerlIO_printf( Perl_debug_log,
+ "%*s- %s (%d) <NOTHING BRANCH SEQUENCE>\n", (int)depth * 2 + 2,
+ "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
+
+ });
+ OP(startbranch)= NOTHING;
+ NEXT_OFF(startbranch)= tail - startbranch;
+ for ( opt= startbranch + 1; opt < tail ; opt++ )
+ OP(opt)= OPTIMIZED;
+ }
+ }
} /* end if ( last) */
} /* TRIE_MAXBUF is non zero */

@@ -4378,6 +4397,32 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* Lookbehind, or need to calculate parens/evals/stclass: */
&& (scan->flags || data || (flags & SCF_DO_STCLASS))
&& (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
+ if ( OP(scan) == UNLESSM &&
+ scan->flags == 0 &&
+ OP(NEXTOPER(NEXTOPER(scan))) == NOTHING &&
+ OP(regnext(NEXTOPER(NEXTOPER(scan)))) == SUCCEED
+ ) {
+ regnode *opt;
+ regnode *upto= regnext(scan);
+ DEBUG_PARSE_r({
+ SV * const mysv_val=sv_newmortal();
+ DEBUG_STUDYDATA("OPFAIL",data,depth);
+
+ /*DEBUG_PARSE_MSG("opfail");*/
+ regprop(RExC_rx, mysv_val, upto);
+ PerlIO_printf(Perl_debug_log, "~ replace with OPFAIL pointed at %s (%"IVdf") offset %"IVdf"\n",
+ SvPV_nolen_const(mysv_val),
+ (IV)REG_NODE_NUM(upto),
+ (IV)(upto - scan)
+ );
+ });
+ OP(scan) = OPFAIL;
+ NEXT_OFF(scan) = upto - scan;
+ for (opt= scan + 1; opt < upto ; opt++)
+ OP(opt) = OPTIMIZED;
+ scan= upto;
+ continue;
+ }
if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
|| OP(scan) == UNLESSM )
{
@@ -4542,8 +4587,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
}
}
-
-
}
#endif
}
@@ -8412,9 +8455,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
}
break;
}
+ DEBUG_PARSE_r(if (!SIZE_ONLY) {
+ SV * const mysv_val1=sv_newmortal();
+ SV * const mysv_val2=sv_newmortal();
+ DEBUG_PARSE_MSG("lsbr");
+ regprop(RExC_rx, mysv_val1, lastbr);
+ regprop(RExC_rx, mysv_val2, ender);
+ PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+ SvPV_nolen_const(mysv_val1),
+ (IV)REG_NODE_NUM(lastbr),
+ SvPV_nolen_const(mysv_val2),
+ (IV)REG_NODE_NUM(ender),
+ (IV)(ender - lastbr)
+ );
+ });
REGTAIL(pRExC_state, lastbr, ender);

if (have_branch && !SIZE_ONLY) {
+ char is_nothing= 1;
if (depth==1)
RExC_seen |= REG_TOP_LEVEL_BRANCHES;

@@ -8423,11 +8481,44 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
const U8 op = PL_regkind[OP(br)];
if (op == BRANCH) {
REGTAIL_STUDY(pRExC_state, NEXTOPER(br), ender);
+ if (OP(NEXTOPER(br)) != NOTHING || regnext(NEXTOPER(br)) != ender)
+ is_nothing= 0;
}
else if (op == BRANCHJ) {
REGTAIL_STUDY(pRExC_state, NEXTOPER(NEXTOPER(br)), ender);
+ /* for now we always disable this optimisation * /
+ if (OP(NEXTOPER(NEXTOPER(br))) != NOTHING || regnext(NEXTOPER(NEXTOPER(br))) != ender)
+ */
+ is_nothing= 0;
}
}
+ if (is_nothing) {
+ br= PL_regkind[OP(ret)] != BRANCH ? regnext(ret) : ret;
+ DEBUG_PARSE_r(if (!SIZE_ONLY) {
+ SV * const mysv_val1=sv_newmortal();
+ SV * const mysv_val2=sv_newmortal();
+ DEBUG_PARSE_MSG("NADA");
+ regprop(RExC_rx, mysv_val1, ret);
+ regprop(RExC_rx, mysv_val2, ender);
+ PerlIO_printf(Perl_debug_log, "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+ SvPV_nolen_const(mysv_val1),
+ (IV)REG_NODE_NUM(ret),
+ SvPV_nolen_const(mysv_val2),
+ (IV)REG_NODE_NUM(ender),
+ (IV)(ender - ret)
+ );
+ });
+ OP(br)= NOTHING;
+ if (OP(ender) == TAIL) {
+ NEXT_OFF(br)= 0;
+ RExC_emit= br + 1;
+ } else {
+ regnode *opt;
+ for ( opt= br + 1; opt < ender ; opt++ )
+ OP(opt)= OPTIMIZED;
+ NEXT_OFF(br)= ender - br;
+ }
+ }
}
}


--
Perl5 Master Repository
demerphq
2012-06-07 07:08:12 UTC
Permalink
if this patch causes trouble please revert it without hesitation
Post by Yves Orton
In perl.git, the branch blead has been updated
<http://perl5.git.perl.org/perl.git/commitdiff/3b6759a6b10290b6fea26fb98b50fbf5ce4466b5?hp=ae0a0fb2ce70e35111ba07e2a578edd8fcc989f8>
- Log -----------------------------------------------------------------
commit 3b6759a6b10290b6fea26fb98b50fbf5ce4466b5
Date:   Wed Jun 6 08:53:05 2012 +0200
   optimise (?:|) and related NOTHING like constructs out of the compiled optree
   A pattern like (?:|) causes the regex engine to do extra work even though
   it is equivelent to matching nothing. This optimises such sequences into
   more efficient opcodes that do less work, and in some cases optimises
   them away entirely.
-----------------------------------------------------------------------
 regcomp.c |  141 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 116 insertions(+), 25 deletions(-)
diff --git a/regcomp.c b/regcomp.c
index 5ec3bc4..906add7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3372,21 +3372,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                 * in which case we do some bookkeeping, otherwise we update
                                 * the end pointer. */
                                if ( !first ) {
+                                    first = cur;
+                                    trietype = noper_trietype;
                                   if ( noper_trietype == NOTHING ) {
 #if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
                                       regnode * const noper_next = regnext( noper );
-                                       U8 noper_next_type = (noper_next && noper_next != tail) ? OP(noper_next) : 0;
+                                        U8 noper_next_type = (noper_next && noper_next!=tail) ? OP(noper_next) : 0;
                                       U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
 #endif
-                                       if ( noper_next_trietype ) {
-                                           first = cur;
+                                        if ( noper_next_trietype )
                                           trietype = noper_next_trietype;
-                                       }
-                                   } else {
-                                       first = cur;
-                                       trietype = noper_trietype;
-                                   }
+                                    }
                                } else {
                                    if ( trietype == NOTHING )
                                        trietype = noper_trietype;
@@ -3436,24 +3433,46 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
                        });
-                        if ( last && trietype != NOTHING ) {
-                            /* the last branch of the sequence was part of a trie,
-                             * so we have to construct it here outside of the loop
-                             */
-                            made= make_trie( pRExC_state, startbranch, first, scan, tail, count, trietype, depth+1 );
+                        if ( last ) {
+                            if ( trietype != NOTHING ) {
+                                /* the last branch of the sequence was part of a trie,
+                                 * so we have to construct it here outside of the loop
+                                 */
+                                made= make_trie( pRExC_state, startbranch, first, scan, tail, count, trietype, depth+1 );
 #ifdef TRIE_STUDY_OPT
-                            if ( ((made == MADE_EXACT_TRIE &&
-                                 startbranch == first)
-                                 || ( first_non_open == first )) &&
-                                 depth==0 ) {
-                                flags |= SCF_TRIE_RESTUDY;
-                                if ( startbranch == first
-                                     && scan == tail )
-                                {
-                                    RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
+                                if ( ((made == MADE_EXACT_TRIE &&
+                                     startbranch == first)
+                                     || ( first_non_open == first )) &&
+                                     depth==0 ) {
+                                    flags |= SCF_TRIE_RESTUDY;
+                                    if ( startbranch == first
+                                         && scan == tail )
+                                    {
+                                        RExC_seen &=~REG_TOP_LEVEL_BRANCHES;
+                                    }
                                }
-                            }
 #endif
+                            } else {
+                                /* at this point we know whatever we have is a NOTHING sequence/branch
+                                 * AND if 'startbranch' is 'first' then we can turn the whole thing into a NOTHING
+                                 */
+                                if ( startbranch == first ) {
+                                    regnode *opt;
+                                     * (?:|) So we can turn it into a plain NOTHING op. */
+                                    DEBUG_TRIE_COMPILE_r({
+                                        regprop(RExC_rx, mysv, cur);
+                                        PerlIO_printf( Perl_debug_log,
+                                          "%*s- %s (%d) <NOTHING BRANCH SEQUENCE>\n", (int)depth * 2 + 2,
+                                          "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
+
+                                    });
+                                    OP(startbranch)= NOTHING;
+                                    NEXT_OFF(startbranch)= tail - startbranch;
+                                    for ( opt= startbranch + 1; opt < tail ; opt++ )
+                                        OP(opt)= OPTIMIZED;
+                                }
+                            }
                        } /* end if ( last) */
                    } /* TRIE_MAXBUF is non zero */
@@ -4378,6 +4397,32 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                /* Lookbehind, or need to calculate parens/evals/stclass: */
                  && (scan->flags || data || (flags & SCF_DO_STCLASS))
                  && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
+            if ( OP(scan) == UNLESSM &&
+                 scan->flags == 0 &&
+                 OP(NEXTOPER(NEXTOPER(scan))) == NOTHING &&
+                 OP(regnext(NEXTOPER(NEXTOPER(scan)))) == SUCCEED
+            ) {
+                regnode *opt;
+                regnode *upto= regnext(scan);
+                DEBUG_PARSE_r({
+                    SV * const mysv_val=sv_newmortal();
+                    DEBUG_STUDYDATA("OPFAIL",data,depth);
+
+                    /*DEBUG_PARSE_MSG("opfail");*/
+                    regprop(RExC_rx, mysv_val, upto);
+                    PerlIO_printf(Perl_debug_log, "~ replace with OPFAIL pointed at %s (%"IVdf") offset %"IVdf"\n",
+                                  SvPV_nolen_const(mysv_val),
+                                  (IV)REG_NODE_NUM(upto),
+                                  (IV)(upto - scan)
+                    );
+                });
+                OP(scan) = OPFAIL;
+                NEXT_OFF(scan) = upto - scan;
+                for (opt= scan + 1; opt < upto ; opt++)
+                    OP(opt) = OPTIMIZED;
+                scan= upto;
+                continue;
+            }
            if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
                || OP(scan) == UNLESSM )
            {
@@ -4542,8 +4587,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        }
                    }
                }
-
-
           }
 #endif
       }
@@ -8412,9 +8455,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            }
           break;
       }
+        DEBUG_PARSE_r(if (!SIZE_ONLY) {
+            SV * const mysv_val1=sv_newmortal();
+            SV * const mysv_val2=sv_newmortal();
+            DEBUG_PARSE_MSG("lsbr");
+            regprop(RExC_rx, mysv_val1, lastbr);
+            regprop(RExC_rx, mysv_val2, ender);
+            PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+                          SvPV_nolen_const(mysv_val1),
+                          (IV)REG_NODE_NUM(lastbr),
+                          SvPV_nolen_const(mysv_val2),
+                          (IV)REG_NODE_NUM(ender),
+                          (IV)(ender - lastbr)
+            );
+        });
        REGTAIL(pRExC_state, lastbr, ender);
       if (have_branch && !SIZE_ONLY) {
+            char is_nothing= 1;
           if (depth==1)
               RExC_seen |= REG_TOP_LEVEL_BRANCHES;
@@ -8423,11 +8481,44 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
               const U8 op = PL_regkind[OP(br)];
               if (op == BRANCH) {
                    REGTAIL_STUDY(pRExC_state, NEXTOPER(br), ender);
+                    if (OP(NEXTOPER(br)) != NOTHING || regnext(NEXTOPER(br)) != ender)
+                        is_nothing= 0;
               }
               else if (op == BRANCHJ) {
                    REGTAIL_STUDY(pRExC_state, NEXTOPER(NEXTOPER(br)), ender);
+                    /* for now we always disable this optimisation * /
+                    if (OP(NEXTOPER(NEXTOPER(br))) != NOTHING || regnext(NEXTOPER(NEXTOPER(br))) != ender)
+                    */
+                        is_nothing= 0;
               }
           }
+            if (is_nothing) {
+                br= PL_regkind[OP(ret)] != BRANCH ? regnext(ret) : ret;
+                DEBUG_PARSE_r(if (!SIZE_ONLY) {
+                    SV * const mysv_val1=sv_newmortal();
+                    SV * const mysv_val2=sv_newmortal();
+                    DEBUG_PARSE_MSG("NADA");
+                    regprop(RExC_rx, mysv_val1, ret);
+                    regprop(RExC_rx, mysv_val2, ender);
+                    PerlIO_printf(Perl_debug_log, "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
+                                  SvPV_nolen_const(mysv_val1),
+                                  (IV)REG_NODE_NUM(ret),
+                                  SvPV_nolen_const(mysv_val2),
+                                  (IV)REG_NODE_NUM(ender),
+                                  (IV)(ender - ret)
+                    );
+                });
+                OP(br)= NOTHING;
+                if (OP(ender) == TAIL) {
+                    NEXT_OFF(br)= 0;
+                    RExC_emit= br + 1;
+                } else {
+                    regnode *opt;
+                    for ( opt= br + 1; opt < ender ; opt++ )
+                        OP(opt)= OPTIMIZED;
+                    NEXT_OFF(br)= ender - br;
+                }
+            }
       }
    }
--
Perl5 Master Repository
Loading...