From 523008768c53cc57af216e297bad5bbe76fcd260 Mon Sep 17 00:00:00 2001 From: Flavio Soibelmann Glock Date: Thu, 29 Jan 2026 22:27:17 +0100 Subject: [PATCH] Fix next-if in loops when refactoring creates closures --- .../astrefactor/LargeBlockRefactorer.java | 60 +++++++++++++++---- .../astvisitor/ControlFlowFinder.java | 8 ++- .../org/perlonjava/codegen/EmitBlock.java | 16 ++--- .../perlonjava/codegen/EmitControlFlow.java | 16 +++++ .../org/perlonjava/codegen/EmitForeach.java | 7 ++- .../org/perlonjava/codegen/EmitStatement.java | 4 +- .../org/perlonjava/codegen/JavaClassInfo.java | 13 ++++ .../org/perlonjava/codegen/LoopLabels.java | 15 +++++ .../unit/next_if_should_skip_iteration.t | 35 +++++++++++ 9 files changed, 152 insertions(+), 22 deletions(-) create mode 100644 src/test/resources/unit/next_if_should_skip_iteration.t diff --git a/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java b/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java index 497db431f..1316d0130 100644 --- a/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java +++ b/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java @@ -240,8 +240,27 @@ private static boolean shouldRefactorBlock(BlockNode node, EmitterVisitor emitte return false; } - // Check if we're in a context that allows refactoring - return refactorEnabled || !emitterVisitor.ctx.javaClassInfo.gotoLabelStack.isEmpty(); + // Codegen-time refactoring must be conservative. + // Wrapping statements in extracted closures changes control-flow semantics by losing the + // enclosing loop label stack. Only refactor when explicitly enabled and the block is + // actually large enough to risk hitting JVM method-size limits. + // + // If you need to re-enable/extend codegen-time refactoring in the future, do NOT wrap + // arbitrary statements into closures inside a loop body. In particular, moving + // `next/last/redo/goto` into an extracted closure will make codegen see an empty + // loop-label stack and compile these as non-local control flow. + // + // Safe options (in order of preference): + // - Keep any loop-control statements in the original method (never move them into a closure). + // - If you must extract, add an explicit mechanism to propagate and handle loop control across + // the extracted boundary (e.g. via RuntimeControlFlowRegistry + loop boundary checks, or by + // implementing/turning on loop handlers/call-site checks). + if (!refactorEnabled) { + return false; + } + + long estimatedSize = estimateTotalBytecodeSizeCapped(node.elements, LARGE_BYTECODE_SIZE); + return estimatedSize > LARGE_BYTECODE_SIZE; } /** @@ -323,6 +342,20 @@ private static void trySmartChunking(BlockNode node, Parser parser, int maxNeste ControlFlowFinder blockFinder = controlFlowFinderTl.get(); blockFinder.scan(node); boolean hasAnyControlFlowInBlock = blockFinder.foundControlFlow; + + // IMPORTANT: Smart chunking wraps segments in extracted closures. + // Loop control operators (next/last/redo/goto) must remain in the original scope + // so codegen can resolve loop labels and emit local jumps. + // Even when chunk boundaries try to "break" on control flow, the suffix stitching + // can still place the control-flow node inside a generated closure. + // + // Be conservative: if the block contains ANY control flow, do not chunk it. + if (hasAnyControlFlowInBlock) { + if (parser != null || node.annotations != null) { + node.setAnnotation("refactorSkipReason", "Smart chunking skipped: block contains control flow"); + } + return; + } boolean treatAllElementsAsSafe = !hasLabelElement && !hasAnyControlFlowInBlock; if (treatAllElementsAsSafe) { @@ -527,11 +560,16 @@ private static boolean shouldBreakChunk(Node element) { * Try to refactor the entire block as a subroutine. */ private static boolean tryWholeBlockRefactoring(EmitterVisitor emitterVisitor, BlockNode node) { - // Check for unsafe control flow using ControlFlowDetectorVisitor - // This properly handles loop depth - unlabeled next/last/redo inside loops are safe - controlFlowDetector.reset(); - controlFlowDetector.scan(node); - if (controlFlowDetector.hasUnsafeControlFlow()) { + // IMPORTANT: wrapping a block in an extracted closure/subroutine loses the enclosing + // loop label stack. Even control flow that is "safe" in-place (e.g. unlabeled `next` + // inside a real loop) becomes unsafe when moved into a closure because codegen will + // not find loop labels and will emit non-local tagged returns. + // + // Therefore, be conservative here: if the block contains ANY control flow statement, + // do not refactor the whole block into a subroutine. + ControlFlowFinder finder = controlFlowFinderTl.get(); + finder.scan(node); + if (finder.foundControlFlow) { return false; } @@ -581,10 +619,12 @@ private static boolean chunkWouldBeWrapped(List chunk) { * @return true if unsafe control flow found */ private static boolean chunkHasUnsafeControlFlow(List chunk) { - controlFlowDetector.reset(); + // Same reasoning as tryWholeBlockRefactoring(): moving a chunk into a closure loses + // loop context, so ANY control flow statement makes the chunk unsafe. + ControlFlowFinder finder = controlFlowFinderTl.get(); for (Node element : chunk) { - element.accept(controlFlowDetector); - if (controlFlowDetector.hasUnsafeControlFlow()) { + finder.scan(element); + if (finder.foundControlFlow) { return true; } } diff --git a/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java b/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java index d9cce3ec7..36c407cd4 100644 --- a/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java +++ b/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java @@ -41,8 +41,12 @@ public void scan(Node root) { if (root instanceof AbstractNode abstractNode) { Boolean cached = abstractNode.getCachedHasAnyControlFlow(); - if (cached != null) { - foundControlFlow = cached; + // IMPORTANT: Only trust positive cache entries. + // A cached FALSE can become stale if the AST is rewritten (e.g. during large-block + // refactoring / macros) and would cause us to miss control flow, leading to unsafe + // extraction of chunks that contain next/last/redo/goto. + if (Boolean.TRUE.equals(cached)) { + foundControlFlow = true; return; } } diff --git a/src/main/java/org/perlonjava/codegen/EmitBlock.java b/src/main/java/org/perlonjava/codegen/EmitBlock.java index 203565f63..a40369b1b 100644 --- a/src/main/java/org/perlonjava/codegen/EmitBlock.java +++ b/src/main/java/org/perlonjava/codegen/EmitBlock.java @@ -132,13 +132,12 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) { if (node.isLoop) { // A labeled/bare block used as a loop target (e.g. SKIP: { ... }) is a - // pseudo-loop: it supports labeled next/last/redo (e.g. next SKIP), but - // an unlabeled next/last/redo must target the nearest enclosing true loop. + // pseudo-loop: it supports *labeled* next/last/redo (e.g. next SKIP), but + // an *unlabeled* next/last/redo must target the nearest enclosing true loop. // - // However, a *bare* block with loop control (e.g. `{ ...; redo }` or - // `{ ... } continue { ... }`) is itself a valid target for *unlabeled* - // last/next/redo, matching Perl semantics. - boolean isBareBlock = node.labelName == null; + // Perl semantics: unlabeled next/last/redo prefer a real loop if one exists, + // but outside any real loop a bare block can act as the target. + boolean isUnlabeledTarget = node.labelName == null; emitterVisitor.ctx.javaClassInfo.pushLoopLabels( node.labelName, nextLabel, @@ -146,8 +145,9 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) { nextLabel, emitterVisitor.ctx.javaClassInfo.stackLevelManager.getStackLevel(), emitterVisitor.ctx.contextType, - isBareBlock, - isBareBlock); + true, + isUnlabeledTarget, + false); } // Special case: detect pattern of `local $_` followed by `For1Node` with needsArrayOfAlias diff --git a/src/main/java/org/perlonjava/codegen/EmitControlFlow.java b/src/main/java/org/perlonjava/codegen/EmitControlFlow.java index 01ff365e0..259e2c128 100644 --- a/src/main/java/org/perlonjava/codegen/EmitControlFlow.java +++ b/src/main/java/org/perlonjava/codegen/EmitControlFlow.java @@ -70,6 +70,22 @@ static void handleNextOperator(EmitterContext ctx, OperatorNode node) { } if (loopLabels == null) { + if (ctx.compilerOptions != null && ctx.compilerOptions.debugEnabled) { + ctx.logDebug("visit(next): loopLabels == null; dumping loopLabelStack (top-first):"); + int depth = 0; + for (LoopLabels ll : ctx.javaClassInfo.loopLabelStack) { + ctx.logDebug( + " [" + (depth++) + "] label=" + ll.labelName + + " isTrueLoop=" + ll.isTrueLoop + + " isRealLoop=" + ll.isRealLoop + + " isUnlabeledTarget=" + ll.isUnlabeledControlFlowTarget + + " asmStackLevel=" + ll.asmStackLevel + + " context=" + ll.context); + } + if (depth == 0) { + ctx.logDebug(" "); + } + } // Non-local control flow: return tagged RuntimeControlFlowList ctx.logDebug("visit(next): Non-local control flow for " + operator + " " + labelStr); diff --git a/src/main/java/org/perlonjava/codegen/EmitForeach.java b/src/main/java/org/perlonjava/codegen/EmitForeach.java index 7b8b95acc..c9e67c104 100644 --- a/src/main/java/org/perlonjava/codegen/EmitForeach.java +++ b/src/main/java/org/perlonjava/codegen/EmitForeach.java @@ -305,9 +305,11 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) { emitterVisitor.ctx.javaClassInfo.stackLevelManager.getStackLevel(), RuntimeContextType.VOID); currentLoopLabels.controlFlowHandler = controlFlowHandler; - + emitterVisitor.ctx.logDebug("FOR1 pushLoopLabels label=" + node.labelName + " asmStackLevel=" + currentLoopLabels.asmStackLevel); emitterVisitor.ctx.javaClassInfo.pushLoopLabels(currentLoopLabels); + emitterVisitor.ctx.logDebug("FOR1 loopLabelStack depth after push=" + emitterVisitor.ctx.javaClassInfo.loopLabelStack.size()); + node.body.accept(emitterVisitor.with(RuntimeContextType.VOID)); // Check RuntimeControlFlowRegistry for non-local control flow @@ -315,6 +317,9 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) { LoopLabels poppedLabels = emitterVisitor.ctx.javaClassInfo.popLoopLabels(); + emitterVisitor.ctx.logDebug("FOR1 popLoopLabels label=" + (poppedLabels != null ? poppedLabels.labelName : null) + + " remainingDepth=" + emitterVisitor.ctx.javaClassInfo.loopLabelStack.size()); + mv.visitLabel(continueLabel); if (node.continueBlock != null) { diff --git a/src/main/java/org/perlonjava/codegen/EmitStatement.java b/src/main/java/org/perlonjava/codegen/EmitStatement.java index 3cc9ed7dd..2ddbac863 100644 --- a/src/main/java/org/perlonjava/codegen/EmitStatement.java +++ b/src/main/java/org/perlonjava/codegen/EmitStatement.java @@ -159,6 +159,7 @@ public static void emitFor3(EmitterVisitor emitterVisitor, For3Node node) { // Unlabeled next/last/redo must be allowed for *bare* blocks (no label), but // must *not* accidentally target pseudo-loops like `SKIP: { ... }`. boolean isUnlabeledTarget = !node.isSimpleBlock || node.labelName == null; + emitterVisitor.ctx.javaClassInfo.pushLoopLabels( node.labelName, continueLabel, @@ -167,7 +168,8 @@ public static void emitFor3(EmitterVisitor emitterVisitor, For3Node node) { emitterVisitor.ctx.javaClassInfo.stackLevelManager.getStackLevel(), RuntimeContextType.VOID, true, - isUnlabeledTarget); + isUnlabeledTarget, + !node.isSimpleBlock); // Visit the loop body node.body.accept(voidVisitor); diff --git a/src/main/java/org/perlonjava/codegen/JavaClassInfo.java b/src/main/java/org/perlonjava/codegen/JavaClassInfo.java index e29219ba4..ebc0df943 100644 --- a/src/main/java/org/perlonjava/codegen/JavaClassInfo.java +++ b/src/main/java/org/perlonjava/codegen/JavaClassInfo.java @@ -154,6 +154,10 @@ public void pushLoopLabels(String labelName, Label nextLabel, Label redoLabel, L public void pushLoopLabels(String labelName, Label nextLabel, Label redoLabel, Label lastLabel, int stackLevel, int context, boolean isTrueLoop, boolean isUnlabeledControlFlowTarget) { loopLabelStack.push(new LoopLabels(labelName, nextLabel, redoLabel, lastLabel, stackLevel, context, isTrueLoop, isUnlabeledControlFlowTarget)); } + + public void pushLoopLabels(String labelName, Label nextLabel, Label redoLabel, Label lastLabel, int stackLevel, int context, boolean isTrueLoop, boolean isUnlabeledControlFlowTarget, boolean isRealLoop) { + loopLabelStack.push(new LoopLabels(labelName, nextLabel, redoLabel, lastLabel, stackLevel, context, isTrueLoop, isUnlabeledControlFlowTarget, isRealLoop)); + } /** * Pushes a LoopLabels object onto the loop label stack. @@ -228,11 +232,20 @@ public LoopLabels findLoopLabelsByName(String labelName) { * @return the innermost LoopLabels with isTrueLoop=true, or null if none */ public LoopLabels findInnermostTrueLoopLabels() { + // Prefer a real loop (for/foreach/while/until) if one exists. + for (LoopLabels loopLabels : loopLabelStack) { + if (loopLabels != null && loopLabels.isUnlabeledControlFlowTarget && loopLabels.isRealLoop) { + return loopLabels; + } + } + + // Fallback: allow a block to act as a loop target only when there's no real loop. for (LoopLabels loopLabels : loopLabelStack) { if (loopLabels != null && loopLabels.isUnlabeledControlFlowTarget) { return loopLabels; } } + return null; } diff --git a/src/main/java/org/perlonjava/codegen/LoopLabels.java b/src/main/java/org/perlonjava/codegen/LoopLabels.java index 878248275..3278deb3a 100644 --- a/src/main/java/org/perlonjava/codegen/LoopLabels.java +++ b/src/main/java/org/perlonjava/codegen/LoopLabels.java @@ -51,6 +51,15 @@ public class LoopLabels { */ public boolean isTrueLoop; + /** + * Whether this is a "real" loop construct (for/foreach/while/until). + * + * Perl semantics for *unlabeled* next/last/redo: + * - Prefer the nearest enclosing real loop. + * - Only when there is no real loop, a block can act as a loop target. + */ + public boolean isRealLoop; + /** * Whether unlabeled next/last/redo should target this loop/block. * @@ -101,6 +110,12 @@ public LoopLabels(String labelName, Label nextLabel, Label redoLabel, Label last this.context = context; this.isTrueLoop = isTrueLoop; this.isUnlabeledControlFlowTarget = isUnlabeledControlFlowTarget; + this.isRealLoop = true; + } + + public LoopLabels(String labelName, Label nextLabel, Label redoLabel, Label lastLabel, int asmStackLevel, int context, boolean isTrueLoop, boolean isUnlabeledControlFlowTarget, boolean isRealLoop) { + this(labelName, nextLabel, redoLabel, lastLabel, asmStackLevel, context, isTrueLoop, isUnlabeledControlFlowTarget); + this.isRealLoop = isRealLoop; } /** diff --git a/src/test/resources/unit/next_if_should_skip_iteration.t b/src/test/resources/unit/next_if_should_skip_iteration.t new file mode 100644 index 000000000..4d2d736fd --- /dev/null +++ b/src/test/resources/unit/next_if_should_skip_iteration.t @@ -0,0 +1,35 @@ +use strict; +use warnings; +use Test::More; + +# Regression test for perl5_t/t/uni/variables.t drift. +# In perl, `next if (...)` must skip the *entire* loop iteration. +# jperl currently mis-handles this in a specific shape, causing +12 extra checks. + +my $count = 0; + +for my $ord (0x21 .. 0x2f) { # includes '#' (0x23) and '*' (0x2a) + my $chr = chr $ord; + my $tests = 0; + + if ($chr =~ /[[:punct:][:digit:]]/a) { + next if ($chr eq '#' or $chr eq '*'); + + $count++; + $tests++; + $count++; + $tests++; + } + + $count++; + $tests++; + + SKIP: { + my $pad = 6 - $tests; + $count += $pad if $pad > 0; + } +} + +is($count, 78, 'next-if must skip the whole iteration (no +12 drift)'); + +done_testing();