From 523008768c53cc57af216e297bad5bbe76fcd260 Mon Sep 17 00:00:00 2001
From: Flavio Soibelmann Glock <fglock@gmail.com>
Date: Thu, 29 Jan 2026 22:27:17 +0100
Subject: [PATCH] Fix next-if in loops when refactoring creates closures

---
 .../astrefactor/LargeBlockRefactorer.java     | 60 +++++++++++++++----
 .../astvisitor/ControlFlowFinder.java         |  8 ++-
 .../org/perlonjava/codegen/EmitBlock.java     | 16 ++---
 .../perlonjava/codegen/EmitControlFlow.java   | 16 +++++
 .../org/perlonjava/codegen/EmitForeach.java   |  7 ++-
 .../org/perlonjava/codegen/EmitStatement.java |  4 +-
 .../org/perlonjava/codegen/JavaClassInfo.java | 13 ++++
 .../org/perlonjava/codegen/LoopLabels.java    | 15 +++++
 .../unit/next_if_should_skip_iteration.t      | 35 +++++++++++
 9 files changed, 152 insertions(+), 22 deletions(-)
 create mode 100644 src/test/resources/unit/next_if_should_skip_iteration.t

diff --git a/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java b/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java
index 497db431f..1316d0130 100644
--- a/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java
+++ b/src/main/java/org/perlonjava/astrefactor/LargeBlockRefactorer.java
@@ -240,8 +240,27 @@ private static boolean shouldRefactorBlock(BlockNode node, EmitterVisitor emitte
             return false;
         }
 
-        // Check if we're in a context that allows refactoring
-        return refactorEnabled || !emitterVisitor.ctx.javaClassInfo.gotoLabelStack.isEmpty();
+        // Codegen-time refactoring must be conservative.
+        // Wrapping statements in extracted closures changes control-flow semantics by losing the
+        // enclosing loop label stack. Only refactor when explicitly enabled and the block is
+        // actually large enough to risk hitting JVM method-size limits.
+        //
+        // If you need to re-enable/extend codegen-time refactoring in the future, do NOT wrap
+        // arbitrary statements into closures inside a loop body. In particular, moving
+        // `next/last/redo/goto` into an extracted closure will make codegen see an empty
+        // loop-label stack and compile these as non-local control flow.
+        //
+        // Safe options (in order of preference):
+        // - Keep any loop-control statements in the original method (never move them into a closure).
+        // - If you must extract, add an explicit mechanism to propagate and handle loop control across
+        //   the extracted boundary (e.g. via RuntimeControlFlowRegistry + loop boundary checks, or by
+        //   implementing/turning on loop handlers/call-site checks).
+        if (!refactorEnabled) {
+            return false;
+        }
+
+        long estimatedSize = estimateTotalBytecodeSizeCapped(node.elements, LARGE_BYTECODE_SIZE);
+        return estimatedSize > LARGE_BYTECODE_SIZE;
     }
 
     /**
@@ -323,6 +342,20 @@ private static void trySmartChunking(BlockNode node, Parser parser, int maxNeste
         ControlFlowFinder blockFinder = controlFlowFinderTl.get();
         blockFinder.scan(node);
         boolean hasAnyControlFlowInBlock = blockFinder.foundControlFlow;
+
+        // IMPORTANT: Smart chunking wraps segments in extracted closures.
+        // Loop control operators (next/last/redo/goto) must remain in the original scope
+        // so codegen can resolve loop labels and emit local jumps.
+        // Even when chunk boundaries try to "break" on control flow, the suffix stitching
+        // can still place the control-flow node inside a generated closure.
+        //
+        // Be conservative: if the block contains ANY control flow, do not chunk it.
+        if (hasAnyControlFlowInBlock) {
+            if (parser != null || node.annotations != null) {
+                node.setAnnotation("refactorSkipReason", "Smart chunking skipped: block contains control flow");
+            }
+            return;
+        }
         boolean treatAllElementsAsSafe = !hasLabelElement && !hasAnyControlFlowInBlock;
 
         if (treatAllElementsAsSafe) {
@@ -527,11 +560,16 @@ private static boolean shouldBreakChunk(Node element) {
      * Try to refactor the entire block as a subroutine.
      */
     private static boolean tryWholeBlockRefactoring(EmitterVisitor emitterVisitor, BlockNode node) {
-        // Check for unsafe control flow using ControlFlowDetectorVisitor
-        // This properly handles loop depth - unlabeled next/last/redo inside loops are safe
-        controlFlowDetector.reset();
-        controlFlowDetector.scan(node);
-        if (controlFlowDetector.hasUnsafeControlFlow()) {
+        // IMPORTANT: wrapping a block in an extracted closure/subroutine loses the enclosing
+        // loop label stack. Even control flow that is "safe" in-place (e.g. unlabeled `next`
+        // inside a real loop) becomes unsafe when moved into a closure because codegen will
+        // not find loop labels and will emit non-local tagged returns.
+        //
+        // Therefore, be conservative here: if the block contains ANY control flow statement,
+        // do not refactor the whole block into a subroutine.
+        ControlFlowFinder finder = controlFlowFinderTl.get();
+        finder.scan(node);
+        if (finder.foundControlFlow) {
             return false;
         }
 
@@ -581,10 +619,12 @@ private static boolean chunkWouldBeWrapped(List<Node> chunk) {
      * @return true if unsafe control flow found
      */
     private static boolean chunkHasUnsafeControlFlow(List<Node> chunk) {
-        controlFlowDetector.reset();
+        // Same reasoning as tryWholeBlockRefactoring(): moving a chunk into a closure loses
+        // loop context, so ANY control flow statement makes the chunk unsafe.
+        ControlFlowFinder finder = controlFlowFinderTl.get();
         for (Node element : chunk) {
-            element.accept(controlFlowDetector);
-            if (controlFlowDetector.hasUnsafeControlFlow()) {
+            finder.scan(element);
+            if (finder.foundControlFlow) {
                 return true;
             }
         }
diff --git a/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java b/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java
index d9cce3ec7..36c407cd4 100644
--- a/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java
+++ b/src/main/java/org/perlonjava/astvisitor/ControlFlowFinder.java
@@ -41,8 +41,12 @@ public void scan(Node root) {
 
         if (root instanceof AbstractNode abstractNode) {
             Boolean cached = abstractNode.getCachedHasAnyControlFlow();
-            if (cached != null) {
-                foundControlFlow = cached;
+            // IMPORTANT: Only trust positive cache entries.
+            // A cached FALSE can become stale if the AST is rewritten (e.g. during large-block
+            // refactoring / macros) and would cause us to miss control flow, leading to unsafe
+            // extraction of chunks that contain next/last/redo/goto.
+            if (Boolean.TRUE.equals(cached)) {
+                foundControlFlow = true;
                 return;
             }
         }
diff --git a/src/main/java/org/perlonjava/codegen/EmitBlock.java b/src/main/java/org/perlonjava/codegen/EmitBlock.java
index 203565f63..a40369b1b 100644
--- a/src/main/java/org/perlonjava/codegen/EmitBlock.java
+++ b/src/main/java/org/perlonjava/codegen/EmitBlock.java
@@ -132,13 +132,12 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) {
 
         if (node.isLoop) {
             // A labeled/bare block used as a loop target (e.g. SKIP: { ... }) is a
-            // pseudo-loop: it supports labeled next/last/redo (e.g. next SKIP), but
-            // an unlabeled next/last/redo must target the nearest enclosing true loop.
+            // pseudo-loop: it supports *labeled* next/last/redo (e.g. next SKIP), but
+            // an *unlabeled* next/last/redo must target the nearest enclosing true loop.
             //
-            // However, a *bare* block with loop control (e.g. `{ ...; redo }` or
-            // `{ ... } continue { ... }`) is itself a valid target for *unlabeled*
-            // last/next/redo, matching Perl semantics.
-            boolean isBareBlock = node.labelName == null;
+            // Perl semantics: unlabeled next/last/redo prefer a real loop if one exists,
+            // but outside any real loop a bare block can act as the target.
+            boolean isUnlabeledTarget = node.labelName == null;
             emitterVisitor.ctx.javaClassInfo.pushLoopLabels(
                     node.labelName,
                     nextLabel,
@@ -146,8 +145,9 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) {
                     nextLabel,
                     emitterVisitor.ctx.javaClassInfo.stackLevelManager.getStackLevel(),
                     emitterVisitor.ctx.contextType,
-                    isBareBlock,
-                    isBareBlock);
+                    true,
+                    isUnlabeledTarget,
+                    false);
         }
 
         // Special case: detect pattern of `local $_` followed by `For1Node` with needsArrayOfAlias
diff --git a/src/main/java/org/perlonjava/codegen/EmitControlFlow.java b/src/main/java/org/perlonjava/codegen/EmitControlFlow.java
index 01ff365e0..259e2c128 100644
--- a/src/main/java/org/perlonjava/codegen/EmitControlFlow.java
+++ b/src/main/java/org/perlonjava/codegen/EmitControlFlow.java
@@ -70,6 +70,22 @@ static void handleNextOperator(EmitterContext ctx, OperatorNode node) {
         }
         
         if (loopLabels == null) {
+            if (ctx.compilerOptions != null && ctx.compilerOptions.debugEnabled) {
+                ctx.logDebug("visit(next): loopLabels == null; dumping loopLabelStack (top-first):");
+                int depth = 0;
+                for (LoopLabels ll : ctx.javaClassInfo.loopLabelStack) {
+                    ctx.logDebug(
+                            "  [" + (depth++) + "] label=" + ll.labelName +
+                                    " isTrueLoop=" + ll.isTrueLoop +
+                                    " isRealLoop=" + ll.isRealLoop +
+                                    " isUnlabeledTarget=" + ll.isUnlabeledControlFlowTarget +
+                                    " asmStackLevel=" + ll.asmStackLevel +
+                                    " context=" + ll.context);
+                }
+                if (depth == 0) {
+                    ctx.logDebug("  <empty>");
+                }
+            }
             // Non-local control flow: return tagged RuntimeControlFlowList
             ctx.logDebug("visit(next): Non-local control flow for " + operator + " " + labelStr);
             
diff --git a/src/main/java/org/perlonjava/codegen/EmitForeach.java b/src/main/java/org/perlonjava/codegen/EmitForeach.java
index 7b8b95acc..c9e67c104 100644
--- a/src/main/java/org/perlonjava/codegen/EmitForeach.java
+++ b/src/main/java/org/perlonjava/codegen/EmitForeach.java
@@ -305,9 +305,11 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) {
                 emitterVisitor.ctx.javaClassInfo.stackLevelManager.getStackLevel(),
                 RuntimeContextType.VOID);
         currentLoopLabels.controlFlowHandler = controlFlowHandler;
-
+        emitterVisitor.ctx.logDebug("FOR1 pushLoopLabels label=" + node.labelName + " asmStackLevel=" + currentLoopLabels.asmStackLevel);
         emitterVisitor.ctx.javaClassInfo.pushLoopLabels(currentLoopLabels);
 
+        emitterVisitor.ctx.logDebug("FOR1 loopLabelStack depth after push=" + emitterVisitor.ctx.javaClassInfo.loopLabelStack.size());
+
         node.body.accept(emitterVisitor.with(RuntimeContextType.VOID));
         
         // Check RuntimeControlFlowRegistry for non-local control flow
@@ -315,6 +317,9 @@ public static void emitFor1(EmitterVisitor emitterVisitor, For1Node node) {
 
         LoopLabels poppedLabels = emitterVisitor.ctx.javaClassInfo.popLoopLabels();
 
+        emitterVisitor.ctx.logDebug("FOR1 popLoopLabels label=" + (poppedLabels != null ? poppedLabels.labelName : null) +
+                " remainingDepth=" + emitterVisitor.ctx.javaClassInfo.loopLabelStack.size());
+
         mv.visitLabel(continueLabel);
 
         if (node.continueBlock != null) {
diff --git a/src/main/java/org/perlonjava/codegen/EmitStatement.java b/src/main/java/org/perlonjava/codegen/EmitStatement.java
index 3cc9ed7dd..2ddbac863 100644
--- a/src/main/java/org/perlonjava/codegen/EmitStatement.java
+++ b/src/main/java/org/perlonjava/codegen/EmitStatement.java
@@ -159,6 +159,7 @@ public static void emitFor3(EmitterVisitor emitterVisitor, For3Node node) {
                 // Unlabeled next/last/redo must be allowed for *bare* blocks (no label), but
                 // must *not* accidentally target pseudo-loops like `SKIP: { ... }`.
                 boolean isUnlabeledTarget = !node.isSimpleBlock || node.labelName == null;
+
                 emitterVisitor.ctx.javaClassInfo.pushLoopLabels(
                         node.labelName,
                         continueLabel,
@@ -167,7 +168,8 @@ public static void emitFor3(EmitterVisitor emitterVisitor, For3Node node) {
                         emitterVisitor.ctx.javaClassInfo.stackLevelManager.getStackLevel(),
                         RuntimeContextType.VOID,
                         true,
-                        isUnlabeledTarget);
+                        isUnlabeledTarget,
+                        !node.isSimpleBlock);
 
                 // Visit the loop body
                 node.body.accept(voidVisitor);
diff --git a/src/main/java/org/perlonjava/codegen/JavaClassInfo.java b/src/main/java/org/perlonjava/codegen/JavaClassInfo.java
index e29219ba4..ebc0df943 100644
--- a/src/main/java/org/perlonjava/codegen/JavaClassInfo.java
+++ b/src/main/java/org/perlonjava/codegen/JavaClassInfo.java
@@ -154,6 +154,10 @@ public void pushLoopLabels(String labelName, Label nextLabel, Label redoLabel, L
     public void pushLoopLabels(String labelName, Label nextLabel, Label redoLabel, Label lastLabel, int stackLevel, int context, boolean isTrueLoop, boolean isUnlabeledControlFlowTarget) {
         loopLabelStack.push(new LoopLabels(labelName, nextLabel, redoLabel, lastLabel, stackLevel, context, isTrueLoop, isUnlabeledControlFlowTarget));
     }
+
+    public void pushLoopLabels(String labelName, Label nextLabel, Label redoLabel, Label lastLabel, int stackLevel, int context, boolean isTrueLoop, boolean isUnlabeledControlFlowTarget, boolean isRealLoop) {
+        loopLabelStack.push(new LoopLabels(labelName, nextLabel, redoLabel, lastLabel, stackLevel, context, isTrueLoop, isUnlabeledControlFlowTarget, isRealLoop));
+    }
     
     /**
      * Pushes a LoopLabels object onto the loop label stack.
@@ -228,11 +232,20 @@ public LoopLabels findLoopLabelsByName(String labelName) {
      * @return the innermost LoopLabels with isTrueLoop=true, or null if none
      */
     public LoopLabels findInnermostTrueLoopLabels() {
+        // Prefer a real loop (for/foreach/while/until) if one exists.
+        for (LoopLabels loopLabels : loopLabelStack) {
+            if (loopLabels != null && loopLabels.isUnlabeledControlFlowTarget && loopLabels.isRealLoop) {
+                return loopLabels;
+            }
+        }
+
+        // Fallback: allow a block to act as a loop target only when there's no real loop.
         for (LoopLabels loopLabels : loopLabelStack) {
             if (loopLabels != null && loopLabels.isUnlabeledControlFlowTarget) {
                 return loopLabels;
             }
         }
+
         return null;
     }
 
diff --git a/src/main/java/org/perlonjava/codegen/LoopLabels.java b/src/main/java/org/perlonjava/codegen/LoopLabels.java
index 878248275..3278deb3a 100644
--- a/src/main/java/org/perlonjava/codegen/LoopLabels.java
+++ b/src/main/java/org/perlonjava/codegen/LoopLabels.java
@@ -51,6 +51,15 @@ public class LoopLabels {
      */
     public boolean isTrueLoop;
 
+    /**
+     * Whether this is a "real" loop construct (for/foreach/while/until).
+     *
+     * Perl semantics for *unlabeled* next/last/redo:
+     * - Prefer the nearest enclosing real loop.
+     * - Only when there is no real loop, a block can act as a loop target.
+     */
+    public boolean isRealLoop;
+
     /**
      * Whether unlabeled next/last/redo should target this loop/block.
      *
@@ -101,6 +110,12 @@ public LoopLabels(String labelName, Label nextLabel, Label redoLabel, Label last
         this.context = context;
         this.isTrueLoop = isTrueLoop;
         this.isUnlabeledControlFlowTarget = isUnlabeledControlFlowTarget;
+        this.isRealLoop = true;
+    }
+
+    public LoopLabels(String labelName, Label nextLabel, Label redoLabel, Label lastLabel, int asmStackLevel, int context, boolean isTrueLoop, boolean isUnlabeledControlFlowTarget, boolean isRealLoop) {
+        this(labelName, nextLabel, redoLabel, lastLabel, asmStackLevel, context, isTrueLoop, isUnlabeledControlFlowTarget);
+        this.isRealLoop = isRealLoop;
     }
 
     /**
diff --git a/src/test/resources/unit/next_if_should_skip_iteration.t b/src/test/resources/unit/next_if_should_skip_iteration.t
new file mode 100644
index 000000000..4d2d736fd
--- /dev/null
+++ b/src/test/resources/unit/next_if_should_skip_iteration.t
@@ -0,0 +1,35 @@
+use strict;
+use warnings;
+use Test::More;
+
+# Regression test for perl5_t/t/uni/variables.t drift.
+# In perl, `next if (...)` must skip the *entire* loop iteration.
+# jperl currently mis-handles this in a specific shape, causing +12 extra checks.
+
+my $count = 0;
+
+for my $ord (0x21 .. 0x2f) { # includes '#' (0x23) and '*' (0x2a)
+    my $chr = chr $ord;
+    my $tests = 0;
+
+    if ($chr =~ /[[:punct:][:digit:]]/a) {
+        next if ($chr eq '#' or $chr eq '*');
+
+        $count++;
+        $tests++;
+        $count++;
+        $tests++;
+    }
+
+    $count++;
+    $tests++;
+
+    SKIP: {
+        my $pad = 6 - $tests;
+        $count += $pad if $pad > 0;
+    }
+}
+
+is($count, 78, 'next-if must skip the whole iteration (no +12 drift)');
+
+done_testing();