From 3839627f759497d0e5dd407229ce4e7ad54977d5 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:15:12 +0100
Subject: [PATCH 01/11] feat: add compilation database interface

---
 include/analysis/CompileCommands.hpp | 33 ++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 include/analysis/CompileCommands.hpp
diff --git a/include/analysis/CompileCommands.hpp b/include/analysis/CompileCommands.hpp
new file mode 100644
index 0000000..e1d102a
--- /dev/null
+++ b/include/analysis/CompileCommands.hpp
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace ctrace::stack::analysis
+{
+    struct CompileCommand // one entry of a compilation database
+    {
+        std::string directory; // directory recorded for the entry
+        std::vector<std::string> arguments; // argument list recorded for the entry
+    };
+
+    class CompilationDatabase // file path -> CompileCommand lookup table
+    {
+      public:
+        // Loads the database from `path`; on failure returns nullptr and fills `error`.
+        static std::shared_ptr<CompilationDatabase> loadFromFile(const std::string& path,
+                                                                 std::string& error);
+        // Returns the command found for `filePath`, or nullptr when there is none.
+        const CompileCommand* findCommandForFile(const std::string& filePath) const;
+        const std::string& sourcePath() const // normalized path of the loaded database file
+        {
+            return sourcePath_;
+        }
+
+      private:
+        std::string sourcePath_;
+        std::unordered_map<std::string, CompileCommand> commands_;
+    };
+} // namespace ctrace::stack::analysis

From 8dafe2c942a4465659be675bce2b54b4cb057133 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:15:48 +0100
Subject: [PATCH 02/11] feat: extend analysis config for compdb and IR dumping

---
 src/analysis/CompileCommands.cpp | 381 +++++++++++++++++++++++++++++++
 1 file changed, 381 insertions(+)
 create mode 100644 src/analysis/CompileCommands.cpp

diff --git a/src/analysis/CompileCommands.cpp b/src/analysis/CompileCommands.cpp
new file mode 100644
index 0000000..2c79ea6
--- /dev/null
+++ b/src/analysis/CompileCommands.cpp
@@ -0,0 +1,381 @@
+#include "analysis/CompileCommands.hpp"
+
+#include <cctype>
+#include <filesystem>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+#include <llvm/Support/Error.h>
+#include <llvm/Support/JSON.h>
+#include <llvm/Support/MemoryBuffer.h>
+
+namespace ctrace::stack::analysis
+{
+    namespace
+    {
+        // Canonical lookup key for `path`: absolute, symlink-resolved where the filesystem
+        // allows it, '/'-separated and without trailing slashes. Empty input yields "".
+        std::string normalizePath(const std::filesystem::path& path)
+        {
+            namespace fs = std::filesystem;
+            if (path.empty())
+                return {};
+            std::error_code status;
+            fs::path anchored = fs::absolute(path, status);
+            if (status)
+                anchored = path; // could not absolutize: keep the caller's spelling
+            const fs::path resolved = fs::weakly_canonical(anchored, status);
+            std::string key = (status ? anchored.lexically_normal() : resolved).generic_string();
+            while (key.size() > 1 && key.back() == '/')
+                key.pop_back();
+            return key;
+        }
+
+        // True when `path` ends with `suffix` on a path-component boundary: either both are
+        // equal or the character preceding the match is '/'. An empty suffix never matches.
+        bool pathHasSuffix(const std::string& path, const std::string& suffix)
+        {
+            const std::size_t pathLen = path.size();
+            const std::size_t tailLen = suffix.size();
+            if (tailLen == 0 || tailLen > pathLen)
+                return false;
+            const std::size_t tailStart = pathLen - tailLen;
+            const bool tailMatches = path.compare(tailStart, tailLen, suffix) == 0;
+            return tailMatches && (tailStart == 0 || path[tailStart - 1] == '/');
+        }
+
+        // Returns `path` followed by each proper suffix that starts right after a '/',
+        // longest first ("/a/b/c.c" -> "/a/b/c.c", "b/c.c", "c.c"). Suffixes carry no leading
+        // '/' because pathHasSuffix() requires the character before a match to be '/'.
+        std::vector<std::string> buildPathSuffixes(const std::string& path)
+        {
+            std::vector<std::string> suffixes;
+            if (path.empty())
+                return suffixes;
+            suffixes.push_back(path);
+            for (std::size_t i = 1; i + 1 < path.size(); ++i)
+                if (path[i] == '/')
+                    suffixes.push_back(path.substr(i + 1));
+            return suffixes;
+        }
+
+        // Splits a shell-style command string into arguments.
+        //
+        // Rules implemented here:
+        //  - unquoted whitespace separates arguments;
+        //  - '...' keeps every character literally until the closing quote;
+        //  - "..." does the same but lets a backslash escape the next character;
+        //  - outside quotes a backslash also escapes the next character;
+        //  - a backslash that is the very last character is kept as-is;
+        //  - quote characters themselves never reach the output;
+        //  - arguments that end up empty (e.g. a bare "") are not emitted.
+        std::vector<std::string> tokenizeCommandLine(const std::string& command)
+        {
+            enum class Quote
+            {
+                None,
+                Single,
+                Double
+            };
+
+            std::vector<std::string> tokens;
+            std::string pending;
+            Quote quote = Quote::None;
+            const std::size_t length = command.size();
+
+            // Emits the argument collected so far, if any.
+            const auto flush = [&tokens, &pending]()
+            {
+                if (pending.empty())
+                    return;
+                tokens.push_back(pending);
+                pending.clear();
+            };
+
+            for (std::size_t pos = 0; pos < length; ++pos)
+            {
+                const char ch = command[pos];
+                const bool escapesNext = (ch == '\\') && (pos + 1 < length);
+
+                switch (quote)
+                {
+                case Quote::Single:
+                    if (ch == '\'')
+                        quote = Quote::None;
+                    else
+                        pending.push_back(ch);
+                    break;
+
+                case Quote::Double:
+                    if (ch == '"')
+                        quote = Quote::None;
+                    else if (escapesNext)
+                        pending.push_back(command[++pos]);
+                    else
+                        pending.push_back(ch);
+                    break;
+
+                case Quote::None:
+                    if (std::isspace(static_cast<unsigned char>(ch)))
+                        flush();
+                    else if (ch == '\'')
+                        quote = Quote::Single;
+                    else if (ch == '"')
+                        quote = Quote::Double;
+                    else if (escapesNext)
+                        pending.push_back(command[++pos]);
+                    else
+                        pending.push_back(ch);
+                    break;
+                }
+            }
+
+            flush();
+            return tokens;
+        }
+
+        // Removes output and dependency-file options from `args`, keeping everything else in
+        // order. -o, --output, -MF, -MT and -MQ are dropped together with the next argument;
+        // their joined spellings ("-ofile", "-MFfile", ...) and the value-less switches
+        // -M, -MM, -MD, -MMD, -MG, -MP are dropped alone.
+        void stripOutputAndDependencyArgs(std::vector<std::string>& args)
+        {
+            const auto prefixed = [](const std::string& text, const char* prefix)
+            { return text.rfind(prefix, 0) == 0; };
+            const auto takesSeparateValue = [](const std::string& flag)
+            {
+                return flag == "-o" || flag == "--output" || flag == "-MF" || flag == "-MT" ||
+                       flag == "-MQ";
+            };
+            const auto isJoinedForm = [&prefixed](const std::string& flag)
+            {
+                if (flag.size() > 2 && prefixed(flag, "-o"))
+                    return true;
+                return flag.size() > 3 &&
+                       (prefixed(flag, "-MF") || prefixed(flag, "-MT") || prefixed(flag, "-MQ"));
+            };
+            const auto isBareSwitch = [](const std::string& flag)
+            {
+                return flag == "-M" || flag == "-MM" || flag == "-MD" || flag == "-MMD" ||
+                       flag == "-MG" || flag == "-MP";
+            };
+            std::vector<std::string> kept;
+            kept.reserve(args.size());
+            for (std::size_t idx = 0; idx < args.size(); ++idx)
+            {
+                if (takesSeparateValue(args[idx]))
+                    ++idx; // also skip the value; the loop bound tolerates a missing one
+                else if (!isJoinedForm(args[idx]) && !isBareSwitch(args[idx]))
+                    kept.push_back(args[idx]);
+            }
+            args = std::move(kept);
+        }
+
+        // Removes from `args` the first positional argument that names the translation unit.
+        //
+        // Arguments that are empty or start with '-' are never candidates. Every other
+        // argument is read as a path, resolved against `directory` when relative, run through
+        // normalizePath() and compared with `fileKey`; the first equal one is erased and the
+        // scan stops. Nothing happens when `fileKey` is empty or no argument matches.
+        void stripInputFileArg(std::vector<std::string>& args, const std::string& directory,
+                               const std::string& fileKey)
+        {
+            if (fileKey.empty())
+                return;
+
+            const std::filesystem::path baseDir(directory);
+            for (auto it = args.begin(); it != args.end(); ++it)
+            {
+                const bool positional = !it->empty() && it->front() != '-';
+                if (!positional)
+                    continue;
+                std::filesystem::path candidate(*it);
+                if (candidate.is_relative())
+                    candidate = baseDir / candidate;
+
+                // `fileKey` is non-empty here, so equality also rules out an empty key.
+                if (normalizePath(candidate) == fileKey)
+                {
+                    args.erase(it);
+                    return;
+                }
+            }
+        }
+
+        // Returns the argument vector of one database entry.
+        //
+        // An "arguments" array takes precedence: its string elements are copied in order and
+        // non-string elements are skipped. Without that array the "command" string is split
+        // by tokenizeCommandLine(). With neither key the result is empty.
+        std::vector<std::string> extractArguments(const llvm::json::Object& obj)
+        {
+            if (const auto* list = obj.getArray("arguments"))
+            {
+                std::vector<std::string> fromArray;
+                fromArray.reserve(list->size());
+                for (const auto& item : *list)
+                    if (auto text = item.getAsString())
+                        fromArray.push_back(text->str());
+                return fromArray;
+            }
+            const auto raw = obj.getString("command");
+            return raw ? tokenizeCommandLine(raw->str()) : std::vector<std::string>{};
+        }
+
+        // Drops everything before the first argument that starts with '-' or '@' (presumably
+        // the compiler executable and any wrapper placed in front of it).
+        void stripLeadingCommandTokens(std::vector<std::string>& args)
+        {
+            auto firstOption = args.begin();
+            for (; firstOption != args.end(); ++firstOption)
+            {
+                const char lead = firstOption->empty() ? '\0' : firstOption->front();
+                if (lead == '-' || lead == '@')
+                    break;
+            }
+            args.erase(args.begin(), firstOption);
+        }
+
+        // Resolves an entry's "directory" against `compdbDir`; empty means `compdbDir` itself.
+        std::filesystem::path normalizeDirectoryPath(const std::filesystem::path& compdbDir,
+                                                     const std::string& directory)
+        {
+            if (directory.empty())
+                return compdbDir;
+            const std::filesystem::path dirPath(directory);
+            return dirPath.is_relative() ? compdbDir / dirPath : dirPath;
+        }
+    } // namespace
+
+    std::shared_ptr<CompilationDatabase> CompilationDatabase::loadFromFile(const std::string& path,
+                                                                           std::string& error)
+    {
+        error.clear(); // stays empty unless nullptr is returned
+        auto bufferOrErr = llvm::MemoryBuffer::getFile(path);
+        if (!bufferOrErr)
+        {
+            error = "unable to read compile commands file: " + path + " (" +
+                    bufferOrErr.getError().message() + ")";
+            return nullptr;
+        }
+        // The content must be valid JSON with an array at the top level.
+        auto parsed = llvm::json::parse(bufferOrErr.get()->getBuffer());
+        if (!parsed)
+        {
+            error = "failed to parse compile commands JSON: " + llvm::toString(parsed.takeError());
+            return nullptr;
+        }
+
+        auto* array = parsed->getAsArray();
+        if (!array)
+        {
+            error = "compile commands JSON must be an array";
+            return nullptr;
+        }
+
+        auto db = std::make_shared<CompilationDatabase>();
+        db->sourcePath_ = normalizePath(path);
+        // Relative "directory" values are resolved against the database file's directory.
+        std::filesystem::path compdbDir = std::filesystem::path(path).parent_path();
+        if (compdbDir.empty())
+        {
+            std::error_code ec;
+            compdbDir = std::filesystem::current_path(ec);
+            if (ec)
+                compdbDir = std::filesystem::path(".");
+        }
+
+        for (const auto& entryValue : *array)
+        {
+            const auto* obj = entryValue.getAsObject();
+            if (!obj)
+                continue; // not an object: skip the element
+
+            auto fileValue = obj->getString("file");
+            if (!fileValue)
+                continue; // entries without a "file" string are skipped
+
+            std::string fileStr = fileValue->str();
+            std::string dirStr;
+            if (auto directoryValue = obj->getString("directory"))
+                dirStr = directoryValue->str();
+
+            std::filesystem::path directoryPath = normalizeDirectoryPath(compdbDir, dirStr);
+            std::string directoryKey = normalizePath(directoryPath);
+            if (directoryKey.empty())
+                continue;
+            // Relative "file" values are resolved against the entry's directory.
+            std::filesystem::path filePath(fileStr);
+            if (filePath.is_relative())
+                filePath = directoryPath / filePath;
+            std::string fileKey = normalizePath(filePath);
+            if (fileKey.empty())
+                continue;
+
+            std::vector<std::string> args = extractArguments(*obj);
+            if (args.empty())
+                continue; // no usable "arguments"/"command": skip the entry
+            // Keep options only: drop the leading command, output/dependency flags, the input.
+            stripLeadingCommandTokens(args);
+            stripOutputAndDependencyArgs(args);
+            stripInputFileArg(args, directoryKey, fileKey);
+            // The first entry seen for a file wins; later duplicates are ignored.
+            if (db->commands_.find(fileKey) != db->commands_.end())
+                continue;
+
+            CompileCommand command;
+            command.directory = directoryKey;
+            command.arguments = std::move(args);
+            db->commands_.emplace(fileKey, std::move(command));
+        }
+
+        if (db->commands_.empty())
+        {
+            error = "compile commands file contains no usable entries";
+            return nullptr;
+        }
+
+        return db;
+    }
+
+    // Finds the compile command for `filePath`, or returns nullptr.
+    //
+    // The path is normalized and looked up exactly first. When that misses, the suffixes
+    // produced by buildPathSuffixes() are tried in order (index 0, the full path, is
+    // skipped): for each one the stored paths are tested with pathHasSuffix(). The first
+    // suffix matched by exactly one stored path decides the result; a suffix matched by
+    // several stored paths ends the search with nullptr.
+    const CompileCommand* CompilationDatabase::findCommandForFile(const std::string& filePath) const
+    {
+        if (filePath.empty())
+            return nullptr;
+
+        const std::string key = normalizePath(std::filesystem::path(filePath));
+        const auto exact = commands_.find(key);
+        if (exact != commands_.end())
+            return &exact->second;
+
+        const std::vector<std::string> candidates = buildPathSuffixes(key);
+        for (std::size_t idx = 1; idx < candidates.size(); ++idx)
+        {
+            const std::string& tail = candidates[idx];
+            const CompileCommand* onlyHit = nullptr;
+            std::size_t hits = 0;
+            for (const auto& stored : commands_)
+            {
+                if (!pathHasSuffix(stored.first, tail))
+                    continue;
+                if (++hits > 1)
+                    break; // ambiguous: no need to count further
+                onlyHit = &stored.second;
+            }
+            if (hits == 1)
+                return onlyHit;
+            if (hits > 1)
+                return nullptr;
+        }
+        return nullptr;
+    }
+} // namespace ctrace::stack::analysis

From e8408576b6cf67eb396dbf7f3986a6f425c29651 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:16:52 +0100
Subject: [PATCH 03/11] feat: support dump-ir output and compdb-fast
 compilation

---
 src/analysis/InputPipeline.cpp | 290 +++++++++++++++++++++++++++++++--
 1 file changed, 279 insertions(+), 11 deletions(-)

diff --git a/src/analysis/InputPipeline.cpp b/src/analysis/InputPipeline.cpp
index 5622006..9ca49ac 100644
--- a/src/analysis/InputPipeline.cpp
+++ b/src/analysis/InputPipeline.cpp
@@ -2,20 +2,243 @@
 
 #include <algorithm>
 #include <cctype>
+#include <chrono>
+#include <filesystem>
 #include <iostream>
+#include <system_error>
 #include <vector>
 
 #include <llvm/IR/LLVMContext.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IRReader/IRReader.h>
+#include <llvm/Support/FileSystem.h>
 #include <llvm/Support/MemoryBuffer.h>
 #include <llvm/Support/SourceMgr.h>
 #include <llvm/Support/raw_ostream.h>
 
+#include "analysis/CompileCommands.hpp"
 #include "compilerlib/compiler.h"
 
 namespace ctrace::stack::analysis
 {
+    namespace
+    {
+        // Absolute, lexically normalized, '/'-separated form of `path` (no symlink
+        // resolution); `path` is returned untouched when it cannot be made absolute.
+        std::string makeAbsolutePath(const std::string& path)
+        {
+            std::error_code failure;
+            const std::filesystem::path resolved = std::filesystem::absolute(path, failure);
+            return failure ? path : resolved.lexically_normal().generic_string();
+        }
+
+        void appendIfMissing(std::vector<std::string>& args, const std::string& flag)
+        {
+            const bool present = std::count(args.begin(), args.end(), flag) != 0; // exact match
+            if (!present)
+                args.push_back(flag);
+        }
+
+        // True when the last "-g..." option in `args` is not "-g0" (which disables debug
+        // info); "-gcc-toolchain" is skipped because it selects a toolchain, not a debug level.
+        bool hasDebugFlag(const std::vector<std::string>& args)
+        {
+            for (auto it = args.rbegin(); it != args.rend(); ++it)
+                if (it->rfind("-g", 0) == 0 && *it != "-gcc-toolchain")
+                    return *it != "-g0";
+            return false;
+        }
+
+        // Rewrites `args` for a quick build: optimisation, debug, sanitizer, LTO, profile and
+        // coverage options are dropped and -O0 -gline-tables-only -fno-sanitize=all appended.
+        // "-gcc-toolchain" is kept: dropping it would leave its path argument orphaned.
+        void applyCompdbFastMode(std::vector<std::string>& args)
+        {
+            const auto prefixed = [](const std::string& text, const char* prefix)
+            { return text.rfind(prefix, 0) == 0; };
+            const auto isDropped = [&prefixed](const std::string& arg)
+            {
+                if (prefixed(arg, "-g"))
+                    return arg != "-gcc-toolchain";
+                return prefixed(arg, "-O") || prefixed(arg, "-fsanitize") ||
+                       prefixed(arg, "-fno-sanitize") || arg == "-flto" ||
+                       prefixed(arg, "-flto=") || prefixed(arg, "-fprofile") ||
+                       prefixed(arg, "-fcoverage");
+            };
+            std::vector<std::string> filtered;
+            filtered.reserve(args.size() + 3);
+            for (const auto& arg : args)
+                if (!isDropped(arg))
+                    filtered.push_back(arg);
+            filtered.push_back("-O0");
+            filtered.push_back("-gline-tables-only");
+            filtered.push_back("-fno-sanitize=all");
+            args.swap(filtered);
+        }
+
+        // Resolves the IR dump location for `inputPath` into `outPath`, creating missing
+        // parent directories. Relative dump paths are anchored at `baseDir`; in directory mode
+        // the file is "<input file name>.ll" ("module.ll" when the input has no file name).
+        // Returns false without touching `error` when no dump path is configured, and false
+        // with `error` set on failure. The overwrite guard compares both paths anchored at
+        // `baseDir` and lexically normalized, so "./a.ll" and "a.ll" count as the same file.
+        static bool resolveDumpIRPath(const AnalysisConfig& config, const std::string& inputPath,
+                                      const std::filesystem::path& baseDir,
+                                      std::filesystem::path& outPath, std::string& error)
+        {
+            namespace fs = std::filesystem;
+            if (config.dumpIRPath.empty())
+                return false;
+
+            const auto comparable = [&baseDir](fs::path p)
+            {
+                if (p.is_relative() && !baseDir.empty())
+                    p = baseDir / p;
+                std::error_code absErr;
+                const fs::path abs = fs::absolute(p, absErr);
+                return (absErr ? p : abs).lexically_normal();
+            };
+            fs::path dumpPath(config.dumpIRPath);
+            if (dumpPath.is_relative() && !baseDir.empty())
+                dumpPath = baseDir / dumpPath;
+            const fs::path inputName = fs::path(inputPath).filename();
+            const std::string outName = inputName.empty() ? "module" : inputName.string();
+            outPath = config.dumpIRIsDir ? dumpPath / (outName + ".ll") : dumpPath;
+
+            const fs::path parentDir = outPath.parent_path();
+            std::error_code ec;
+            if (!parentDir.empty())
+                fs::create_directories(parentDir, ec);
+            if (ec)
+            {
+                error = "Failed to create IR dump directory: " + parentDir.string();
+                return false;
+            }
+            if (comparable(inputPath) == comparable(outPath))
+            {
+                error =
+                    "Refusing to overwrite input file with --dump-ir output: " + outPath.string();
+                return false;
+            }
+            return true;
+        }
+
+        // Dumps `module` as textual IR to the path from resolveDumpIRPath(). Returns true when
+        // dumping is disabled or succeeded; otherwise false with `error` describing the failure.
+        static bool dumpModuleIR(const llvm::Module& module, const std::string& inputPath,
+                                 const AnalysisConfig& config, const std::filesystem::path& baseDir,
+                                 std::string& error)
+        {
+            std::filesystem::path outPath;
+            if (!resolveDumpIRPath(config, inputPath, baseDir, outPath, error))
+                return config.dumpIRPath.empty(); // no path configured is not a failure
+            std::error_code ec;
+            llvm::raw_fd_ostream os(outPath.string(), ec, llvm::sys::fs::OF_Text);
+            if (!ec)
+            {
+                module.print(os, nullptr);
+                os.flush();
+                ec = os.error(); // write failures are recorded on the stream
+                os.clear_error(); // a pending error would abort in the stream destructor
+            }
+            if (!ec)
+                return true;
+            error = "Failed to write IR dump file: " + outPath.string() + " (" + ec.message() + ")";
+            return false;
+        }
+
+        bool buildCompileArgs(const std::string& filename, LanguageType language,
+                              const AnalysisConfig& config, std::vector<std::string>& args,
+                              std::string& workingDir, std::string& error)
+        { // Fills `args` and `workingDir`; returns false with `error` set on failure.
+            const CompileCommand* command = nullptr; // stays null without a database hit
+            if (config.compilationDatabase)
+            {
+                command = config.compilationDatabase->findCommandForFile(filename);
+            }
+            // Use the database entry if there is one; else default flags, or fail if required.
+            if (command)
+            {
+                args = command->arguments;
+                workingDir = command->directory;
+                if (config.compdbFast)
+                    applyCompdbFastMode(args);
+            }
+            else
+            {
+                if (config.requireCompilationDatabase)
+                {
+                    error = "No compile command found for: " + filename;
+                    if (config.compilationDatabase &&
+                        !config.compilationDatabase->sourcePath().empty())
+                    {
+                        error += " in " + config.compilationDatabase->sourcePath();
+                    }
+                    return false;
+                }
+                args.clear();
+                args.push_back("-emit-llvm");
+                args.push_back("-S");
+                args.push_back("-g");
+                if (language == LanguageType::CXX)
+                {
+                    args.push_back("-x");
+                    args.push_back("c++");
+                    args.push_back("-std=gnu++20");
+                }
+            }
+            // Extra arguments from the configuration follow the base arguments.
+            for (const auto& extraArg : config.extraCompileArgs)
+            {
+                args.push_back(extraArg);
+            }
+            // Append each required flag only when absent; the input file always goes last.
+            appendIfMissing(args, "-emit-llvm");
+            appendIfMissing(args, "-S");
+            if (!hasDebugFlag(args))
+                args.push_back("-g");
+            appendIfMissing(args, "-fno-discard-value-names");
+            const bool useAbsolutePath = (command != nullptr);
+            args.push_back(useAbsolutePath ? makeAbsolutePath(filename) : filename);
+            return true;
+        }
+
+        // Changes the process working directory for the lifetime of the object and restores
+        // the previous one on destruction. An empty `path` is a no-op; failures are reported
+        // through `error` and leave the guard inactive. Non-copyable: a copy would restore
+        // the directory a second time, possibly undoing a later change.
+        class ScopedCurrentPath
+        {
+          public:
+            explicit ScopedCurrentPath(const std::string& path, std::string& error)
+            {
+                if (path.empty())
+                    return;
+                std::error_code ec;
+                previousPath_ = std::filesystem::current_path(ec);
+                if (ec)
+                    error = "Failed to read current working directory";
+                else if (std::filesystem::current_path(path, ec); ec)
+                    error = "Failed to change working directory to: " + path;
+                else
+                    active_ = true;
+            }
+            ScopedCurrentPath(const ScopedCurrentPath&) = delete;
+            ScopedCurrentPath& operator=(const ScopedCurrentPath&) = delete;
+
+            ~ScopedCurrentPath()
+            {
+                if (!active_)
+                    return;
+                std::error_code ec; // best effort: a destructor cannot report the failure
+                std::filesystem::current_path(previousPath_, ec);
+            }
+
+          private:
+            std::filesystem::path previousPath_;
+            bool active_ = false;
+        };
+    } // namespace
+
     LanguageType detectFromExtension(const std::string& path)
     {
         auto pos = path.find_last_of('.');
@@ -57,6 +280,11 @@ namespace ctrace::stack::analysis
                                            llvm::SMDiagnostic& err)
     {
         ModuleLoadResult result;
+        std::error_code cwdErr;
+        std::filesystem::path baseDir = std::filesystem::current_path(cwdErr);
+        using Clock = std::chrono::steady_clock;
+        auto compileStart = Clock::now();
+        bool compiled = false;
         result.language = detectLanguageFromFile(filename, ctx);
 
         if (result.language == LanguageType::Unknown)
@@ -68,23 +296,27 @@ namespace ctrace::stack::analysis
         if (result.language != LanguageType::LLVM_IR)
         {
             std::vector<std::string> args;
-            args.push_back("-emit-llvm");
-            args.push_back("-S");
-            args.push_back("-g");
-            if (result.language == LanguageType::CXX)
+            std::string workingDir;
+            std::string compileError;
+            if (!buildCompileArgs(filename, result.language, config, args, workingDir,
+                                  compileError))
             {
-                args.push_back("-x");
-                args.push_back("c++");
-                args.push_back("-std=gnu++20");
+                result.error = compileError + "\n";
+                return result;
             }
-            for (const auto& extraArg : config.extraCompileArgs)
+
+            if (config.timing)
+                llvm::errs() << "Compiling " << filename << "...\n";
+            std::string cwdError;
+            ScopedCurrentPath cwdGuard(workingDir, cwdError);
+            if (!cwdError.empty())
             {
-                args.push_back(extraArg);
+                result.error = cwdError + "\n";
+                return result;
             }
-            args.push_back("-fno-discard-value-names");
-            args.push_back(filename);
             compilerlib::OutputMode mode = compilerlib::OutputMode::ToMemory;
             auto res = compilerlib::compile(args, mode);
+            compiled = true;
 
             if (!res.success)
             {
@@ -98,10 +330,28 @@ namespace ctrace::stack::analysis
                 return result;
             }
 
+            if (config.timing)
+            {
+                auto compileEnd = Clock::now();
+                auto ms =
+                    std::chrono::duration_cast<std::chrono::milliseconds>(compileEnd - compileStart)
+                        .count();
+                llvm::errs() << "Compilation done in " << ms << " ms\n";
+            }
+
             auto buffer = llvm::MemoryBuffer::getMemBuffer(res.llvmIR, "in_memory_ll");
 
             llvm::SMDiagnostic diag;
+            auto parseStart = Clock::now();
             result.module = llvm::parseIR(buffer->getMemBufferRef(), diag, ctx);
+            if (config.timing)
+            {
+                auto parseEnd = Clock::now();
+                auto ms =
+                    std::chrono::duration_cast<std::chrono::milliseconds>(parseEnd - parseStart)
+                        .count();
+                llvm::errs() << "IR parse done in " << ms << " ms\n";
+            }
 
             if (!result.module)
             {
@@ -112,10 +362,28 @@ namespace ctrace::stack::analysis
                 return result;
             }
 
+            if (!dumpModuleIR(*result.module, filename, config, baseDir, result.error))
+                return result;
+
             return result;
         }
 
+        if (config.timing)
+            llvm::errs() << "Parsing IR " << filename << "...\n";
+        auto parseStart = Clock::now();
         result.module = llvm::parseIRFile(filename, err, ctx);
+        if (config.timing)
+        {
+            auto parseEnd = Clock::now();
+            auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(parseEnd - parseStart)
+                          .count();
+            llvm::errs() << "IR parse done in " << ms << " ms\n";
+        }
+        if (result.module)
+        {
+            if (!dumpModuleIR(*result.module, filename, config, baseDir, result.error))
+                return result;
+        }
         return result;
     }
 } // namespace ctrace::stack::analysis

From 8129b984bc6a53ea02088d1bdd7ad32e9eab060e Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:19:40 +0100
Subject: [PATCH 04/11] fix: normalize filter paths using filesystem
 canonicalization

---
 src/analysis/AnalyzerUtils.cpp | 70 ++++++++++------------------------
 1 file changed, 20 insertions(+), 50 deletions(-)

diff --git a/src/analysis/AnalyzerUtils.cpp b/src/analysis/AnalyzerUtils.cpp
index 3676181..6b0177e 100644
--- a/src/analysis/AnalyzerUtils.cpp
+++ b/src/analysis/AnalyzerUtils.cpp
@@ -2,7 +2,9 @@
 
 #include <algorithm>
 #include <cctype>
+#include <filesystem>
 #include <set>
+#include <system_error>
 #include <vector>
 
 #include <llvm/IR/Function.h>
@@ -117,60 +119,28 @@ namespace ctrace::stack::analysis
 
     static std::string normalizePathForMatch(const std::string& input)
     {
-        std::string out = input;
-        for (char& c : out)
+        if (input.empty())
+            return {};
+
+        std::string adjusted = input;
+        for (char& c : adjusted)
         {
             if (c == '\\')
                 c = '/';
         }
-        const bool isAbs = !out.empty() && out.front() == '/';
-        std::vector<std::string> parts;
-        std::string cur;
-        for (char c : out)
-        {
-            if (c == '/')
-            {
-                if (!cur.empty())
-                {
-                    if (cur == "..")
-                    {
-                        if (!parts.empty())
-                            parts.pop_back();
-                    }
-                    else if (cur != ".")
-                    {
-                        parts.push_back(cur);
-                    }
-                    cur.clear();
-                }
-            }
-            else
-            {
-                cur.push_back(c);
-            }
-        }
-        if (!cur.empty())
-        {
-            if (cur == "..")
-            {
-                if (!parts.empty())
-                    parts.pop_back();
-            }
-            else if (cur != ".")
-            {
-                parts.push_back(cur);
-            }
-        }
-        std::string norm = isAbs ? "/" : "";
-        for (std::size_t i = 0; i < parts.size(); ++i)
-        {
-            norm += parts[i];
-            if (i + 1 < parts.size())
-                norm += "/";
-        }
-        while (!norm.empty() && norm.back() == '/')
-            norm.pop_back();
-        return norm;
+
+        std::filesystem::path path(adjusted);
+        std::error_code ec;
+        std::filesystem::path absPath = std::filesystem::absolute(path, ec);
+        if (ec)
+            absPath = path;
+
+        std::filesystem::path canonicalPath = std::filesystem::weakly_canonical(absPath, ec);
+        std::filesystem::path norm = ec ? absPath.lexically_normal() : canonicalPath;
+        std::string out = norm.generic_string();
+        while (out.size() > 1 && out.back() == '/')
+            out.pop_back();
+        return out;
     }
 
     static std::string basenameOf(const std::string& path)

From dec036e21b9de10578cb7eeac2b98beb6ba4596e Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:20:11 +0100
Subject: [PATCH 05/11] fix: reduce invalid base reconstruction false positives

---
 src/analysis/InvalidBaseReconstruction.cpp | 187 ++++++++++++++++++---
 1 file changed, 167 insertions(+), 20 deletions(-)

diff --git a/src/analysis/InvalidBaseReconstruction.cpp b/src/analysis/InvalidBaseReconstruction.cpp
index 39dc851..26da26a 100644
--- a/src/analysis/InvalidBaseReconstruction.cpp
+++ b/src/analysis/InvalidBaseReconstruction.cpp
@@ -1,5 +1,6 @@
 #include "analysis/InvalidBaseReconstruction.hpp"
 
+#include <cstddef>
 #include <map>
 #include <optional>
 #include <set>
@@ -21,6 +22,29 @@ namespace ctrace::stack::analysis
 {
     namespace
     {
+        constexpr std::size_t kMaxInvalidBaseWork = 200000;
+
+        struct WorkBudget
+        {
+            std::size_t remaining = kMaxInvalidBaseWork;
+
+            bool consume(std::size_t amount = 1)
+            {
+                if (remaining < amount)
+                {
+                    remaining = 0;
+                    return false;
+                }
+                remaining -= amount;
+                return true;
+            }
+
+            bool exhausted() const
+            {
+                return remaining == 0;
+            }
+        };
+
         static bool isLoadFromAlloca(const llvm::Value* V, const llvm::AllocaInst* AI)
         {
             if (!V || !AI)
@@ -179,7 +203,8 @@ namespace ctrace::stack::analysis
         }
 
         static void collectPtrToIntMatches(const llvm::Value* V,
-                                           llvm::SmallVectorImpl<PtrIntMatch>& out)
+                                           llvm::SmallVectorImpl<PtrIntMatch>& out,
+                                           WorkBudget& budget)
         {
             using namespace llvm;
 
@@ -208,6 +233,8 @@ namespace ctrace::stack::analysis
 
             while (!worklist.empty())
             {
+                if (!budget.consume())
+                    return;
                 const Value* Cur = stripIntCasts(worklist.back().val);
                 int64_t curOffset = worklist.back().offset;
                 bool curSawOffset = worklist.back().sawOffset;
@@ -395,7 +422,7 @@ namespace ctrace::stack::analysis
         };
 
         static void collectPointerOrigins(const llvm::Value* V, const llvm::DataLayout& DL,
-                                          llvm::SmallVectorImpl<PtrOrigin>& out)
+                                          llvm::SmallVectorImpl<PtrOrigin>& out, WorkBudget& budget)
         {
             using namespace llvm;
 
@@ -407,6 +434,8 @@ namespace ctrace::stack::analysis
 
             while (!worklist.empty())
             {
+                if (!budget.consume())
+                    return;
                 const Value* Cur = worklist.back().first;
                 int64_t currentOffset = worklist.back().second;
                 worklist.pop_back();
@@ -462,11 +491,37 @@ namespace ctrace::stack::analysis
                     continue;
                 }
 
+                if (auto* ITP = dyn_cast<IntToPtrInst>(Cur))
+                {
+                    SmallVector<PtrIntMatch, 8> matches;
+                    collectPtrToIntMatches(ITP->getOperand(0), matches, budget);
+                    for (const auto& match : matches)
+                    {
+                        if (!match.ptrOperand)
+                            continue;
+                        int64_t newOffset = currentOffset + match.offset;
+                        if (recordVisitedOffset(visited, match.ptrOperand, newOffset))
+                            worklist.push_back({match.ptrOperand, newOffset});
+                    }
+                    continue;
+                }
+
                 if (auto* LI = dyn_cast<LoadInst>(Cur))
                 {
-                    const Value* PtrOp = LI->getPointerOperand();
-                    if (recordVisitedOffset(visited, PtrOp, currentOffset))
-                        worklist.push_back({PtrOp, currentOffset});
+                    const Value* PtrOp = LI->getPointerOperand()->stripPointerCasts();
+                    const Value* basePtr = PtrOp;
+                    int64_t baseOffset = 0;
+                    if (getGEPConstantOffsetAndBase(basePtr, DL, baseOffset, basePtr))
+                        basePtr = basePtr->stripPointerCasts();
+                    if (auto* AI = dyn_cast<AllocaInst>(basePtr))
+                    {
+                        Type* allocTy = AI->getAllocatedType();
+                        if (allocTy && allocTy->isPointerTy())
+                        {
+                            if (recordVisitedOffset(visited, PtrOp, currentOffset))
+                                worklist.push_back({PtrOp, currentOffset});
+                        }
+                    }
                     continue;
                 }
 
@@ -500,11 +555,24 @@ namespace ctrace::stack::analysis
                         if (recordVisitedOffset(visited, Src, currentOffset))
                             worklist.push_back({Src, currentOffset});
                     }
+                    else if (CE->getOpcode() == Instruction::IntToPtr)
+                    {
+                        SmallVector<PtrIntMatch, 8> matches;
+                        collectPtrToIntMatches(CE->getOperand(0), matches, budget);
+                        for (const auto& match : matches)
+                        {
+                            if (!match.ptrOperand)
+                                continue;
+                            int64_t newOffset = currentOffset + match.offset;
+                            if (recordVisitedOffset(visited, match.ptrOperand, newOffset))
+                                worklist.push_back({match.ptrOperand, newOffset});
+                        }
+                    }
                 }
             }
         }
 
-        static bool isPointerDereferencedOrUsed(const llvm::Value* V)
+        static bool isPointerDereferencedOrUsed(const llvm::Value* V, WorkBudget& budget)
         {
             using namespace llvm;
 
@@ -514,6 +582,8 @@ namespace ctrace::stack::analysis
 
             while (!worklist.empty())
             {
+                if (!budget.consume())
+                    return false;
                 const Value* Cur = worklist.back();
                 worklist.pop_back();
                 if (!visited.insert(Cur).second)
@@ -638,6 +708,60 @@ namespace ctrace::stack::analysis
             return std::nullopt;
         }
 
+        static const llvm::StructType* getAllocaStructType(const llvm::AllocaInst* AI)
+        {
+            if (!AI)
+                return nullptr;
+            return llvm::dyn_cast<llvm::StructType>(AI->getAllocatedType());
+        }
+
+        static std::optional<unsigned> getStructMemberIndexAtOffset(const llvm::StructType* ST,
+                                                                    const llvm::DataLayout& DL,
+                                                                    uint64_t offset)
+        {
+            if (!ST)
+                return std::nullopt;
+
+            auto* mutableST = const_cast<llvm::StructType*>(ST);
+            const llvm::StructLayout* layout = DL.getStructLayout(mutableST);
+            const unsigned memberCount = ST->getNumElements();
+            for (unsigned i = 0; i < memberCount; ++i)
+            {
+                uint64_t memberOffset = layout->getElementOffset(i);
+                llvm::Type* memberTy = ST->getElementType(i);
+                uint64_t memberSize = DL.getTypeAllocSize(memberTy);
+                if (memberSize == 0)
+                {
+                    if (offset == memberOffset)
+                        return i;
+                    continue;
+                }
+                if (offset >= memberOffset && offset < memberOffset + memberSize)
+                    return i;
+            }
+
+            return std::nullopt;
+        }
+
+        static bool isOffsetWithinSameAllocaMember(int64_t originOffset, int64_t resultOffset,
+                                                   const llvm::StructType* structType,
+                                                   uint64_t allocaSize, const llvm::DataLayout& DL)
+        {
+            if (originOffset < 0 || resultOffset < 0)
+                return false;
+            if (!structType)
+                return false;
+            uint64_t uOrigin = static_cast<uint64_t>(originOffset);
+            uint64_t uResult = static_cast<uint64_t>(resultOffset);
+            if (uOrigin >= allocaSize || uResult >= allocaSize)
+                return false;
+            auto originMember = getStructMemberIndexAtOffset(structType, DL, uOrigin);
+            auto resultMember = getStructMemberIndexAtOffset(structType, DL, uResult);
+            if (!originMember.has_value() || !resultMember.has_value())
+                return false;
+            return originMember.value() == resultMember.value();
+        }
+
         static void analyzeInvalidBaseReconstructionsInFunction(
             llvm::Function& F, const llvm::DataLayout& DL,
             std::vector<InvalidBaseReconstructionIssue>& out)
@@ -647,7 +771,14 @@ namespace ctrace::stack::analysis
             if (F.isDeclaration())
                 return;
 
-            std::map<const AllocaInst*, std::pair<std::string, uint64_t>> allocaInfo;
+            WorkBudget budget;
+            struct AllocaInfo
+            {
+                std::string name;
+                uint64_t size = 0;
+                const StructType* structType = nullptr;
+            };
+            std::map<const AllocaInst*, AllocaInfo> allocaInfo;
 
             for (BasicBlock& BB : F)
             {
@@ -663,7 +794,11 @@ namespace ctrace::stack::analysis
 
                     std::string varName =
                         AI->hasName() ? AI->getName().str() : std::string("<unnamed>");
-                    allocaInfo[AI] = {varName, sizeOpt.value()};
+                    AllocaInfo info;
+                    info.name = std::move(varName);
+                    info.size = sizeOpt.value();
+                    info.structType = getAllocaStructType(AI);
+                    allocaInfo[AI] = std::move(info);
                 }
             }
 
@@ -673,13 +808,13 @@ namespace ctrace::stack::analysis
                 {
                     if (auto* ITP = dyn_cast<IntToPtrInst>(&I))
                     {
-                        if (!isPointerDereferencedOrUsed(ITP))
+                        if (!isPointerDereferencedOrUsed(ITP, budget))
                             continue;
 
                         Value* IntVal = ITP->getOperand(0);
 
                         SmallVector<PtrIntMatch, 8> matches;
-                        collectPtrToIntMatches(IntVal, matches);
+                        collectPtrToIntMatches(IntVal, matches, budget);
                         if (matches.empty())
                             continue;
 
@@ -701,7 +836,7 @@ namespace ctrace::stack::analysis
                                 continue;
 
                             SmallVector<PtrOrigin, 8> origins;
-                            collectPointerOrigins(match.ptrOperand, DL, origins);
+                            collectPointerOrigins(match.ptrOperand, DL, origins, budget);
                             if (origins.empty())
                                 continue;
 
@@ -711,13 +846,17 @@ namespace ctrace::stack::analysis
                                 if (it == allocaInfo.end())
                                     continue;
 
-                                const std::string& varName = it->second.first;
-                                uint64_t allocaSize = it->second.second;
+                                const std::string& varName = it->second.name;
+                                uint64_t allocaSize = it->second.size;
+                                const StructType* structType = it->second.structType;
 
                                 int64_t resultOffset = origin.offset + match.offset;
                                 bool isOutOfBounds =
                                     (resultOffset < 0) ||
                                     (static_cast<uint64_t>(resultOffset) >= allocaSize);
+                                bool isMemberOffset = isOffsetWithinSameAllocaMember(
+                                    origin.offset, resultOffset, structType, allocaSize, DL);
+                                bool allowMemberSuppression = match.offset != 0;
 
                                 std::string targetType;
                                 Type* targetTy = ITP->getType();
@@ -731,7 +870,8 @@ namespace ctrace::stack::analysis
                                 auto& entry = agg[key];
                                 entry.memberOffsets.insert(origin.offset);
                                 entry.anyOutOfBounds |= isOutOfBounds;
-                                if (resultOffset != 0)
+                                if (resultOffset != 0 &&
+                                    !(allowMemberSuppression && isMemberOffset))
                                     entry.anyNonZeroResult = true;
                                 entry.varName = varName;
                                 entry.allocaSize = allocaSize;
@@ -781,7 +921,7 @@ namespace ctrace::stack::analysis
 
                     if (auto* GEP = dyn_cast<GetElementPtrInst>(&I))
                     {
-                        if (!isPointerDereferencedOrUsed(GEP))
+                        if (!isPointerDereferencedOrUsed(GEP, budget))
                             continue;
 
                         int64_t gepOffset = 0;
@@ -789,8 +929,11 @@ namespace ctrace::stack::analysis
                         if (!getGEPConstantOffsetAndBase(GEP, DL, gepOffset, PtrOp))
                             continue;
 
+                        const Value* directBase = PtrOp ? PtrOp->stripPointerCasts() : nullptr;
+                        const bool isDirectAllocaBase = directBase && isa<AllocaInst>(directBase);
+
                         SmallVector<PtrOrigin, 8> origins;
-                        collectPointerOrigins(PtrOp, DL, origins);
+                        collectPointerOrigins(PtrOp, DL, origins, budget);
                         if (origins.empty())
                             continue;
 
@@ -807,7 +950,7 @@ namespace ctrace::stack::analysis
 
                         for (const auto& origin : origins)
                         {
-                            if (origin.offset == 0 && gepOffset >= 0)
+                            if (origin.offset == 0 && gepOffset >= 0 && isDirectAllocaBase)
                             {
                                 continue;
                             }
@@ -816,13 +959,17 @@ namespace ctrace::stack::analysis
                             if (it == allocaInfo.end())
                                 continue;
 
-                            const std::string& varName = it->second.first;
-                            uint64_t allocaSize = it->second.second;
+                            const std::string& varName = it->second.name;
+                            uint64_t allocaSize = it->second.size;
+                            const StructType* structType = it->second.structType;
 
                             int64_t resultOffset = origin.offset + gepOffset;
                             bool isOutOfBounds =
                                 (resultOffset < 0) ||
                                 (static_cast<uint64_t>(resultOffset) >= allocaSize);
+                            bool isMemberOffset = isOffsetWithinSameAllocaMember(
+                                origin.offset, resultOffset, structType, allocaSize, DL);
+                            bool allowMemberSuppression = gepOffset != 0;
 
                             std::string targetType;
                             Type* targetTy = GEP->getType();
@@ -832,7 +979,7 @@ namespace ctrace::stack::analysis
                             auto& entry = agg[origin.alloca];
                             entry.memberOffsets.insert(origin.offset);
                             entry.anyOutOfBounds |= isOutOfBounds;
-                            if (resultOffset != 0)
+                            if (resultOffset != 0 && !(allowMemberSuppression && isMemberOffset))
                                 entry.anyNonZeroResult = true;
                             entry.varName = varName;
                             entry.targetType = targetType;

From d74659192367931247d44db75de35a9ba40d70a5 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:21:26 +0100
Subject: [PATCH 06/11] fix: tighten stack buffer detection with debug-info
 array heuristics

---
 src/analysis/StackBufferAnalysis.cpp | 210 +++++++++++++++++++--------
 1 file changed, 150 insertions(+), 60 deletions(-)

diff --git a/src/analysis/StackBufferAnalysis.cpp b/src/analysis/StackBufferAnalysis.cpp
index 459673c..7099bdd 100644
--- a/src/analysis/StackBufferAnalysis.cpp
+++ b/src/analysis/StackBufferAnalysis.cpp
@@ -5,8 +5,12 @@
 #include <optional>
 
 #include <llvm/ADT/SmallPtrSet.h>
+#include <llvm/Analysis/ValueTracking.h>
+#include <llvm/BinaryFormat/Dwarf.h>
 #include <llvm/IR/Constants.h>
+#include <llvm/IR/DebugInfoMetadata.h>
 #include <llvm/IR/Function.h>
+#include <llvm/IR/IntrinsicInst.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Type.h>
@@ -32,22 +36,24 @@ namespace ctrace::stack::analysis
             }
         };
 
-        // Taille (en nombre d'éléments) pour une alloca de tableau sur la stack
+        // Size (in elements) for a stack array alloca
         static std::optional<StackSize> getAllocaElementCount(llvm::AllocaInst* AI)
         {
             using namespace llvm;
 
             Type* elemTy = AI->getAllocatedType();
             StackSize count = 1;
+            bool hasArrayType = false;
 
-            // Cas "char test[10];" => alloca [10 x i8]
+            // Case "char test[10];" => alloca [10 x i8]
             if (auto* arrTy = dyn_cast<ArrayType>(elemTy))
             {
+                hasArrayType = true;
                 count *= arrTy->getNumElements();
                 elemTy = arrTy->getElementType();
             }
 
-            // Cas "alloca i8, i64 10" => alloca tableau avec taille constante
+            // Case "alloca i8, i64 10" => array alloca with constant size
             if (AI->isArrayAllocation())
             {
                 if (auto* C = dyn_cast<ConstantInt>(AI->getArraySize()))
@@ -56,10 +62,15 @@ namespace ctrace::stack::analysis
                 }
                 else
                 {
-                    // taille non constante - analyse plus compliquée, on ignore pour l'instant
+                    // non-constant size - more complex analysis, ignore for now
                     return std::nullopt;
                 }
             }
+            else if (!hasArrayType)
+            {
+                // Scalar alloca (struct/object), not an indexable buffer
+                return std::nullopt;
+            }
 
             return count;
         }
@@ -82,10 +93,10 @@ namespace ctrace::stack::analysis
             auto isArrayAlloca = [](const AllocaInst* AI) -> bool
             {
                 Type* T = AI->getAllocatedType();
-                // On considère comme "buffer de stack" :
-                //  - les vrais tableaux,
-                //  - les allocas de type tableau (VLA côté IR),
-                //  - les structs qui contiennent au moins un champ tableau.
+                // Consider a "stack buffer" as:
+                //  - real arrays,
+                //  - array-typed allocas (VLA in IR),
+                //  - structs that contain at least one array field.
                 if (T->isArrayTy() || AI->isArrayAllocation())
                     return true;
 
@@ -100,7 +111,7 @@ namespace ctrace::stack::analysis
                 return false;
             };
 
-            // Pour éviter les boucles d'aliasing bizarres
+            // Avoid weird aliasing loops
             SmallPtrSet<const Value*, 16> visited;
             const Value* cur = V;
 
@@ -110,19 +121,19 @@ namespace ctrace::stack::analysis
                 if (cur->hasName())
                     path.push_back(cur->getName().str());
 
-                // Cas 1 : on tombe sur une alloca.
+                // Case 1: we hit an alloca.
                 if (auto* AI = dyn_cast<AllocaInst>(cur))
                 {
                     if (isArrayAlloca(AI))
                     {
-                        // Alloca d'un buffer de stack (tableau) : cible finale.
+                        // Stack buffer alloca (array): final target.
                         return AI;
                     }
 
-                    // Sinon, c'est très probablement une variable locale de type pointeur
-                    // (char *ptr; char **pp; etc.). On parcourt les stores vers cette
-                    // variable pour voir quelles valeurs lui sont assignées, et on
-                    // tente de remonter jusqu'à une vraie alloca de tableau.
+                    // Otherwise, it's very likely a local pointer variable
+                    // (char *ptr; char **pp; etc.). Walk stores into this variable
+                    // to see what values get assigned, and try to trace back to
+                    // a real array alloca.
                     const AllocaInst* foundAI = nullptr;
 
                     for (BasicBlock& BB : F)
@@ -150,8 +161,8 @@ namespace ctrace::stack::analysis
                             }
                             else if (foundAI != cand)
                             {
-                                // Plusieurs bases différentes : aliasing ambigu,
-                                // on préfère abandonner plutôt que de se tromper.
+                                // Multiple different bases: ambiguous aliasing,
+                                // prefer to stop rather than be wrong.
                                 return nullptr;
                             }
                         }
@@ -159,37 +170,37 @@ namespace ctrace::stack::analysis
                     return foundAI;
                 }
 
-                // Cas 2 : bitcast -> on remonte l'opérande.
+                // Case 2: bitcast -> follow the operand.
                 if (auto* BC = dyn_cast<BitCastInst>(cur))
                 {
                     cur = BC->getOperand(0);
                     continue;
                 }
 
-                // Cas 3 : GEP -> on remonte sur le pointeur de base.
+                // Case 3: GEP -> follow the base pointer.
                 if (auto* GEP = dyn_cast<GetElementPtrInst>(cur))
                 {
                     cur = GEP->getPointerOperand();
                     continue;
                 }
 
-                // Cas 4 : load d'un pointeur. Exemple typique :
+                // Case 4: load of a pointer. Typical example:
                 //    char *ptr = test;
                 //    char *p2  = ptr;
                 //    char **pp = &ptr;
                 //    (*pp)[i] = ...
                 //
-                // On remonte au "container" du pointeur (variable locale, ou autre valeur)
-                // en suivant l'opérande du load.
+                // Walk up to the pointer "container" (local variable, or other value)
+                // by following the load operand.
                 if (auto* LI = dyn_cast<LoadInst>(cur))
                 {
                     cur = LI->getPointerOperand();
                     continue;
                 }
 
-                // Cas 5 : PHI de pointeurs (fusion de plusieurs alias) :
-                // on tente de résoudre chaque incoming et on s'assure qu'ils
-                // pointent tous vers la même alloca de tableau.
+                // Case 5: PHI of pointers (merge of aliases):
+                // try to resolve each incoming and ensure they
+                // all point to the same array alloca.
                 if (auto* PN = dyn_cast<PHINode>(cur))
                 {
                     const AllocaInst* foundAI = nullptr;
@@ -209,7 +220,7 @@ namespace ctrace::stack::analysis
                         }
                         else if (foundAI != cand)
                         {
-                            // PHI mélange plusieurs bases différentes : trop ambigu.
+                            // PHI mixes multiple different bases: too ambiguous.
                             return nullptr;
                         }
                     }
@@ -217,13 +228,82 @@ namespace ctrace::stack::analysis
                     return foundAI;
                 }
 
-                // Autres cas (arguments, globales complexes, etc.) : on arrête l'heuristique.
+                // Other cases (arguments, complex globals, etc.): stop the heuristic.
                 break;
             }
 
             return nullptr;
         }
 
+        static std::optional<bool> isAllocaArrayByDebugInfo(const llvm::AllocaInst* AI,
+                                                            const llvm::Function& F)
+        {
+            using namespace llvm;
+
+            for (const BasicBlock& BB : F)
+            {
+                for (const Instruction& I : BB)
+                {
+                    auto* DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+                    if (!DVI)
+                        continue;
+
+                    if (DVI->getNumVariableLocationOps() == 0)
+                        continue;
+
+                    const Value* loc = DVI->getVariableLocationOp(0);
+                    if (!loc)
+                        continue;
+
+                    const Value* base = getUnderlyingObject(loc);
+                    if (base != AI)
+                        continue;
+
+                    const DILocalVariable* var = DVI->getVariable();
+                    if (!var)
+                        return false;
+
+                    const DIType* type = var->getType();
+                    if (!type)
+                        return false;
+
+                    if (auto* composite = dyn_cast<DICompositeType>(type))
+                    {
+                        return composite->getTag() == dwarf::DW_TAG_array_type;
+                    }
+
+                    return false;
+                }
+            }
+
+            return std::nullopt;
+        }
+
+        static bool shouldUseAllocaFallback(const llvm::AllocaInst* AI, llvm::Function& F)
+        {
+            if (auto debugArray = isAllocaArrayByDebugInfo(AI, F); debugArray.has_value())
+            {
+                return *debugArray;
+            }
+
+            llvm::Type* allocatedTy = AI->getAllocatedType();
+            if (auto* arrTy = llvm::dyn_cast<llvm::ArrayType>(allocatedTy))
+            {
+                if (arrTy->getNumElements() <= 1 && !arrTy->getElementType()->isArrayTy())
+                    return false;
+                return true;
+            }
+
+            if (AI->isArrayAllocation())
+            {
+                if (auto* C = llvm::dyn_cast<llvm::ConstantInt>(AI->getArraySize()))
+                    return C->getZExtValue() > 1;
+                return true;
+            }
+
+            return false;
+        }
+
         static const llvm::AllocaInst* resolveArrayAllocaFromPointer(const llvm::Value* V,
                                                                      llvm::Function& F,
                                                                      std::vector<std::string>& path)
@@ -248,17 +328,17 @@ namespace ctrace::stack::analysis
                     if (!GEP)
                         continue;
 
-                    // 1) Trouver la base du pointeur (test, &test[0], ptr, etc.)
+                    // 1) Find the pointer base (test, &test[0], ptr, etc.)
                     const Value* basePtr = GEP->getPointerOperand();
                     std::vector<std::string> aliasPath;
                     const AllocaInst* AI = resolveArrayAllocaFromPointer(basePtr, F, aliasPath);
                     if (!AI)
                         continue;
 
-                    // 2) Déterminer la taille logique du tableau ciblé et récupérer l'index
-                    //    On essaie d'abord de la déduire du type traversé par la GEP
-                    //    (cas struct S { char buf[10]; }; s.buf[i]) puis on retombe
-                    //    sur la taille de l'alloca pour les cas plus simples (char buf[10]).
+                    // 2) Determine the logical target array size and retrieve the index.
+                    //    First try to infer it from the type traversed by the GEP
+                    //    (case struct S { char buf[10]; }; s.buf[i]), then fall back
+                    //    to the alloca size for simpler cases (char buf[10]).
                     StackSize arraySize = 0;
                     Value* idxVal = nullptr;
 
@@ -266,29 +346,29 @@ namespace ctrace::stack::analysis
 
                     if (auto* arrTy = dyn_cast<ArrayType>(srcElemTy))
                     {
-                        // Cas direct : alloca [N x T]; GEP indices [0, i]
+                        // Direct case: alloca [N x T]; GEP indices [0, i]
                         if (GEP->getNumIndices() < 2)
                             continue;
                         auto idxIt = GEP->idx_begin();
-                        ++idxIt; // saute le premier indice (souvent 0)
+                        ++idxIt; // skip the first index (often 0)
                         idxVal = idxIt->get();
                         arraySize = arrTy->getNumElements();
                     }
                     else if (auto* ST = dyn_cast<StructType>(srcElemTy))
                     {
-                        // Cas struct avec champ tableau:
+                        // Struct case with an array field:
                         //   %ptr = getelementptr inbounds %struct.S, %struct.S* %s,
                         //          i32 0, i32 <field>, i64 %i
                         //
-                        // On attend donc au moins 3 indices: [0, field, i]
+                        // Expect at least 3 indices: [0, field, i]
                         if (GEP->getNumIndices() >= 3)
                         {
                             auto idxIt = GEP->idx_begin();
 
-                            // premier indice (souvent 0)
+                            // first index (often 0)
                             auto* idx0 = dyn_cast<ConstantInt>(idxIt->get());
                             ++idxIt;
-                            // second indice: index de champ dans la struct
+                            // second index: field index in the struct
                             auto* fieldIdxC = dyn_cast<ConstantInt>(idxIt->get());
                             ++idxIt;
 
@@ -302,7 +382,7 @@ namespace ctrace::stack::analysis
                                     if (auto* fieldArrTy = dyn_cast<ArrayType>(fieldTy))
                                     {
                                         arraySize = fieldArrTy->getNumElements();
-                                        // Troisième indice = index dans le tableau interne
+                                        // Third index = index within the inner array
                                         idxVal = idxIt->get();
                                     }
                                 }
@@ -310,10 +390,13 @@ namespace ctrace::stack::analysis
                         }
                     }
 
-                    // Si on n'a pas réussi à déduire une taille via la GEP,
-                    // on retombe sur la taille dérivée de l'alloca (cas char buf[10]; ptr = buf; ptr[i]).
+                    // If we could not infer a size via the GEP,
+                    // fall back to the size derived from the alloca
+                    // (case char buf[10]; ptr = buf; ptr[i]).
                     if (arraySize == 0 || !idxVal)
                     {
+                        if (!shouldUseAllocaFallback(AI, F))
+                            continue;
                         auto maybeCount = getAllocaElementCount(const_cast<AllocaInst*>(AI));
                         if (!maybeCount)
                             continue;
@@ -321,7 +404,7 @@ namespace ctrace::stack::analysis
                         if (arraySize == 0)
                             continue;
 
-                        // Pour ces cas-là, on considère le premier indice comme l'index logique.
+                        // For these cases, treat the first index as the logical index.
                         if (GEP->getNumIndices() < 1)
                             continue;
                         auto idxIt = GEP->idx_begin();
@@ -331,14 +414,14 @@ namespace ctrace::stack::analysis
                     std::string varName =
                         AI->hasName() ? AI->getName().str() : std::string("<unnamed>");
 
-                    // "baseIdxVal" = variable de boucle "i" sans les casts (sext/zext...)
+                    // "baseIdxVal" = loop variable "i" without casts (sext/zext...)
                     Value* baseIdxVal = idxVal;
                     while (auto* cast = dyn_cast<CastInst>(baseIdxVal))
                     {
                         baseIdxVal = cast->getOperand(0);
                     }
 
-                    // 4) Cas index constant : test[11]
+                    // 4) Constant index case: test[11]
                     if (auto* CIdx = dyn_cast<ConstantInt>(idxVal))
                     {
                         auto idxValue = CIdx->getSExtValue();
@@ -401,12 +484,12 @@ namespace ctrace::stack::analysis
                         continue;
                     }
 
-                    // 5) Cas index variable : test[i] / ptr[i]
-                    // On regarde si on a un intervalle pour la valeur de base (i, pas le cast)
+                    // 5) Variable index case: test[i] / ptr[i]
+                    // Check whether we have a range for the base value (i, not the cast)
                     const Value* key = baseIdxVal;
 
-                    // Si l'index vient d'un load (pattern -O0 : load i, icmp, load i, gep),
-                    // on utilise le pointeur sous-jacent comme clé (l'alloca de i).
+                    // If the index comes from a load (O0 pattern: load i, icmp, load i, gep),
+                    // use the underlying pointer as the key (alloca of i).
                     if (auto* LI = dyn_cast<LoadInst>(baseIdxVal))
                     {
                         key = LI->getPointerOperand();
@@ -415,13 +498,13 @@ namespace ctrace::stack::analysis
                     auto itRange = ranges.find(key);
                     if (itRange == ranges.end())
                     {
-                        // pas de borne connue => on ne dit rien ici
+                        // no known bound => say nothing here
                         continue;
                     }
 
                     const IntRange& R = itRange->second;
 
-                    // 5.a) Borne supérieure hors bornes: UB >= arraySize
+                    // 5.a) Upper bound out of range: UB >= arraySize
                     if (R.hasUpper && R.upper >= 0 && static_cast<StackSize>(R.upper) >= arraySize)
                     {
                         StackSize ub = static_cast<StackSize>(R.upper);
@@ -481,7 +564,7 @@ namespace ctrace::stack::analysis
                         }
                     }
 
-                    // 5.b) Borne inférieure négative: LB < 0  => index potentiellement négatif
+                    // 5.b) Negative lower bound: LB < 0  => potentially negative index
                     if (R.hasLower && R.lower < 0)
                     {
                         for (User* GU : GEP->users())
@@ -540,8 +623,8 @@ namespace ctrace::stack::analysis
                             }
                         }
                     }
-                    // Si R.hasUpper && R.upper < arraySize et (pas de LB problématique),
-                    // on considère l'accès comme probablement sûr.
+                    // If R.hasUpper && R.upper < arraySize and (no problematic LB),
+                    // treat the access as probably safe.
                 }
             }
         }
@@ -576,7 +659,7 @@ namespace ctrace::stack::analysis
                     if (!GEP)
                         continue;
 
-                    // On remonte à la base pour trouver une alloca de tableau sur la stack.
+                    // Walk back to the base to find a stack array alloca.
                     const Value* basePtr = GEP->getPointerOperand();
                     std::vector<std::string> dummyAliasPath;
                     const AllocaInst* AI =
@@ -584,22 +667,29 @@ namespace ctrace::stack::analysis
                     if (!AI)
                         continue;
 
-                    // On récupère l'expression d'index utilisée dans le GEP.
+                    // Retrieve the index expression used in the GEP.
                     Value* idxVal = nullptr;
                     Type* srcElemTy = GEP->getSourceElementType();
+                    bool isDirectArray = false;
 
                     if (auto* arrTy = dyn_cast<ArrayType>(srcElemTy))
                     {
+                        isDirectArray = true;
                         // Pattern [N x T]* -> indices [0, i]
                         if (GEP->getNumIndices() < 2)
                             continue;
                         auto idxIt = GEP->idx_begin();
-                        ++idxIt; // saute le premier indice (souvent 0)
+                        ++idxIt; // skip the first index (often 0)
                         idxVal = idxIt->get();
                     }
                     else
                     {
-                        // Pattern T* -> indice unique [i] (cas char *ptr = test; ptr[i])
+                        if (!shouldUseAllocaFallback(AI, F))
+                            continue;
+                        auto maybeCount = getAllocaElementCount(const_cast<AllocaInst*>(AI));
+                        if (!maybeCount || *maybeCount <= 1)
+                            continue;
+                        // Pattern T* -> single index [i] (e.g. char *ptr = test; ptr[i])
                         if (GEP->getNumIndices() < 1)
                             continue;
                         auto idxIt = GEP->idx_begin();
@@ -609,7 +699,7 @@ namespace ctrace::stack::analysis
                     if (!idxVal)
                         continue;
 
-                    // On normalise un peu la clé d'index en enlevant les casts SSA.
+                    // Normalize the index key by stripping SSA casts.
                     const Value* idxKey = idxVal;
                     while (auto* cast = dyn_cast<CastInst>(const_cast<Value*>(idxKey)))
                     {
@@ -623,14 +713,14 @@ namespace ctrace::stack::analysis
                 }
             }
 
-            // Construction des warnings pour chaque buffer qui reçoit plusieurs stores.
+            // Build warnings for each buffer that receives multiple stores.
             for (auto& entry : infoMap)
             {
                 const AllocaInst* AI = entry.first;
                 const Info& info = entry.second;
 
                 if (info.storeCount <= 1)
-                    continue; // un seul store -> pas de warning
+                    continue; // single store -> no warning
 
                 MultipleStoreIssue issue;
                 issue.funcName = F.getName().str();

From f505dcc3c59c59b7d5a3b4c6a5dd3eb5168711a1 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:21:44 +0100
Subject: [PATCH 07/11] docs: document compdb, dump-ir, and related CLI options

---
 README.md | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 92f1705..5d95751 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,10 @@
 ./stack_usage_analyzer --mode=[abi/ir] test.[ll/c/cpp] other.[ll/c/cpp]
 ./stack_usage_analyzer main.cpp -I./include
 ./stack_usage_analyzer main.cpp -I./include --compile-arg=-I/opt/homebrew/opt/llvm@20/include
+./stack_usage_analyzer main.cpp --compile-commands=build/compile_commands.json
 ./stack_usage_analyzer main.cpp -I./include --only-file=./main.cpp --only-function=main
+./stack_usage_analyzer main.cpp --dump-ir=./debug/main.ll
+./stack_usage_analyzer a.c b.c --dump-ir=./debug
 ```
 
 ```
@@ -29,6 +32,11 @@
 --warnings-only keeps only important diagnostics
 --stack-limit=<value> overrides stack limit (bytes, or KiB/MiB/GiB)
 --compile-arg=<arg> passes an extra argument to the compiler
+--compile-commands=<path> uses compile_commands.json (file or directory)
+--compdb=<path> alias for --compile-commands
+--compdb-fast drops heavy build flags for faster analysis
+--timing prints compile/analysis timings to stderr
+--dump-ir=<path> writes LLVM IR to a file (or directory for multiple inputs)
 -I<dir> or -I <dir> adds an include directory
 -D<name>[=value] or -D <name>[=value] defines a macro
 --only-file=<path> or --only-file <path> filters by file
@@ -38,6 +46,13 @@
 --dump-filter prints filter decisions (stderr)
 ```
 
+To generate `compile_commands.json` with CMake, configure with
+`-DCMAKE_EXPORT_COMPILE_COMMANDS=ON` and point to the resulting file
+(often under `build/`).
+
+If analysis feels slow, `--compdb-fast` disables heavy flags (optimizations,
+sanitizers, profiling) while keeping include paths and macros.
+
 ### Example
 
 Given this code:
@@ -142,7 +157,7 @@ Function: main
 Examples:
 ```c
 char buf[10];
-return buf;    // renvoi pointeur vers stack → use-after-return
+return buf;    // returns pointer to stack -> use-after-return
 ```
 
 Or storing:

From b7ad381bfc082194fbf7aba1ee79c0ab626ea486 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:22:18 +0100
Subject: [PATCH 08/11] chore: translate comments

---
 test/bound-storage/ranges_test.c    | 54 ++++++++++++++---------------
 test/escape-stack/direct-callback.c |  2 +-
 test/escape-stack/global-buf.c      |  2 +-
 test/escape-stack/out_param.c       |  6 ++--
 test/escape-stack/return-buf.c      |  2 +-
 test/vla/deguised-constant.c        |  2 +-
 6 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/test/bound-storage/ranges_test.c b/test/bound-storage/ranges_test.c
index 2448c93..f064f05 100644
--- a/test/bound-storage/ranges_test.c
+++ b/test/bound-storage/ranges_test.c
@@ -1,10 +1,10 @@
 #include <stdio.h>
 
 /*
- * 1) Cas simples : borne sup OK / pas OK
+ * 1) Simple cases: upper bound OK / not OK
  */
 
-// AUCUN WARNING attendu (UB = 9, taille = 10)
+// NO WARNING expected (UB = 9, size = 10)
 void ub_ok(int i)
 {
     char buf[10];
@@ -13,7 +13,7 @@ void ub_ok(int i)
         buf[i] = 'A';
 }
 
-// WARNING UB attendu (UB = 10, taille = 10)
+// WARNING UB expected (UB = 10, size = 10)
 void ub_overflow(int i)
 {
     char buf[10];
@@ -28,10 +28,10 @@ void ub_overflow(int i)
 }
 
 /*
- * 2) Borne inf négative : index potentiellement < 0
+ * 2) Negative lower bound: index potentially < 0
  */
 
-// WARNING LB négatif attendu (i >= -3 && i < 5)
+// WARNING negative LB expected (i >= -3 && i < 5)
 void lb_negative(int i)
 {
     char buf[10];
@@ -45,7 +45,7 @@ void lb_negative(int i)
         buf[i] = 'C';
 }
 
-// WARNING LB négatif + UB hors borne (i >= -3 && i <= 15)
+// WARNING negative LB + UB out of bounds (i >= -3 && i <= 15)
 void lb_and_ub(int i)
 {
     char buf[10];
@@ -66,18 +66,18 @@ void lb_and_ub(int i)
 }
 
 /*
- * 3) if imbriqués : affiner l’intervalle (LB & UB)
+ * 3) Nested ifs: refine the range (LB & UB)
  *
  *   if (i <= 10) {
  *       if (i > 5)
  *           buf[i] = 'E';
  *   }
  *
- * Ici, on sait que 6 <= i <= 10
- * avec buf[8] → UB hors borne
+ * Here we know that 6 <= i <= 10
+ * with buf[8] -> UB out of bounds
  */
 
-// ATTENDU : UB hors borne (taille 8,  i ∈ [6,10])
+// EXPECTED: UB out of bounds (size 8, i in [6,10])
 void nested_if_overflow(int i)
 {
     char buf[8];
@@ -96,7 +96,7 @@ void nested_if_overflow(int i)
     }
 }
 
-// Variante “safe” pour comparaison (taille 16,  i ∈ [6,10]) → idéalement aucun warning
+// "Safe" variant for comparison (size 16, i in [6,10]) -> ideally no warnings
 void nested_if_ok(int i)
 {
     char buf[16];
@@ -111,10 +111,10 @@ void nested_if_ok(int i)
 }
 
 /*
- * 4) Boucles : patterns classiques de for
+ * 4) Loops: classic for patterns
  */
 
-// AUCUN WARNING attendu (0 <= i < 10, taille 10)
+// NO WARNING expected (0 <= i < 10, size 10)
 void loop_ok(void)
 {
     char buf[10];
@@ -123,7 +123,7 @@ void loop_ok(void)
         buf[i] = 'G';
 }
 
-// WARNING UB attendu (0 <= i <= 10, taille = 10)
+// WARNING UB expected (0 <= i <= 10, size = 10)
 void loop_ub_overflow(void)
 {
     char buf[10];
@@ -137,7 +137,7 @@ void loop_ub_overflow(void)
         buf[i] = 'H';
 }
 
-// WARNING LB négatif attendu (-3 <= i < 5, taille = 10)
+// WARNING negative LB expected (-3 <= i < 5, size = 10)
 void loop_lb_negative(void)
 {
     char buf[10];
@@ -147,11 +147,11 @@ void loop_lb_negative(void)
 }
 
 /*
- * 5) Cas unreachable mais avec accès hors borne
- *    (tu as déjà ce genre de logique, mais ça teste qu’on garde bien l’info)
+ * 5) Unreachable case with out-of-bounds access
+ *    (this kind of logic already exists, but this checks that the info is preserved)
  */
 
-// ATTENDU : warning overflow + [info] unreachable
+// EXPECTED: overflow warning + [info] unreachable
 void unreachable_example(void)
 {
     int i = 1;
@@ -164,16 +164,16 @@ void unreachable_example(void)
     //     (this is a write access)
     //     [info] this access appears unreachable at runtime (condition is always false for this branch)
     if (i > 10)
-    { // condition fausse à l’exécution
+    { // condition false at runtime
         buf[11] = 'J';
     }
 }
 
 /*
- * 6) Aliasing de pointeur + intervalle (LB & UB)
+ * 6) Pointer aliasing + range (LB & UB)
  */
 
-// ATTENDU : UB + LB négatif (p = buf)
+// EXPECTED: UB + negative LB (p = buf)
 void alias_lb_ub(int i)
 {
     char buf[10];
@@ -194,7 +194,7 @@ void alias_lb_ub(int i)
         p[i] = 'K';
 }
 
-// ATTENDU : aucun warning (0 <= i < 10)
+// EXPECTED: no warning (0 <= i < 10)
 void alias_ok(int i)
 {
     char buf[10];
@@ -205,8 +205,8 @@ void alias_ok(int i)
 }
 
 /*
- * 7) Combinaison bizarre : bornes serrées, mais toujours safe
- *    i ∈ [2,7], buf[8] → normalement OK
+ * 7) Weird combination: tight bounds, but still safe
+ *    i in [2,7], buf[8] -> normally OK
  */
 
 void tight_range_ok(int i)
@@ -218,8 +218,8 @@ void tight_range_ok(int i)
 }
 
 /*
- * 8) Cas extrême : bornes très larges
- *    i >= -100 && i <= 100, buf[10] → LB négatif + UB hors borne
+ * 8) Extreme case: very wide bounds
+ *    i >= -100 && i <= 100, buf[10] -> negative LB + UB out of bounds
  */
 
 void huge_range(int i)
@@ -242,7 +242,7 @@ void huge_range(int i)
 }
 
 /*
- * main : juste pour que le compilateur ne vire pas tout si optimisation
+ * main: just to prevent the compiler from optimizing everything away
  */
 
 int main(void)
diff --git a/test/escape-stack/direct-callback.c b/test/escape-stack/direct-callback.c
index 5a05bf7..1e02fa7 100644
--- a/test/escape-stack/direct-callback.c
+++ b/test/escape-stack/direct-callback.c
@@ -7,7 +7,7 @@
 //     // at line 10, column 5
 //     // [!!] stack pointer escape: address of variable 'buf' escapes this function
 //     //     address passed as argument to function 'sink' (callee may capture the pointer beyond this function)
-//     sink(buf); // le callee peut capturer le pointeur
+//     sink(buf); // callee may capture the pointer
 // }
 
 void temporary(void)
diff --git a/test/escape-stack/global-buf.c b/test/escape-stack/global-buf.c
index be91b83..413bbaa 100644
--- a/test/escape-stack/global-buf.c
+++ b/test/escape-stack/global-buf.c
@@ -7,7 +7,7 @@ void set_global(void)
     // at line 10, column 7
     // [!!] stack pointer escape: address of variable 'buf' escapes this function
     //     stored into global variable 'g' (pointer may be used after the function returns)
-    g = buf; // warning attendu: store_global
+    g = buf; // warning expected: store_global
 }
 
 int main(void)
diff --git a/test/escape-stack/out_param.c b/test/escape-stack/out_param.c
index 9b46005..e006deb 100644
--- a/test/escape-stack/out_param.c
+++ b/test/escape-stack/out_param.c
@@ -4,11 +4,11 @@ void leak_out_param(char** out)
     // at line 7, column 10
     // [!!] stack pointer escape: address of variable 'buf' escapes this function
     //     stored through a non-local pointer (e.g. via an out-parameter; pointer may outlive this function)
-    *out = buf; // fuite via paramètre de sortie
+    *out = buf; // leak via out-parameter
 }
 
 void safe_out_param(char** out)
 {
-    char* local = 0; // pointeur, mais pas de stack buffer derrière
-    *out = local;    // pas une adresse de variable de stack
+    char* local = 0; // pointer, but no stack buffer behind it
+    *out = local;    // not a stack variable address
 }
diff --git a/test/escape-stack/return-buf.c b/test/escape-stack/return-buf.c
index 75b82d4..00dd416 100644
--- a/test/escape-stack/return-buf.c
+++ b/test/escape-stack/return-buf.c
@@ -4,7 +4,7 @@ char* ret_buf(void)
     // at line 7, column 5
     // [!!] stack pointer escape: address of variable 'buf' escapes this function
     //     escape via return statement (pointer to stack returned to caller)
-    return buf; // warning attendu: return
+    return buf; // warning expected: return
 }
 
 int main(void)
diff --git a/test/vla/deguised-constant.c b/test/vla/deguised-constant.c
index 3575e03..54b45f3 100644
--- a/test/vla/deguised-constant.c
+++ b/test/vla/deguised-constant.c
@@ -1,7 +1,7 @@
 void foo(void)
 {
     int n = 6;
-    char buf[n]; // techniquement VLA, mais bornée et triviale, patch car faux positif
+    char buf[n]; // technically a VLA, but bounded and trivial; patched because it was a false positive
 }
 
 int main(int ac, char** av)

From 0d14a33f82cfca2edbe6f347a9db66b645d5fcb3 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:22:43 +0100
Subject: [PATCH 09/11] chore: translate comments

---
 src/StackUsageAnalyzer.cpp         |  99 ++++++++++++++++++------
 src/analysis/DynamicAlloca.cpp     |  26 +++----
 src/analysis/IRValueUtils.cpp      |  11 ++-
 src/analysis/IntRanges.cpp         |  10 +--
 src/analysis/StackComputation.cpp  | 117 ++++++++++++++++++++++++++---
 src/mangle.cpp                     |  14 ++--
 src/report/ReportSerialization.cpp |   6 +-
 7 files changed, 218 insertions(+), 65 deletions(-)

diff --git a/src/StackUsageAnalyzer.cpp b/src/StackUsageAnalyzer.cpp
index 153960f..f68cd22 100644
--- a/src/StackUsageAnalyzer.cpp
+++ b/src/StackUsageAnalyzer.cpp
@@ -1,5 +1,6 @@
 #include "StackUsageAnalyzer.hpp"
 
+#include <chrono>
 #include <cstdint>
 #include <map>
 #include <string>
@@ -410,8 +411,8 @@ namespace ctrace::stack
                     {
                         auto* BB = issue.inst->getParent();
 
-                        // Parcourt les prédécesseurs du bloc pour voir si certains
-                        // ont une branche conditionnelle avec une condition constante.
+                        // Walk block predecessors to see whether some
+                        // have a conditional branch with a constant condition.
                         for (auto* Pred : predecessors(BB))
                         {
                             auto* BI = dyn_cast<BranchInst>(Pred->getTerminator());
@@ -429,7 +430,7 @@ namespace ctrace::stack
                             if (!C0 || !C1)
                                 continue;
 
-                            // Évalue le résultat de l'ICmp pour ces constantes (implémentation maison).
+                            // Evaluate the ICmp result for these constants (homegrown implementation).
                             bool condTrue = false;
                             auto pred = CI->getPredicate();
                             const auto& v0 = C0->getValue();
@@ -468,22 +469,22 @@ namespace ctrace::stack
                                 condTrue = v0.uge(v1);
                                 break;
                             default:
-                                // On ne traite pas d'autres prédicats exotiques ici
+                                // Do not handle other exotic predicates here.
                                 continue;
                             }
 
-                            // Branchement du type:
+                            // Branch of the form:
                             //   br i1 %cond, label %then, label %else
-                            // Successeur 0 pris si condTrue == true
-                            // Successeur 1 pris si condTrue == false
+                            // Successor 0 taken if condTrue == true
+                            // Successor 1 taken if condTrue == false
                             if (BB == BI->getSuccessor(0) && condTrue == false)
                             {
-                                // Le bloc "then" n'est jamais atteint.
+                                // The "then" block is never reached.
                                 isUnreachable = true;
                             }
                             else if (BB == BI->getSuccessor(1) && condTrue == true)
                             {
-                                // Le bloc "else" n'est jamais atteint.
+                                // The "else" block is never reached.
                                 isUnreachable = true;
                             }
                         }
@@ -1108,75 +1109,119 @@ namespace ctrace::stack
 
     AnalysisResult analyzeModule(llvm::Module& mod, const AnalysisConfig& config)
     {
+        using Clock = std::chrono::steady_clock;
+        auto logDuration = [&](const char* label, Clock::time_point start)
+        {
+            if (!config.timing)
+                return;
+            auto end = Clock::now();
+            auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+            std::cerr << label << " done in " << ms << " ms\n";
+        };
+
+        auto t0 = Clock::now();
         runFunctionAttrsPass(mod);
+        logDuration("Function attrs pass", t0);
 
+        t0 = Clock::now();
         ModuleAnalysisContext ctx = buildContext(mod, config);
+        logDuration("Build context", t0);
         const llvm::DataLayout& DL = *ctx.dataLayout;
         auto shouldAnalyzeFunction = [&](const llvm::Function& F) -> bool
         { return ctx.shouldAnalyze(F); };
 
-        // 1) Stack locale par fonction
+        // 1) Local stack per function
+        t0 = Clock::now();
         LocalStackMap localStack = computeLocalStacks(ctx);
+        logDuration("Compute local stacks", t0);
 
-        // 2) Graphe d'appels
+        // 2) Call graph
+        t0 = Clock::now();
         analysis::CallGraph CG = buildCallGraphFiltered(ctx);
+        logDuration("Build call graph", t0);
 
-        // 3) Propagation + détection de récursivité
+        // 3) Propagation + recursion detection
+        t0 = Clock::now();
         analysis::InternalAnalysisState state = computeRecursionState(ctx, CG, localStack);
+        logDuration("Compute recursion state", t0);
 
-        // 4) Construction du résultat public
+        // 4) Build public result
         FunctionAuxData aux;
+        t0 = Clock::now();
         AnalysisResult result = buildResults(ctx, localStack, state, CG, aux);
+        logDuration("Build results", t0);
 
         // 4b) Emit summary diagnostics for recursion/overflow flags (for JSON parity)
+        t0 = Clock::now();
         emitSummaryDiagnostics(result, ctx, aux);
+        logDuration("Emit summary diagnostics", t0);
 
+        t0 = Clock::now();
         StackSize allocaLargeThreshold = analysis::computeAllocaLargeThreshold(config);
+        logDuration("Compute alloca threshold", t0);
 
-        // 6) Détection des dépassements de buffer sur la stack (analyse intra-fonction)
+        // 6) Detect stack buffer overflows (intra-function analysis)
+        t0 = Clock::now();
         std::vector<analysis::StackBufferOverflowIssue> bufferIssues =
             analysis::analyzeStackBufferOverflows(mod, shouldAnalyzeFunction);
         appendStackBufferDiagnostics(result, bufferIssues);
+        logDuration("Stack buffer overflows", t0);
 
-        // 8) Détection des allocations dynamiques sur la stack (VLA / alloca variable)
+        // 8) Detect dynamic stack allocations (VLA / variable alloca)
+        t0 = Clock::now();
         std::vector<analysis::DynamicAllocaIssue> dynAllocaIssues =
             analysis::analyzeDynamicAllocas(mod, shouldAnalyzeFunction);
         appendDynamicAllocaDiagnostics(result, dynAllocaIssues);
+        logDuration("Dynamic allocas", t0);
 
-        // 10) Analyse des usages d'alloca (tainted / taille excessive)
+        // 10) Analyze alloca usage (tainted / excessive size)
+        t0 = Clock::now();
         std::vector<analysis::AllocaUsageIssue> allocaUsageIssues = analysis::analyzeAllocaUsage(
             mod, DL, state.RecursiveFuncs, state.InfiniteRecursionFuncs, shouldAnalyzeFunction);
         appendAllocaUsageDiagnostics(result, config, allocaLargeThreshold, allocaUsageIssues);
+        logDuration("Alloca usage", t0);
 
-        // 11) Détection des débordements via memcpy/memset sur des buffers de stack
+        // 11) Detect overflows via memcpy/memset on stack buffers
+        t0 = Clock::now();
         std::vector<analysis::MemIntrinsicIssue> memIssues =
             analysis::analyzeMemIntrinsicOverflows(mod, DL, shouldAnalyzeFunction);
         appendMemIntrinsicDiagnostics(result, memIssues);
+        logDuration("Mem intrinsic overflows", t0);
 
-        // 11b) Détection d'écritures avec longueur "size-k"
+        // 11b) Detect writes with "size-k" length
+        t0 = Clock::now();
         std::vector<analysis::SizeMinusKWriteIssue> sizeMinusKIssues =
             analysis::analyzeSizeMinusKWrites(mod, DL, shouldAnalyzeFunction);
         appendSizeMinusKDiagnostics(result, sizeMinusKIssues);
+        logDuration("Size-minus-k writes", t0);
 
-        // 12) Détection de plusieurs stores dans un même buffer de stack
+        // 12) Detect multiple stores into the same stack buffer
+        t0 = Clock::now();
         std::vector<analysis::MultipleStoreIssue> multiStoreIssues =
             analysis::analyzeMultipleStores(mod, shouldAnalyzeFunction);
         appendMultipleStoreDiagnostics(result, multiStoreIssues);
+        logDuration("Multiple stores", t0);
 
-        // 13) Détection des reconstructions invalides de pointeur de base (offsetof/container_of)
+        // 13) Detect invalid base pointer reconstructions (offsetof/container_of)
+        t0 = Clock::now();
         std::vector<analysis::InvalidBaseReconstructionIssue> baseReconIssues =
             analysis::analyzeInvalidBaseReconstructions(mod, DL, shouldAnalyzeFunction);
         appendInvalidBaseReconstructionDiagnostics(result, baseReconIssues);
+        logDuration("Invalid base reconstructions", t0);
 
-        // 14) Détection de fuite de pointeurs de stack (use-after-return potentiel)
+        // 14) Detect stack pointer escapes (potential use-after-return)
+        t0 = Clock::now();
         std::vector<analysis::StackPointerEscapeIssue> escapeIssues =
             analysis::analyzeStackPointerEscapes(mod, shouldAnalyzeFunction);
         appendStackPointerEscapeDiagnostics(result, escapeIssues);
+        logDuration("Stack pointer escapes", t0);
 
         // 15) Const-correctness: parameters that can be made const
+        t0 = Clock::now();
         std::vector<analysis::ConstParamIssue> constParamIssues =
             analysis::analyzeConstParams(mod, shouldAnalyzeFunction);
         appendConstParamDiagnostics(result, constParamIssues);
+        logDuration("Const params", t0);
 
         return result;
     }
@@ -1193,7 +1238,19 @@ namespace ctrace::stack
             return AnalysisResult{config, {}};
         }
 
+        using Clock = std::chrono::steady_clock;
+        if (config.timing)
+            std::cerr << "Analyzing " << filename << "...\n";
+        auto analyzeStart = Clock::now();
         AnalysisResult result = analyzeModule(*load.module, config);
+        if (config.timing)
+        {
+            auto analyzeEnd = Clock::now();
+            auto ms =
+                std::chrono::duration_cast<std::chrono::milliseconds>(analyzeEnd - analyzeStart)
+                    .count();
+            std::cerr << "Analysis done in " << ms << " ms\n";
+        }
         for (auto& f : result.functions)
         {
             if (f.filePath.empty())
diff --git a/src/analysis/DynamicAlloca.cpp b/src/analysis/DynamicAlloca.cpp
index 1d4e93a..fb0c322 100644
--- a/src/analysis/DynamicAlloca.cpp
+++ b/src/analysis/DynamicAlloca.cpp
@@ -29,28 +29,28 @@ namespace ctrace::stack::analysis
                     if (!AI)
                         continue;
 
-                    // Taille d'allocation : on distingue trois cas :
-                    //  - constante immédiate               -> pas une VLA
-                    //  - dérivée d'une constante simple    -> pas une VLA (heuristique)
-                    //  - vraiment dépendante d'une valeur  -> VLA / alloca variable
+                    // Allocation size: we distinguish three cases:
+                    //  - immediate constant               -> not a VLA
+                    //  - derived from a simple constant   -> not a VLA (heuristic)
+                    //  - truly value-dependent            -> VLA / variable alloca
                     Value* arraySizeVal = AI->getArraySize();
 
-                    // 1) Cas taille directement constante dans l'IR
+                    // 1) Size is directly constant in the IR
                     if (llvm::isa<llvm::ConstantInt>(arraySizeVal))
-                        continue; // taille connue à la compilation, OK
+                        continue; // compile-time known size, OK
 
-                    // 2) Heuristique "smart" : essayer de remonter à une constante
-                    //    via les stores dans une variable locale (tryGetConstFromValue).
-                    //    Exemple typique :
+                    // 2) "Smart" heuristic: try to trace back to a constant
+                    //    via stores into a local variable (tryGetConstFromValue).
+                    //    Typical example:
                     //      int n = 6;
-                    //      char buf[n];   // en C : VLA, mais ici n est en fait constant
+                    //      char buf[n];   // in C: VLA, but here n is actually constant
                     //
-                    //    Dans ce cas, on ne veut pas spammer avec un warning VLA :
-                    //    on traite ça comme une taille effectivement constante.
+                    //    In this case we don't want to spam with a VLA warning:
+                    //    treat it as an effectively constant size.
                     if (tryGetConstFromValue(arraySizeVal, F) != nullptr)
                         continue;
 
-                    // 3) Ici, on considère que c'est une vraie VLA / alloca dynamique
+                    // 3) Here we consider it a real VLA / dynamic alloca
                     DynamicAllocaIssue issue;
                     issue.funcName = F.getName().str();
                     issue.varName = deriveAllocaName(AI);
diff --git a/src/analysis/IRValueUtils.cpp b/src/analysis/IRValueUtils.cpp
index 7c7d15f..05ddb33 100644
--- a/src/analysis/IRValueUtils.cpp
+++ b/src/analysis/IRValueUtils.cpp
@@ -91,19 +91,18 @@ namespace ctrace::stack::analysis
     {
         using namespace llvm;
 
-        // On enlève d'abord les cast (sext/zext/trunc, etc.) pour arriver
-        // à la vraie valeur “de base”.
+        // First remove casts (sext/zext/trunc, etc.) to reach the real base value.
         const Value* cur = V;
         while (auto* cast = dyn_cast<const CastInst>(cur))
         {
             cur = cast->getOperand(0);
         }
 
-        // Cas trivial : c'est déjà une constante entière.
+        // Trivial case: already an integer constant.
         if (auto* C = dyn_cast<const ConstantInt>(cur))
             return C;
 
-        // Cas -O0 typique : on compare un load d'une variable locale.
+        // Typical -O0 case: comparing a load from a local variable.
         auto* LI = dyn_cast<const LoadInst>(cur);
         if (!LI)
             return nullptr;
@@ -111,7 +110,7 @@ namespace ctrace::stack::analysis
         const Value* ptr = LI->getPointerOperand();
         const ConstantInt* found = nullptr;
 
-        // Version ultra-simple : on cherche un store de constante dans la fonction.
+        // Ultra-simple version: look for a constant store in the function.
         for (const BasicBlock& BB : F)
         {
             for (const Instruction& I : BB)
@@ -123,7 +122,7 @@ namespace ctrace::stack::analysis
                     continue;
                 if (auto* C = dyn_cast<const ConstantInt>(SI->getValueOperand()))
                 {
-                    // On garde la dernière constante trouvée (si plusieurs stores, c'est naïf).
+                    // Keep the last constant found (if multiple stores, this is naive).
                     found = C;
                 }
             }
diff --git a/src/analysis/IntRanges.cpp b/src/analysis/IntRanges.cpp
index 248bb2a..4f436dd 100644
--- a/src/analysis/IntRanges.cpp
+++ b/src/analysis/IntRanges.cpp
@@ -98,7 +98,7 @@ namespace ctrace::stack::analysis
                             ub = c;
                             break;
                         case ICmpInst::ICMP_NE:
-                            // approximation : V != C  => V <= C (très conservateur)
+                            // approximation: V != C  => V <= C (very conservative)
                             hasUB = true;
                             ub = c;
                             break;
@@ -108,7 +108,7 @@ namespace ctrace::stack::analysis
                     }
                     else
                     {
-                        // C ? V  <=>  V ? C (inversé)
+                        // C ? V  <=>  V ? C (reversed)
                         switch (pred)
                         {
                         case ICmpInst::ICMP_SGT: // C > V  => V < C => V <= C-1
@@ -219,7 +219,7 @@ namespace ctrace::stack::analysis
 
                 bool valueIsOp0 = (V == op0);
 
-                // On choisit le groupe de prédicats
+                // Choose the predicate group
                 if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE ||
                     pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE ||
                     pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_NE)
@@ -235,10 +235,10 @@ namespace ctrace::stack::analysis
                 if (!(hasLB || hasUB))
                     continue;
 
-                // Applique la contrainte sur V lui-même
+                // Apply the constraint to V itself
                 applyConstraint(V, hasLB, lb, hasUB, ub);
 
-                // Et éventuellement sur le pointeur sous-jacent si V est un load
+                // And possibly to the underlying pointer if V is a load
                 if (auto* LI = dyn_cast<LoadInst>(V))
                 {
                     const Value* ptr = LI->getPointerOperand();
diff --git a/src/analysis/StackComputation.cpp b/src/analysis/StackComputation.cpp
index 28c7fb4..a197b46 100644
--- a/src/analysis/StackComputation.cpp
+++ b/src/analysis/StackComputation.cpp
@@ -1,5 +1,8 @@
 #include "analysis/StackComputation.hpp"
 
+#include <algorithm>
+#include <unordered_map>
+#include <unordered_set>
 #include <llvm/IR/Constants.h>
 #include <llvm/IR/Dominators.h>
 #include <llvm/IR/Function.h>
@@ -42,7 +45,7 @@ namespace ctrace::stack::analysis
 
                     if (Callee && !Callee->isDeclaration() && Callee != Self)
                     {
-                        return true; // appel vers une autre fonction
+                        return true; // call to another function
                     }
                 }
             }
@@ -109,7 +112,7 @@ namespace ctrace::stack::analysis
             LocalStackInfo info = computeLocalStackBase(F, DL);
 
             llvm::MaybeAlign MA = DL.getStackAlignment();
-            unsigned stackAlign = MA ? MA->value() : 1u; // 16 sur beaucoup de cibles
+            unsigned stackAlign = MA ? MA->value() : 1u; // 16 on many targets
 
             StackSize frameSize = info.bytes;
 
@@ -141,14 +144,6 @@ namespace ctrace::stack::analysis
             {
                 if (itState->second == Visiting)
                 {
-                    // Cycle détecté : on marque tous les noeuds actuellement en "Visiting"
-                    for (auto& p : State)
-                    {
-                        if (p.second == Visiting)
-                        {
-                            Res.RecursiveFuncs.insert(p.first);
-                        }
-                    }
                     auto itLocal = LocalStack.find(F);
                     if (itLocal != LocalStack.end())
                     {
@@ -194,6 +189,102 @@ namespace ctrace::stack::analysis
             State[F] = Visited;
             return total;
         }
+
+        // Reports whether the call graph records a direct edge from F back to F,
+        // i.e. F appears in its own callee list. A function with no entry in CG
+        // is treated as having no self call.
+        static bool hasSelfCall(const llvm::Function* F, const CallGraph& CG)
+        {
+            const auto entry = CG.find(F);
+            if (entry == CG.end())
+                return false;
+
+            const auto& callees = entry->second;
+            return std::any_of(callees.begin(), callees.end(),
+                               [F](const llvm::Function* callee) { return callee == F; });
+        }
+
+        struct TarjanState // bookkeeping shared by strongConnect() calls during one traversal
+        {
+            std::unordered_map<const llvm::Function*, int> index;   // visit number, set on first visit
+            std::unordered_map<const llvm::Function*, int> lowlink; // smallest index found reachable so far
+            std::vector<const llvm::Function*> stack;   // visited nodes not yet popped into a component
+            std::unordered_set<const llvm::Function*> onStack; // membership test for `stack`
+            int nextIndex = 0;                                 // next visit number to hand out
+            std::set<const llvm::Function*> recursive; // multi-node components and direct self-callers
+        };
+
+        // Depth-first step of Tarjan's strongly connected components search, started
+        // at V. Components of two or more functions, and single functions that call
+        // themselves directly, are added to state.recursive.
+        static void strongConnect(const llvm::Function* V, const CallGraph& CG, TarjanState& state)
+        {
+            const int order = state.nextIndex++;
+            state.index[V] = order;
+            state.lowlink[V] = order;
+            state.stack.push_back(V);
+            state.onStack.insert(V);
+
+            const auto edges = CG.find(V);
+            if (edges != CG.end())
+            {
+                for (const llvm::Function* W : edges->second)
+                {
+                    if (state.index.count(W) == 0)
+                    {
+                        // First visit of W: explore it, then inherit its lowlink.
+                        strongConnect(W, CG, state);
+                        state.lowlink[V] = std::min(state.lowlink[V], state.lowlink[W]);
+                    }
+                    else if (state.onStack.find(W) != state.onStack.end())
+                    {
+                        // W is still on the stack, so the edge stays inside the open component.
+                        state.lowlink[V] = std::min(state.lowlink[V], state.index[W]);
+                    }
+                }
+            }
+
+            // Only the root of a component (lowlink == index) pops it off the stack.
+            if (state.lowlink[V] != state.index[V])
+                return;
+
+            std::vector<const llvm::Function*> members;
+            for (;;)
+            {
+                const llvm::Function* top = state.stack.back();
+                state.stack.pop_back();
+                state.onStack.erase(top);
+                members.push_back(top);
+                if (top == V)
+                    break;
+            }
+
+            if (members.size() > 1)
+                state.recursive.insert(members.begin(), members.end());
+            else if (hasSelfCall(V, CG))
+                state.recursive.insert(V);
+        }
+
+        // Runs strongConnect() from every unvisited node; returns the recursive functions.
+        static std::set<const llvm::Function*>
+        computeRecursiveFunctions(const CallGraph& CG,
+                                  const std::vector<const llvm::Function*>& nodes)
+        {
+            TarjanState state;
+            state.index.reserve(nodes.size());
+            state.lowlink.reserve(nodes.size());
+            state.stack.reserve(nodes.size());
+            state.onStack.reserve(nodes.size());
+
+            for (const llvm::Function* V : nodes)
+                if (state.index.find(V) == state.index.end())
+                    strongConnect(V, CG, state);
+            // `return state.recursive;` would deep-copy the set (a member of a local is
+            // neither elided nor implicitly moved), so swap it into a returnable local.
+            std::set<const llvm::Function*> recursive;
+            recursive.swap(state.recursive);
+            return recursive;
+        }
     } // namespace
 
     CallGraph buildCallGraph(llvm::Module& M)
@@ -253,11 +344,17 @@ namespace ctrace::stack::analysis
         InternalAnalysisState Res;
         std::map<const llvm::Function*, VisitState> State;
 
+        std::vector<const llvm::Function*> nodes;
+        nodes.reserve(LocalStack.size());
+
         for (auto& p : LocalStack)
         {
             State[p.first] = NotVisited;
+            nodes.push_back(p.first);
         }
 
+        Res.RecursiveFuncs = computeRecursiveFunctions(CG, nodes);
+
         for (auto& p : LocalStack)
         {
             const llvm::Function* F = p.first;
diff --git a/src/mangle.cpp b/src/mangle.cpp
index 8c93e90..18a244f 100644
--- a/src/mangle.cpp
+++ b/src/mangle.cpp
@@ -8,20 +8,20 @@ namespace ctrace_tools
     {
         std::stringstream mangled;
 
-        // Préfixe standard pour les symboles C++ dans l'Itanium ABI
+        // Standard prefix for C++ symbols in the Itanium ABI.
         mangled << "_Z";
 
-        // Si un namespace est présent, on utilise 'N' et on encode le nom
+        // If a namespace is present, use 'N' and encode the name.
         if (!namespaceName.empty())
         {
             mangled << "N";
             mangled << namespaceName.length() << namespaceName;
         }
 
-        // Ajouter le nom de la fonction avec sa longueur
+        // Add the function name with its length.
         mangled << functionName.length() << functionName;
 
-        // Encoder les types de paramètres
+        // Encode parameter types.
         for (const std::string& param : paramTypes)
         {
             if (param == "int")
@@ -38,7 +38,7 @@ namespace ctrace_tools
             }
             else if (param == "std::string")
             {
-                mangled << "Ss"; // 'S' pour substitution, 's' pour std::string
+                mangled << "Ss"; // 'S' for substitution, 's' for std::string
             }
             else if (param == "float")
             {
@@ -54,12 +54,12 @@ namespace ctrace_tools
             }
             else
             {
-                // Pour les types complexes ou non reconnus, encoder avec longueur + nom
+                // For complex or unknown types, encode as length + name.
                 mangled << param.length() << param;
             }
         }
 
-        // Fermer le namespace avec 'E' si utilisé
+        // Close the namespace with 'E' if used.
         if (!namespaceName.empty())
         {
             mangled << "E";
diff --git a/src/report/ReportSerialization.cpp b/src/report/ReportSerialization.cpp
index 38f0b19..fd88046 100644
--- a/src/report/ReportSerialization.cpp
+++ b/src/report/ReportSerialization.cpp
@@ -10,7 +10,7 @@ namespace ctrace::stack
     namespace
     {
 
-        // Petit helper pour échapper les chaînes JSON.
+        // Small helper to escape JSON strings.
         static std::string jsonEscape(const std::string& s)
         {
             std::string out;
@@ -113,7 +113,7 @@ namespace ctrace::stack
         os << "    \"analysisTimeMs\": " << -1 << "\n";
         os << " },\n";
 
-        // Fonctions
+        // Functions
         os << "  \"functions\": [\n";
         for (std::size_t i = 0; i < result.functions.size(); ++i)
         {
@@ -261,7 +261,7 @@ namespace ctrace::stack
         {
             const auto& d = result.diagnostics[i];
             os << "        {\n";
-            // Pour le moment, un seul ruleId générique; tu pourras le spécialiser plus tard.
+            // For now, use a single generic ruleId; you can specialize it later.
             const std::string ruleId =
                 d.ruleId.empty() ? std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId;
             os << "          \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n";

From 510adef7b24e4260c4d732f284b70932c3edf835 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:24:14 +0100
Subject: [PATCH 10/11] chore: enable compile_commands.json export in CMake

---
 CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7a14cf..5a90049 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,6 +14,7 @@ endif()
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 set(LLVM_LINK_LLVM_DYLIB ON)
 
@@ -50,6 +51,7 @@ set(STACK_ANALYZER_SOURCES
     src/StackUsageAnalyzer.cpp
     src/analysis/AllocaUsage.cpp
     src/analysis/AnalyzerUtils.cpp
+    src/analysis/CompileCommands.cpp
     src/analysis/ConstParamAnalysis.cpp
     src/analysis/DynamicAlloca.cpp
     src/analysis/FunctionFilter.cpp
@@ -150,8 +152,8 @@ if(BUILD_CLI)
     target_link_libraries(stack_usage_analyzer
       PRIVATE
         stack_usage_analyzer_lib
-        # pas besoin de relinker cc::compilerlib_static ici,
-        # il est déjà dans la lib
+        # No need to relink cc::compilerlib_static here,
+        # it is already linked into the library.
     )
 
     if(ENABLE_DEBUG_ASAN)

From 9f0a09086a27c705c16de5f66962989687aa179e Mon Sep 17 00:00:00 2001
From: Hugo <hugo.payet@epitech.eu>
Date: Sat, 7 Feb 2026 19:24:38 +0100
Subject: [PATCH 11/11] chore: translate comments

---
 extern-project/src/main.cpp                   |   2 +-
 include/StackUsageAnalyzer.hpp                |  47 ++--
 .../analysis/InvalidBaseReconstruction.hpp    |  10 +-
 include/analysis/SizeMinusKWrites.hpp         |   2 +-
 include/analysis/StackBufferAnalysis.hpp      |  10 +-
 include/analysis/StackComputation.hpp         |   6 +-
 include/analysis/StackPointerEscape.hpp       |   2 +-
 include/helpers.hpp                           |   3 +-
 main.cpp                                      | 213 +++++++++++++-----
 run_test.py                                   |  40 ++--
 10 files changed, 219 insertions(+), 116 deletions(-)

diff --git a/extern-project/src/main.cpp b/extern-project/src/main.cpp
index 595b887..7a96c16 100644
--- a/extern-project/src/main.cpp
+++ b/extern-project/src/main.cpp
@@ -22,7 +22,7 @@ int main(int argc, char **argv)
 
     auto res = ctrace::stack::analyzeFile(filename, cfg, ctx, diag);
 
-    // Exemple : output SARIF sur stdout
+    // Example: SARIF output to stdout
     std::cout << ctrace::stack::toSarif(res, filename) << std::endl;
 
     return 0;
diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp
index 7fbba27..8715115 100644
--- a/include/StackUsageAnalyzer.hpp
+++ b/include/StackUsageAnalyzer.hpp
@@ -15,6 +15,11 @@ namespace llvm
     class SMDiagnostic;
 } // namespace llvm
 
+namespace ctrace::stack::analysis
+{
+    class CompilationDatabase;
+} // namespace ctrace::stack::analysis
+
 namespace ctrace::stack
 {
 
@@ -26,33 +31,39 @@ namespace ctrace::stack
         ABI
     };
 
-    // Configuration de l'analyse (mode + limite de stack)
+    // Analysis configuration: mode, stack limit, output verbosity, compile inputs, filters, IR dump.
     struct AnalysisConfig
     {
         AnalysisMode mode = AnalysisMode::IR;
-        StackSize stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB par défaut
+        StackSize stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB default
         bool quiet = false;
         bool warningsOnly = false;
         std::vector<std::string> extraCompileArgs;
+        std::shared_ptr<const analysis::CompilationDatabase> compilationDatabase; // null by default
+        bool requireCompilationDatabase = false; // NOTE(review): presumably makes a missing compdb entry an error - confirm
+        bool compdbFast = false; // --compdb-fast: speed up compdb builds (drops heavy flags)
+        bool timing = false;     // --timing: print compilation/analysis timing to stderr
         std::vector<std::string> onlyFiles;
         std::vector<std::string> onlyDirs;
         std::vector<std::string> onlyFunctions;
         bool dumpFilter = false;
+        std::string dumpIRPath;   // --dump-ir: where to write LLVM IR; empty by default
+        bool dumpIRIsDir = false; // NOTE(review): presumably true when dumpIRPath is a directory - confirm
     };
 
-    // Résultat par fonction
+    // Per-function result
     struct FunctionResult
     {
         std::string filePath;
         std::string name;
-        StackSize localStack = 0;       // taille frame locale (suivant le mode)
-        StackSize maxStack = 0;         // max stack incluant les callees
-        bool localStackUnknown = false; // taille locale inconnue (alloca dynamique)
-        bool maxStackUnknown = false;   // max stack inconnue (propagée via appels)
-        bool hasDynamicAlloca = false;  // alloca dynamique détectée dans la fonction
-
-        bool isRecursive = false;              // dans un cycle F <-> G ...
-        bool hasInfiniteSelfRecursion = false; // heuristique DominatorTree
+        StackSize localStack = 0;       // local frame size (depends on mode)
+        StackSize maxStack = 0;         // max stack including callees
+        bool localStackUnknown = false; // unknown local size (dynamic alloca)
+        bool maxStackUnknown = false;   // unknown max stack (propagated via calls)
+        bool hasDynamicAlloca = false;  // dynamic alloca detected in the function
+
+        bool isRecursive = false;              // part of a cycle F <-> G ...
+        bool hasInfiniteSelfRecursion = false; // DominatorTree heuristic
         bool exceedsLimit = false;             // maxStack > config.stackLimit
     };
 
@@ -151,7 +162,7 @@ namespace ctrace::stack
         std::string message;
     };
 
-    // Résultat global pour un module
+    // Global result for a module
     struct AnalysisResult
     {
         AnalysisConfig config;
@@ -162,22 +173,22 @@ namespace ctrace::stack
         std::vector<Diagnostic> diagnostics;
     };
 
-    // Serialize an AnalysisResult to a simple JSON format (pour CI / GitHub Actions).
-    // `inputFile` : chemin du fichier analysé (celui que tu passes à analyzeFile).
+    // Serialize an AnalysisResult to a simple JSON format (for CI / GitHub Actions).
+    // `inputFile`: path of the analyzed file (the one you pass to analyzeFile).
     std::string toJson(const AnalysisResult& result, const std::string& inputFile);
     std::string toJson(const AnalysisResult& result, const std::vector<std::string>& inputFiles);
 
     // Serialize an AnalysisResult to SARIF 2.1.0 (compatible GitHub Code Scanning).
-    // `inputFile` : chemin du fichier analysé.
-    // `toolName` / `toolVersion` : metadata du tool dans le SARIF.
+    // `inputFile`: path of the analyzed file.
+    // `toolName` / `toolVersion`: tool metadata in SARIF.
     std::string toSarif(const AnalysisResult& result, const std::string& inputFile,
                         const std::string& toolName = "coretrace-stack-analyzer",
                         const std::string& toolVersion = "0.1.0");
 
-    // Analyse un module déjà chargé (tu peux réutiliser dans d'autres outils)
+    // Analyze an already loaded module (can be reused by other tools).
     AnalysisResult analyzeModule(llvm::Module& mod, const AnalysisConfig& config);
 
-    // Helper pratique : charge un .ll et appelle analyzeModule()
+    // Convenience helper: load a .ll and call analyzeModule()
     AnalysisResult analyzeFile(const std::string& filename, const AnalysisConfig& config,
                                llvm::LLVMContext& ctx, llvm::SMDiagnostic& err);
 
diff --git a/include/analysis/InvalidBaseReconstruction.hpp b/include/analysis/InvalidBaseReconstruction.hpp
index 47bfa11..e9191cd 100644
--- a/include/analysis/InvalidBaseReconstruction.hpp
+++ b/include/analysis/InvalidBaseReconstruction.hpp
@@ -19,11 +19,11 @@ namespace ctrace::stack::analysis
     struct InvalidBaseReconstructionIssue
     {
         std::string funcName;
-        std::string varName;        // nom de la variable alloca (stack object)
-        std::string sourceMember;   // membre source (ex: "b")
-        int64_t offsetUsed = 0;     // offset utilisé dans le calcul (peut être négatif)
-        std::string targetType;     // type vers lequel on cast (ex: "struct A*")
-        bool isOutOfBounds = false; // true si on peut prouver que c'est hors bornes
+        std::string varName;        // alloca variable name (stack object)
+        std::string sourceMember;   // source member (e.g., "b")
+        int64_t offsetUsed = 0;     // offset used in the calculation (can be negative)
+        std::string targetType;     // target cast type (e.g., "struct A*")
+        bool isOutOfBounds = false; // true if we can prove it is out of bounds
         const llvm::Instruction* inst = nullptr;
     };
 
diff --git a/include/analysis/SizeMinusKWrites.hpp b/include/analysis/SizeMinusKWrites.hpp
index e478f44..99b28d8 100644
--- a/include/analysis/SizeMinusKWrites.hpp
+++ b/include/analysis/SizeMinusKWrites.hpp
@@ -18,7 +18,7 @@ namespace ctrace::stack::analysis
     struct SizeMinusKWriteIssue
     {
         std::string funcName;
-        std::string sinkName; // nom de l'appel ou "store"
+        std::string sinkName; // call name or "store"
         bool ptrNonNull = false;
         bool sizeAboveK = false;
         bool hasPointerDest = true;
diff --git a/include/analysis/StackBufferAnalysis.hpp b/include/analysis/StackBufferAnalysis.hpp
index 1ee90b3..a7928e4 100644
--- a/include/analysis/StackBufferAnalysis.hpp
+++ b/include/analysis/StackBufferAnalysis.hpp
@@ -21,14 +21,14 @@ namespace ctrace::stack::analysis
         std::string funcName;
         std::string varName;
         StackSize arraySize = 0;
-        StackSize indexOrUpperBound = 0; // utilisé pour les bornes sup (UB) ou index constant
+        StackSize indexOrUpperBound = 0; // used for upper bounds (UB) or constant index
         bool isWrite = false;
         bool indexIsConstant = false;
         const llvm::Instruction* inst = nullptr;
 
-        // Violation basée sur une borne inférieure (index potentiellement négatif)
+        // Violation based on a lower bound (index potentially negative)
         bool isLowerBoundViolation = false;
-        long long lowerBound = 0; // borne inférieure déduite (signée)
+        long long lowerBound = 0; // deduced lower bound (signed)
 
         std::string aliasPath;                 // ex: "pp -> ptr -> buf"
         std::vector<std::string> aliasPathVec; // {"pp", "ptr", "buf"}
@@ -49,8 +49,8 @@ namespace ctrace::stack::analysis
     {
         std::string funcName;
         std::string varName;
-        std::size_t storeCount = 0;         // nombre total de StoreInst vers ce buffer
-        std::size_t distinctIndexCount = 0; // nombre d'expressions d'index distinctes
+        std::size_t storeCount = 0;         // total number of StoreInsts into this buffer
+        std::size_t distinctIndexCount = 0; // number of distinct index expressions
         const llvm::AllocaInst* allocaInst = nullptr;
     };
 
diff --git a/include/analysis/StackComputation.hpp b/include/analysis/StackComputation.hpp
index 8b56401..9025b5c 100644
--- a/include/analysis/StackComputation.hpp
+++ b/include/analysis/StackComputation.hpp
@@ -33,9 +33,9 @@ namespace ctrace::stack::analysis
 
     struct InternalAnalysisState
     {
-        std::map<const llvm::Function*, StackEstimate> TotalStack; // stack max, callees inclus
-        std::set<const llvm::Function*> RecursiveFuncs;         // fonctions dans au moins un cycle
-        std::set<const llvm::Function*> InfiniteRecursionFuncs; // auto-récursion “infinie”
+        std::map<const llvm::Function*, StackEstimate> TotalStack; // max stack, including callees
+        std::set<const llvm::Function*> RecursiveFuncs;         // functions in at least one cycle
+        std::set<const llvm::Function*> InfiniteRecursionFuncs; // “infinite” self-recursion
     };
 
     CallGraph buildCallGraph(llvm::Module& M);
diff --git a/include/analysis/StackPointerEscape.hpp b/include/analysis/StackPointerEscape.hpp
index 86f6438..43cb4a9 100644
--- a/include/analysis/StackPointerEscape.hpp
+++ b/include/analysis/StackPointerEscape.hpp
@@ -19,7 +19,7 @@ namespace ctrace::stack::analysis
         std::string varName;
         std::string
             escapeKind; // "return", "store_global", "store_unknown", "call_arg", "call_callback"
-        std::string targetName; // nom du global, si applicable
+        std::string targetName; // global name, if applicable
         const llvm::Instruction* inst = nullptr;
     };
 
diff --git a/include/helpers.hpp b/include/helpers.hpp
index 2351fe4..06889c8 100644
--- a/include/helpers.hpp
+++ b/include/helpers.hpp
@@ -7,8 +7,7 @@
 namespace ctrace::stack
 {
 
-    template <typename E>
-    struct EnumTraits; // pas de définition générique -> erreur si non spécialisé
+    template <typename E> struct EnumTraits; // no generic definition -> error if not specialized
 
     template <typename E> concept EnumWithTraits = std::is_enum_v<E> && requires
     {
diff --git a/main.cpp b/main.cpp
index 8a0c346..f952b43 100644
--- a/main.cpp
+++ b/main.cpp
@@ -7,11 +7,13 @@
 #include <filesystem>
 #include <iostream>
 #include <limits>
+#include <system_error>
 #include <unordered_set>
 #include <vector>
 #include <llvm/IR/LLVMContext.h>
 #include <llvm/Support/SourceMgr.h>
 #include <llvm/Support/raw_ostream.h>
+#include "analysis/CompileCommands.hpp"
 #include "mangle.hpp"
 
 using namespace ctrace::stack;
@@ -38,11 +40,16 @@ static void printHelp()
         << "  -D<name>[=value]       Define macro for C/C++ inputs\n"
         << "  -D <name>[=value]      Define macro for C/C++ inputs\n"
         << "  --compile-arg=<arg>    Pass extra compile argument (repeatable)\n"
+        << "  --compile-commands=<path>  Use compile_commands.json (file or directory)\n"
+        << "  --compdb=<path>        Alias for --compile-commands\n"
+        << "  --compdb-fast          Speed up compdb builds (drops heavy flags)\n"
+        << "  --timing               Print compilation/analysis timing to stderr\n"
         << "  --only-file=<path>     Only report functions from this source file\n"
         << "  --only-dir=<path>      Only report functions under this directory\n"
         << "  --only-func=<name>     Only report functions with this name (comma-separated)\n"
         << "  --stack-limit=<value>  Override stack size limit (bytes, or KiB/MiB/GiB)\n"
         << "  --dump-filter          Print filter decisions to stderr\n"
+        << "  --dump-ir=<path>       Write LLVM IR to file (or directory for multiple inputs)\n"
         << "  --quiet                Suppress per-function diagnostics\n"
         << "  --warnings-only        Show warnings and errors only\n"
         << "  -h, --help             Show this help message and exit\n\n"
@@ -51,66 +58,35 @@ static void printHelp()
         << "  stack_usage_analyzer input1.ll input2.ll --format=json\n"
         << "  stack_usage_analyzer main.cpp -I../include --format=json\n"
         << "  stack_usage_analyzer main.cpp -I../include --only-dir=../src\n"
+        << "  stack_usage_analyzer main.cpp --compile-commands=build/compile_commands.json\n"
         << "  stack_usage_analyzer input.ll --mode=abi --format=json\n"
         << "  stack_usage_analyzer input.ll --warnings-only\n";
 }
 
 static std::string normalizePath(const std::string& input)
 {
-    std::string out = input;
-    for (char& c : out)
+    if (input.empty()) // nothing to normalize; no filesystem call is made
+        return {};
+    // Normalize to an absolute, generic-format path; first treat backslashes as separators.
+    std::string adjusted = input;
+    for (char& c : adjusted)
     {
         if (c == '\\')
             c = '/';
     }
-    const bool isAbs = !out.empty() && out.front() == '/';
-    std::vector<std::string> parts;
-    std::string cur;
-    for (char c : out)
-    {
-        if (c == '/')
-        {
-            if (!cur.empty())
-            {
-                if (cur == "..")
-                {
-                    if (!parts.empty())
-                        parts.pop_back();
-                }
-                else if (cur != ".")
-                {
-                    parts.push_back(cur);
-                }
-                cur.clear();
-            }
-        }
-        else
-        {
-            cur.push_back(c);
-        }
-    }
-    if (!cur.empty())
-    {
-        if (cur == "..")
-        {
-            if (!parts.empty())
-                parts.pop_back();
-        }
-        else if (cur != ".")
-        {
-            parts.push_back(cur);
-        }
-    }
-    std::string norm = isAbs ? "/" : "";
-    for (std::size_t i = 0; i < parts.size(); ++i)
-    {
-        norm += parts[i];
-        if (i + 1 < parts.size())
-            norm += "/";
-    }
-    while (!norm.empty() && norm.back() == '/')
-        norm.pop_back();
-    return norm;
+    // Make the path absolute; if that reports an error, continue with the path as given.
+    std::filesystem::path path(adjusted);
+    std::error_code ec;
+    std::filesystem::path absPath = std::filesystem::absolute(path, ec);
+    if (ec)
+        absPath = path;
+    // Prefer the weakly canonical form; on error fall back to a purely lexical normalization.
+    std::filesystem::path canonicalPath = std::filesystem::weakly_canonical(absPath, ec);
+    std::filesystem::path norm = ec ? absPath.lexically_normal() : canonicalPath;
+    std::string out = norm.generic_string();
+    while (out.size() > 1 && out.back() == '/') // strip trailing slashes, never below one character
+        out.pop_back();
+    return out;
 }
 
 static std::string basenameOf(const std::string& path)
@@ -412,22 +388,17 @@ static AnalysisResult filterWarningsOnly(const AnalysisResult& result, const Ana
     return filtered;
 }
 
-void toto(void)
-{
-    char test[974] = "Hello";
-    return;
-}
-
 int main(int argc, char** argv)
 {
-    toto();
     llvm::LLVMContext context;
     std::vector<std::string> inputFilenames;
     OutputFormat outputFormat = OutputFormat::Human;
 
-    AnalysisConfig cfg; // mode = IR, stackLimit = 8MiB par défaut
+    AnalysisConfig cfg; // mode = IR, stackLimit = 8 MiB default
     cfg.quiet = false;
     cfg.warningsOnly = false;
+    std::string compileCommandsPath;
+    bool compileCommandsExplicit = false;
     // cfg.mode = AnalysisMode::IR; -> already set by default constructor
     // cfg.stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB -> already set by default constructor but needed to be set with args
 
@@ -539,6 +510,21 @@ int main(int argc, char** argv)
             cfg.dumpFilter = true;
             continue;
         }
+        if (argStr == "--dump-ir")
+        {
+            if (i + 1 >= argc)
+            {
+                llvm::errs() << "Missing argument for --dump-ir\n";
+                return 1;
+            }
+            cfg.dumpIRPath = argv[++i];
+            continue;
+        }
+        if (argStr.rfind("--dump-ir=", 0) == 0)
+        {
+            cfg.dumpIRPath = argStr.substr(std::strlen("--dump-ir="));
+            continue;
+        }
         if (argStr == "-I")
         {
             if (i + 1 >= argc)
@@ -574,6 +560,39 @@ int main(int argc, char** argv)
             cfg.extraCompileArgs.emplace_back(argStr.substr(std::strlen("--compile-arg=")));
             continue;
         }
+        if (argStr == "--compdb-fast")
+        {
+            cfg.compdbFast = true;
+            continue;
+        }
+        if (argStr == "--timing")
+        {
+            cfg.timing = true;
+            continue;
+        }
+        if (argStr == "--compile-commands" || argStr == "--compdb")
+        {
+            if (i + 1 >= argc)
+            {
+                llvm::errs() << "Missing argument for " << argStr << "\n";
+                return 1;
+            }
+            compileCommandsPath = argv[++i];
+            compileCommandsExplicit = true;
+            continue;
+        }
+        if (argStr.rfind("--compile-commands=", 0) == 0)
+        {
+            compileCommandsPath = argStr.substr(std::strlen("--compile-commands="));
+            compileCommandsExplicit = true;
+            continue;
+        }
+        if (argStr.rfind("--compdb=", 0) == 0)
+        {
+            compileCommandsPath = argStr.substr(std::strlen("--compdb="));
+            compileCommandsExplicit = true;
+            continue;
+        }
         if (argStr == "--warnings-only")
         {
             cfg.warningsOnly = true;
@@ -622,6 +641,52 @@ int main(int argc, char** argv)
         }
     }
 
+    if (compileCommandsExplicit)
+    {
+        if (compileCommandsPath.empty())
+        {
+            llvm::errs() << "compile commands path is empty\n";
+            return 1;
+        }
+
+        std::filesystem::path compdbPath = compileCommandsPath;
+        std::error_code fsErr;
+        if (std::filesystem::is_directory(compdbPath, fsErr))
+        {
+            compdbPath /= "compile_commands.json";
+        }
+        else if (fsErr)
+        {
+            llvm::errs() << "Failed to inspect compile commands path: " << fsErr.message() << "\n";
+            return 1;
+        }
+
+        if (!std::filesystem::exists(compdbPath, fsErr))
+        {
+            if (fsErr)
+            {
+                llvm::errs() << "Failed to inspect compile commands path: " << fsErr.message()
+                             << "\n";
+            }
+            else
+            {
+                llvm::errs() << "compile commands file not found: " << compdbPath.string() << "\n";
+            }
+            return 1;
+        }
+
+        std::string error;
+        auto db =
+            ctrace::stack::analysis::CompilationDatabase::loadFromFile(compdbPath.string(), error);
+        if (!db)
+        {
+            llvm::errs() << "Failed to load compile commands: " << error << "\n";
+            return 1;
+        }
+        cfg.compilationDatabase = std::move(db);
+        cfg.requireCompilationDatabase = true;
+    }
+
     if (inputFilenames.empty())
     {
         llvm::errs() << "Usage: stack_usage_analyzer <file.ll> [file2.ll ...] [options]\n"
@@ -629,6 +694,36 @@ int main(int argc, char** argv)
         return 1;
     }
 
+    if (!cfg.dumpIRPath.empty())
+    {
+        const bool trailingSlash = !cfg.dumpIRPath.empty() &&
+                                   (cfg.dumpIRPath.back() == '/' || cfg.dumpIRPath.back() == '\\');
+        std::error_code fsErr;
+        std::filesystem::path dumpPath(cfg.dumpIRPath);
+        const bool exists = std::filesystem::exists(dumpPath, fsErr);
+        if (fsErr)
+        {
+            llvm::errs() << "Failed to inspect dump IR path: " << fsErr.message() << "\n";
+            return 1;
+        }
+        bool isDir = false;
+        if (exists)
+        {
+            isDir = std::filesystem::is_directory(dumpPath, fsErr);
+            if (fsErr)
+            {
+                llvm::errs() << "Failed to inspect dump IR path: " << fsErr.message() << "\n";
+                return 1;
+            }
+        }
+        if (inputFilenames.size() > 1 && !isDir && !trailingSlash)
+        {
+            llvm::errs() << "--dump-ir must point to a directory when analyzing multiple inputs\n";
+            return 1;
+        }
+        cfg.dumpIRIsDir = isDir || trailingSlash || inputFilenames.size() > 1;
+    }
+
     std::sort(inputFilenames.begin(), inputFilenames.end());
     std::vector<std::pair<std::string, AnalysisResult>> results;
     results.reserve(inputFilenames.size());
@@ -740,7 +835,7 @@ int main(int argc, char** argv)
             std::vector<std::string> param_types;
             // param_types.reserve(issue.inst->getFunction()->arg_size());
             param_types.push_back(
-                "void"); // dummy to avoid empty vector issue // refaire avec les paramèters réels
+                "void"); // placeholder so the vector is not empty; TODO: use the real parameters
 
             llvm::outs() << "Function: " << f.name << " "
                          << ((ctrace_tools::isMangled(f.name))
diff --git a/run_test.py b/run_test.py
index 241ff50..f2fe6de 100755
--- a/run_test.py
+++ b/run_test.py
@@ -5,17 +5,17 @@
 import re
 from pathlib import Path
 
-# Chemin vers ton binaire d'analyse
-ANALYZER = Path("./build/stack_usage_analyzer")   # à adapter si besoin
-TEST_DIR = Path("test")                     # dossier contenant les .c
+# Path to the analyzer binary
+ANALYZER = Path("./build/stack_usage_analyzer")   # adjust if needed
+TEST_DIR = Path("test")                     # folder containing the .c files
 
 
 def normalize(s: str) -> str:
     """
-    Normalise les espaces pour rendre les comparaisons plus robustes :
-    - supprime les espaces inutiles en début/fin de ligne
-    - remplace les séquences d'espaces par un seul espace
-    - garde les sauts de lignes
+    Normalize spacing to make comparisons more robust:
+    - remove unnecessary leading/trailing spaces per line
+    - replace runs of spaces with a single space
+    - keep line breaks
     """
     lines = []
     for line in s.splitlines():
@@ -34,10 +34,9 @@ def normalize(s: str) -> str:
 
 def extract_expectations(c_path: Path):
     """
-    Extrait les blocs de commentaires d'attendus dans un fichier .c.
+    Extract expected comment blocks from a .c file.
 
-    On cherche les commentaires qui commencent par "// at line".
-    On prend toutes les lignes de commentaires qui suivent.
+    Look for comments that start with "// at line" and take all following comment lines.
     """
     expectations = []
     negative_expectations = []
@@ -64,21 +63,21 @@ def extract_expectations(c_path: Path):
             i += 1
             continue
 
-        # Début d'un bloc d'attendu
+        # Start of an expectation block
         if stripped.startswith("// at line"):
             comment_block = [raw]
             i += 1
-            # Récupère toutes les lignes "// ..." qui suivent
+            # Collect all following "// ..." lines
             while i < n and lines[i].lstrip().startswith("//"):
                 comment_block.append(lines[i])
                 i += 1
 
-            # Nettoyage : retirer les "//" et les indentations
+            # Cleanup: remove "//" and indentation
             cleaned_lines = []
             for c in comment_block:
                 s = c.lstrip()
                 if s.startswith("//"):
-                    s = s[2:]  # enlève "//"
+                    s = s[2:]  # remove "//"
                 cleaned_lines.append(s.lstrip())
 
             expectation_text = "\n".join(cleaned_lines)
@@ -91,7 +90,7 @@ def extract_expectations(c_path: Path):
 
 def run_analyzer_on_file(c_path: Path, stack_limit=None) -> str:
     """
-    Lance ton analyseur sur un fichier C et récupère stdout+stderr.
+    Run the analyzer on a C file and capture stdout+stderr.
     """
     args = [str(ANALYZER), str(c_path)]
     if stack_limit:
@@ -495,7 +494,7 @@ def has_json_recursion_diag(func_name: str, needle: str) -> bool:
 
 def check_help_flags() -> bool:
     """
-    Vérifie que -h et --help affichent l'aide sur stdout et retournent 0.
+    Check that -h and --help print help to stdout and return 0.
     """
     print("=== Testing help flags ===")
     ok = True
@@ -525,7 +524,7 @@ def check_help_flags() -> bool:
 
 def check_multi_file_json() -> bool:
     """
-    Vérifie que l'analyse accepte plusieurs fichiers et que le JSON agrège correctement.
+    Check that the analyzer accepts multiple input files and aggregates them correctly in JSON.
     """
     print("=== Testing multi-file JSON ===")
     file_a = TEST_DIR / "test.ll"
@@ -598,7 +597,7 @@ def matches_input(input_path: str) -> bool:
 
 def check_multi_file_failure() -> bool:
     """
-    Vérifie le comportement fail-fast quand un fichier est invalide.
+    Check fail-fast behavior when a file is invalid.
     """
     print("=== Testing multi-file failure ===")
     valid_file = TEST_DIR / "test.ll"
@@ -628,7 +627,7 @@ def check_multi_file_failure() -> bool:
 
 def check_cli_parsing_and_filters() -> bool:
     """
-    Vérifie parsing CLI (erreurs) + filtres principaux.
+    Check CLI argument parsing (error cases) and the main filters.
     """
     print("=== Testing CLI parsing & filters ===")
     ok = True
@@ -724,8 +723,7 @@ def check_cli_parsing_and_filters() -> bool:
 
 def check_file(c_path: Path):
     """
-    Vérifie qu'avec ce fichier, toutes les attentes sont présentes
-    dans la sortie de l'analyseur.
+    Check that, for this file, all expectations are present in the analyzer output.
     """
     print(f"=== Testing {c_path} ===")
     expectations, negative_expectations, stack_limit = extract_expectations(c_path)