From 3839627f759497d0e5dd407229ce4e7ad54977d5 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:15:12 +0100 Subject: [PATCH 01/11] feat: add compilation database interface --- include/analysis/CompileCommands.hpp | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 include/analysis/CompileCommands.hpp diff --git a/include/analysis/CompileCommands.hpp b/include/analysis/CompileCommands.hpp new file mode 100644 index 0000000..e1d102a --- /dev/null +++ b/include/analysis/CompileCommands.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + struct CompileCommand + { + std::string directory; + std::vector arguments; + }; + + class CompilationDatabase + { + public: + static std::shared_ptr loadFromFile(const std::string& path, + std::string& error); + + const CompileCommand* findCommandForFile(const std::string& filePath) const; + + const std::string& sourcePath() const + { + return sourcePath_; + } + + private: + std::string sourcePath_; + std::unordered_map commands_; + }; +} // namespace ctrace::stack::analysis From 8dafe2c942a4465659be675bce2b54b4cb057133 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:15:48 +0100 Subject: [PATCH 02/11] feat: extend analysis config for compdb and IR dumping --- src/analysis/CompileCommands.cpp | 381 +++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 src/analysis/CompileCommands.cpp diff --git a/src/analysis/CompileCommands.cpp b/src/analysis/CompileCommands.cpp new file mode 100644 index 0000000..2c79ea6 --- /dev/null +++ b/src/analysis/CompileCommands.cpp @@ -0,0 +1,381 @@ +#include "analysis/CompileCommands.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + std::string normalizePath(const std::filesystem::path& path) + { + if (path.empty()) + return {}; + + std::error_code ec; + std::filesystem::path absPath = std::filesystem::absolute(path, ec); + if (ec) + absPath = path; + + std::filesystem::path canonicalPath = std::filesystem::weakly_canonical(absPath, ec); + std::filesystem::path norm = ec ? absPath.lexically_normal() : canonicalPath; + std::string out = norm.generic_string(); + while (out.size() > 1 && out.back() == '/') + out.pop_back(); + return out; + } + + bool pathHasSuffix(const std::string& path, const std::string& suffix) + { + if (suffix.empty()) + return false; + if (path.size() < suffix.size()) + return false; + if (path.compare(path.size() - suffix.size(), suffix.size(), suffix) != 0) + return false; + if (path.size() == suffix.size()) + return true; + return path[path.size() - suffix.size() - 1] == '/'; + } + + std::vector buildPathSuffixes(const std::string& path) + { + std::vector suffixes; + if (path.empty()) + return suffixes; + + suffixes.push_back(path); + for (std::size_t i = 1; i < path.size(); ++i) + { + if (path[i] == '/' && i + 1 < path.size()) + suffixes.push_back(path.substr(i)); + } + return suffixes; + } + + std::vector tokenizeCommandLine(const std::string& command) + { + std::vector tokens; + std::string current; + enum class State + { + Normal, + SingleQuote, + DoubleQuote + }; + + State state = State::Normal; + for (std::size_t i = 0; i < command.size(); ++i) + { + char c = command[i]; + if (state == State::Normal) + { + if (std::isspace(static_cast(c))) + { + if (!current.empty()) + { + tokens.push_back(current); + current.clear(); + } + continue; + } + if (c == '\'') + { + state = State::SingleQuote; + continue; + } + if (c == '"') + { + state = State::DoubleQuote; + continue; + } + if (c == '\\' && i + 1 < command.size()) + { + current.push_back(command[++i]); + continue; + } + current.push_back(c); + continue; + } + + if (state == State::SingleQuote) + { + if (c == '\'') + { + state = State::Normal; + continue; + } + current.push_back(c); + continue; + } + + if (c == '"') + { + state = State::Normal; + continue; + } + if (c == '\\' && i + 1 < command.size()) + { + current.push_back(command[++i]); + continue; + } + current.push_back(c); + } + + if (!current.empty()) + tokens.push_back(current); + + return tokens; + } + + void stripOutputAndDependencyArgs(std::vector& args) + { + std::vector filtered; + filtered.reserve(args.size()); + + for (std::size_t i = 0; i < args.size(); ++i) + { + const std::string& arg = args[i]; + if (arg == "-o" || arg == "--output") + { + if (i + 1 < args.size()) + ++i; + continue; + } + if (arg.size() > 2 && arg.rfind("-o", 0) == 0) + continue; + + if (arg == "-MF" || arg == "-MT" || arg == "-MQ") + { + if (i + 1 < args.size()) + ++i; + continue; + } + if ((arg.size() > 3 && (arg.rfind("-MF", 0) == 0 || arg.rfind("-MT", 0) == 0 || + arg.rfind("-MQ", 0) == 0))) + continue; + + if (arg == "-M" || arg == "-MM" || arg == "-MD" || arg == "-MMD" || arg == "-MG" || + arg == "-MP") + continue; + + filtered.push_back(arg); + } + + args.swap(filtered); + } + + void stripInputFileArg(std::vector& args, const std::string& directory, + const std::string& fileKey) + { + if (fileKey.empty()) + return; + + std::vector filtered; + filtered.reserve(args.size()); + bool removed = false; + + for (const auto& arg : args) + { + if (!removed && !arg.empty() && arg[0] != '-') + { + std::filesystem::path argPath(arg); + if (argPath.is_relative()) + argPath = std::filesystem::path(directory) / argPath; + std::string argKey = normalizePath(argPath); + if (!argKey.empty() && argKey == fileKey) + { + removed = true; + continue; + } + } + + filtered.push_back(arg); + } + + args.swap(filtered); + } + + std::vector extractArguments(const llvm::json::Object& obj) + { + std::vector args; + if (auto* arr = obj.getArray("arguments")) + { + args.reserve(arr->size()); + for (const auto& value : *arr) + { + if (auto str = value.getAsString()) + args.push_back(str->str()); + } + return args; + } + + if (auto command = obj.getString("command")) + return tokenizeCommandLine(command->str()); + + return args; + } + + void stripLeadingCommandTokens(std::vector& args) + { + std::size_t start = 0; + while (start < args.size()) + { + const std::string& token = args[start]; + if (!token.empty() && (token[0] == '-' || token[0] == '@')) + break; + ++start; + } + if (start > 0) + args.erase(args.begin(), args.begin() + static_cast(start)); + } + + std::filesystem::path normalizeDirectoryPath(const std::filesystem::path& compdbDir, + const std::string& directory) + { + std::filesystem::path dirPath = + directory.empty() ? compdbDir : std::filesystem::path(directory); + if (dirPath.is_relative()) + dirPath = compdbDir / dirPath; + return dirPath; + } + } // namespace + + std::shared_ptr CompilationDatabase::loadFromFile(const std::string& path, + std::string& error) + { + error.clear(); + auto bufferOrErr = llvm::MemoryBuffer::getFile(path); + if (!bufferOrErr) + { + error = "unable to read compile commands file: " + path + " (" + + bufferOrErr.getError().message() + ")"; + return nullptr; + } + + auto parsed = llvm::json::parse(bufferOrErr.get()->getBuffer()); + if (!parsed) + { + error = "failed to parse compile commands JSON: " + llvm::toString(parsed.takeError()); + return nullptr; + } + + auto* array = parsed->getAsArray(); + if (!array) + { + error = "compile commands JSON must be an array"; + return nullptr; + } + + auto db = std::make_shared(); + db->sourcePath_ = normalizePath(path); + + std::filesystem::path compdbDir = std::filesystem::path(path).parent_path(); + if (compdbDir.empty()) + { + std::error_code ec; + compdbDir = std::filesystem::current_path(ec); + if (ec) + compdbDir = std::filesystem::path("."); + } + + for (const auto& entryValue : *array) + { + const auto* obj = entryValue.getAsObject(); + if (!obj) + continue; + + auto fileValue = obj->getString("file"); + if (!fileValue) + continue; + + std::string fileStr = fileValue->str(); + std::string dirStr; + if (auto directoryValue = obj->getString("directory")) + dirStr = directoryValue->str(); + + std::filesystem::path directoryPath = normalizeDirectoryPath(compdbDir, dirStr); + std::string directoryKey = normalizePath(directoryPath); + if (directoryKey.empty()) + continue; + + std::filesystem::path filePath(fileStr); + if (filePath.is_relative()) + filePath = directoryPath / filePath; + std::string fileKey = normalizePath(filePath); + if (fileKey.empty()) + continue; + + std::vector args = extractArguments(*obj); + if (args.empty()) + continue; + + stripLeadingCommandTokens(args); + stripOutputAndDependencyArgs(args); + stripInputFileArg(args, directoryKey, fileKey); + + if (db->commands_.find(fileKey) != db->commands_.end()) + continue; + + CompileCommand command; + command.directory = directoryKey; + command.arguments = std::move(args); + db->commands_.emplace(fileKey, std::move(command)); + } + + if (db->commands_.empty()) + { + error = "compile commands file contains no usable entries"; + return nullptr; + } + + return db; + } + + const CompileCommand* CompilationDatabase::findCommandForFile(const std::string& filePath) const + { + if (filePath.empty()) + return nullptr; + std::string key = normalizePath(std::filesystem::path(filePath)); + auto it = commands_.find(key); + if (it == commands_.end()) + { + auto suffixes = buildPathSuffixes(key); + if (!suffixes.empty()) + { + // Skip the full path; we already attempted exact lookup. + for (std::size_t s = 1; s < suffixes.size(); ++s) + { + const std::string& suffix = suffixes[s]; + const CompileCommand* match = nullptr; + std::size_t matchCount = 0; + for (const auto& entry : commands_) + { + if (pathHasSuffix(entry.first, suffix)) + { + ++matchCount; + if (matchCount == 1) + match = &entry.second; + else + break; + } + } + if (matchCount == 1) + return match; + if (matchCount > 1) + break; + } + } + return nullptr; + } + return &it->second; + } +} // namespace ctrace::stack::analysis From e8408576b6cf67eb396dbf7f3986a6f425c29651 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:16:52 +0100 Subject: [PATCH 03/11] feat: support dump-ir output and compdb-fast compilation --- src/analysis/InputPipeline.cpp | 290 +++++++++++++++++++++++++++++++-- 1 file changed, 279 insertions(+), 11 deletions(-) diff --git a/src/analysis/InputPipeline.cpp b/src/analysis/InputPipeline.cpp index 5622006..9ca49ac 100644 --- a/src/analysis/InputPipeline.cpp +++ b/src/analysis/InputPipeline.cpp @@ -2,20 +2,243 @@ #include #include +#include +#include #include +#include #include #include #include #include +#include #include #include #include +#include "analysis/CompileCommands.hpp" #include "compilerlib/compiler.h" namespace ctrace::stack::analysis { + namespace + { + std::string makeAbsolutePath(const std::string& path) + { + std::error_code ec; + std::filesystem::path absPath = std::filesystem::absolute(path, ec); + if (ec) + return path; + return absPath.lexically_normal().generic_string(); + } + + void appendIfMissing(std::vector& args, const std::string& flag) + { + if (std::find(args.begin(), args.end(), flag) == args.end()) + args.push_back(flag); + } + + bool hasDebugFlag(const std::vector& args) + { + for (const auto& arg : args) + { + if (arg == "-g" || (arg.size() > 2 && arg.rfind("-g", 0) == 0)) + return true; + } + return false; + } + + void applyCompdbFastMode(std::vector& args) + { + std::vector filtered; + filtered.reserve(args.size()); + + for (const auto& arg : args) + { + if (arg.size() > 1 && arg.rfind("-O", 0) == 0) + continue; + if (arg.rfind("-g", 0) == 0) + continue; + if (arg.rfind("-fsanitize", 0) == 0 || arg.rfind("-fno-sanitize", 0) == 0) + continue; + if (arg == "-flto" || arg.rfind("-flto=", 0) == 0) + continue; + if (arg.rfind("-fprofile", 0) == 0 || arg.rfind("-fcoverage", 0) == 0) + continue; + + filtered.push_back(arg); + } + + filtered.push_back("-O0"); + filtered.push_back("-gline-tables-only"); + filtered.push_back("-fno-sanitize=all"); + args.swap(filtered); + } + + static bool resolveDumpIRPath(const AnalysisConfig& config, const std::string& inputPath, + const std::filesystem::path& baseDir, + std::filesystem::path& outPath, std::string& error) + { + if (config.dumpIRPath.empty()) + return false; + + std::filesystem::path dumpPath(config.dumpIRPath); + if (dumpPath.is_relative() && !baseDir.empty()) + dumpPath = baseDir / dumpPath; + + if (config.dumpIRIsDir) + { + std::filesystem::path baseName = std::filesystem::path(inputPath).filename(); + std::string outName = baseName.empty() ? "module" : baseName.string(); + outPath = dumpPath / (outName + ".ll"); + } + else + { + outPath = dumpPath; + } + + std::filesystem::path parentDir = outPath.parent_path(); + if (!parentDir.empty()) + { + std::error_code ec; + std::filesystem::create_directories(parentDir, ec); + if (ec) + { + error = "Failed to create IR dump directory: " + parentDir.string(); + return false; + } + } + + std::error_code absErr; + std::filesystem::path inputAbs = std::filesystem::absolute(inputPath, absErr); + std::filesystem::path outputAbs = std::filesystem::absolute(outPath, absErr); + if (!absErr && inputAbs == outputAbs) + { + error = + "Refusing to overwrite input file with --dump-ir output: " + outPath.string(); + return false; + } + + return true; + } + + static bool dumpModuleIR(const llvm::Module& module, const std::string& inputPath, + const AnalysisConfig& config, const std::filesystem::path& baseDir, + std::string& error) + { + if (config.dumpIRPath.empty()) + return true; + + std::filesystem::path outPath; + if (!resolveDumpIRPath(config, inputPath, baseDir, outPath, error)) + return false; + + std::error_code ec; + llvm::raw_fd_ostream os(outPath.string(), ec, llvm::sys::fs::OF_Text); + if (ec) + { + error = + "Failed to write IR dump file: " + outPath.string() + " (" + ec.message() + ")"; + return false; + } + module.print(os, nullptr); + os.flush(); + return true; + } + + bool buildCompileArgs(const std::string& filename, LanguageType language, + const AnalysisConfig& config, std::vector& args, + std::string& workingDir, std::string& error) + { + const CompileCommand* command = nullptr; + if (config.compilationDatabase) + { + command = config.compilationDatabase->findCommandForFile(filename); + } + + if (command) + { + args = command->arguments; + workingDir = command->directory; + if (config.compdbFast) + applyCompdbFastMode(args); + } + else + { + if (config.requireCompilationDatabase) + { + error = "No compile command found for: " + filename; + if (config.compilationDatabase && + !config.compilationDatabase->sourcePath().empty()) + { + error += " in " + config.compilationDatabase->sourcePath(); + } + return false; + } + args.clear(); + args.push_back("-emit-llvm"); + args.push_back("-S"); + args.push_back("-g"); + if (language == LanguageType::CXX) + { + args.push_back("-x"); + args.push_back("c++"); + args.push_back("-std=gnu++20"); + } + } + + for (const auto& extraArg : config.extraCompileArgs) + { + args.push_back(extraArg); + } + + appendIfMissing(args, "-emit-llvm"); + appendIfMissing(args, "-S"); + if (!hasDebugFlag(args)) + args.push_back("-g"); + appendIfMissing(args, "-fno-discard-value-names"); + const bool useAbsolutePath = (command != nullptr); + args.push_back(useAbsolutePath ? makeAbsolutePath(filename) : filename); + return true; + } + + class ScopedCurrentPath + { + public: + explicit ScopedCurrentPath(const std::string& path, std::string& error) + { + if (path.empty()) + return; + std::error_code ec; + previousPath_ = std::filesystem::current_path(ec); + if (ec) + { + error = "Failed to read current working directory"; + return; + } + std::filesystem::current_path(path, ec); + if (ec) + { + error = "Failed to change working directory to: " + path; + return; + } + active_ = true; + } + + ~ScopedCurrentPath() + { + if (!active_) + return; + std::error_code ec; + std::filesystem::current_path(previousPath_, ec); + } + + private: + std::filesystem::path previousPath_; + bool active_ = false; + }; + } // namespace + LanguageType detectFromExtension(const std::string& path) { auto pos = path.find_last_of('.'); @@ -57,6 +280,11 @@ namespace ctrace::stack::analysis llvm::SMDiagnostic& err) { ModuleLoadResult result; + std::error_code cwdErr; + std::filesystem::path baseDir = std::filesystem::current_path(cwdErr); + using Clock = std::chrono::steady_clock; + auto compileStart = Clock::now(); + bool compiled = false; result.language = detectLanguageFromFile(filename, ctx); if (result.language == LanguageType::Unknown) @@ -68,23 +296,27 @@ namespace ctrace::stack::analysis if (result.language != LanguageType::LLVM_IR) { std::vector args; - args.push_back("-emit-llvm"); - args.push_back("-S"); - args.push_back("-g"); - if (result.language == LanguageType::CXX) + std::string workingDir; + std::string compileError; + if (!buildCompileArgs(filename, result.language, config, args, workingDir, + compileError)) { - args.push_back("-x"); - args.push_back("c++"); - args.push_back("-std=gnu++20"); + result.error = compileError + "\n"; + return result; } - for (const auto& extraArg : config.extraCompileArgs) + + if (config.timing) + llvm::errs() << "Compiling " << filename << "...\n"; + std::string cwdError; + ScopedCurrentPath cwdGuard(workingDir, cwdError); + if (!cwdError.empty()) { - args.push_back(extraArg); + result.error = cwdError + "\n"; + return result; } - args.push_back("-fno-discard-value-names"); - args.push_back(filename); compilerlib::OutputMode mode = compilerlib::OutputMode::ToMemory; auto res = compilerlib::compile(args, mode); + compiled = true; if (!res.success) { @@ -98,10 +330,28 @@ namespace ctrace::stack::analysis return result; } + if (config.timing) + { + auto compileEnd = Clock::now(); + auto ms = + std::chrono::duration_cast(compileEnd - compileStart) + .count(); + llvm::errs() << "Compilation done in " << ms << " ms\n"; + } + auto buffer = llvm::MemoryBuffer::getMemBuffer(res.llvmIR, "in_memory_ll"); llvm::SMDiagnostic diag; + auto parseStart = Clock::now(); result.module = llvm::parseIR(buffer->getMemBufferRef(), diag, ctx); + if (config.timing) + { + auto parseEnd = Clock::now(); + auto ms = + std::chrono::duration_cast(parseEnd - parseStart) + .count(); + llvm::errs() << "IR parse done in " << ms << " ms\n"; + } if (!result.module) { @@ -112,10 +362,28 @@ namespace ctrace::stack::analysis return result; } + if (!dumpModuleIR(*result.module, filename, config, baseDir, result.error)) + return result; + return result; } + if (config.timing) + llvm::errs() << "Parsing IR " << filename << "...\n"; + auto parseStart = Clock::now(); result.module = llvm::parseIRFile(filename, err, ctx); + if (config.timing) + { + auto parseEnd = Clock::now(); + auto ms = std::chrono::duration_cast(parseEnd - parseStart) + .count(); + llvm::errs() << "IR parse done in " << ms << " ms\n"; + } + if (result.module) + { + if (!dumpModuleIR(*result.module, filename, config, baseDir, result.error)) + return result; + } return result; } } // namespace ctrace::stack::analysis From 8129b984bc6a53ea02088d1bdd7ad32e9eab060e Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:19:40 +0100 Subject: [PATCH 04/11] fix: normalize filter paths using filesystem canonicalization --- src/analysis/AnalyzerUtils.cpp | 70 ++++++++++------------------------ 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/src/analysis/AnalyzerUtils.cpp b/src/analysis/AnalyzerUtils.cpp index 3676181..6b0177e 100644 --- a/src/analysis/AnalyzerUtils.cpp +++ b/src/analysis/AnalyzerUtils.cpp @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include @@ -117,60 +119,28 @@ namespace ctrace::stack::analysis static std::string normalizePathForMatch(const std::string& input) { - std::string out = input; - for (char& c : out) + if (input.empty()) + return {}; + + std::string adjusted = input; + for (char& c : adjusted) { if (c == '\\') c = '/'; } - const bool isAbs = !out.empty() && out.front() == '/'; - std::vector parts; - std::string cur; - for (char c : out) - { - if (c == '/') - { - if (!cur.empty()) - { - if (cur == "..") - { - if (!parts.empty()) - parts.pop_back(); - } - else if (cur != ".") - { - parts.push_back(cur); - } - cur.clear(); - } - } - else - { - cur.push_back(c); - } - } - if (!cur.empty()) - { - if (cur == "..") - { - if (!parts.empty()) - parts.pop_back(); - } - else if (cur != ".") - { - parts.push_back(cur); - } - } - std::string norm = isAbs ? "/" : ""; - for (std::size_t i = 0; i < parts.size(); ++i) - { - norm += parts[i]; - if (i + 1 < parts.size()) - norm += "/"; - } - while (!norm.empty() && norm.back() == '/') - norm.pop_back(); - return norm; + + std::filesystem::path path(adjusted); + std::error_code ec; + std::filesystem::path absPath = std::filesystem::absolute(path, ec); + if (ec) + absPath = path; + + std::filesystem::path canonicalPath = std::filesystem::weakly_canonical(absPath, ec); + std::filesystem::path norm = ec ? absPath.lexically_normal() : canonicalPath; + std::string out = norm.generic_string(); + while (out.size() > 1 && out.back() == '/') + out.pop_back(); + return out; } static std::string basenameOf(const std::string& path) From dec036e21b9de10578cb7eeac2b98beb6ba4596e Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:20:11 +0100 Subject: [PATCH 05/11] fix: reduce invalid base reconstruction false --- src/analysis/InvalidBaseReconstruction.cpp | 187 ++++++++++++++++++--- 1 file changed, 167 insertions(+), 20 deletions(-) diff --git a/src/analysis/InvalidBaseReconstruction.cpp b/src/analysis/InvalidBaseReconstruction.cpp index 39dc851..26da26a 100644 --- a/src/analysis/InvalidBaseReconstruction.cpp +++ b/src/analysis/InvalidBaseReconstruction.cpp @@ -1,5 +1,6 @@ #include "analysis/InvalidBaseReconstruction.hpp" +#include #include #include #include @@ -21,6 +22,29 @@ namespace ctrace::stack::analysis { namespace { + constexpr std::size_t kMaxInvalidBaseWork = 200000; + + struct WorkBudget + { + std::size_t remaining = kMaxInvalidBaseWork; + + bool consume(std::size_t amount = 1) + { + if (remaining < amount) + { + remaining = 0; + return false; + } + remaining -= amount; + return true; + } + + bool exhausted() const + { + return remaining == 0; + } + }; + static bool isLoadFromAlloca(const llvm::Value* V, const llvm::AllocaInst* AI) { if (!V || !AI) @@ -179,7 +203,8 @@ namespace ctrace::stack::analysis } static void collectPtrToIntMatches(const llvm::Value* V, - llvm::SmallVectorImpl& out) + llvm::SmallVectorImpl& out, + WorkBudget& budget) { using namespace llvm; @@ -208,6 +233,8 @@ namespace ctrace::stack::analysis while (!worklist.empty()) { + if (!budget.consume()) + return; const Value* Cur = stripIntCasts(worklist.back().val); int64_t curOffset = worklist.back().offset; bool curSawOffset = worklist.back().sawOffset; @@ -395,7 +422,7 @@ namespace ctrace::stack::analysis }; static void collectPointerOrigins(const llvm::Value* V, const llvm::DataLayout& DL, - llvm::SmallVectorImpl& out) + llvm::SmallVectorImpl& out, WorkBudget& budget) { using namespace llvm; @@ -407,6 +434,8 @@ namespace ctrace::stack::analysis while (!worklist.empty()) { + if (!budget.consume()) + return; const Value* Cur = worklist.back().first; int64_t currentOffset = worklist.back().second; worklist.pop_back(); @@ -462,11 +491,37 @@ namespace ctrace::stack::analysis continue; } + if (auto* ITP = dyn_cast(Cur)) + { + SmallVector matches; + collectPtrToIntMatches(ITP->getOperand(0), matches, budget); + for (const auto& match : matches) + { + if (!match.ptrOperand) + continue; + int64_t newOffset = currentOffset + match.offset; + if (recordVisitedOffset(visited, match.ptrOperand, newOffset)) + worklist.push_back({match.ptrOperand, newOffset}); + } + continue; + } + if (auto* LI = dyn_cast(Cur)) { - const Value* PtrOp = LI->getPointerOperand(); - if (recordVisitedOffset(visited, PtrOp, currentOffset)) - worklist.push_back({PtrOp, currentOffset}); + const Value* PtrOp = LI->getPointerOperand()->stripPointerCasts(); + const Value* basePtr = PtrOp; + int64_t baseOffset = 0; + if (getGEPConstantOffsetAndBase(basePtr, DL, baseOffset, basePtr)) + basePtr = basePtr->stripPointerCasts(); + if (auto* AI = dyn_cast(basePtr)) + { + Type* allocTy = AI->getAllocatedType(); + if (allocTy && allocTy->isPointerTy()) + { + if (recordVisitedOffset(visited, PtrOp, currentOffset)) + worklist.push_back({PtrOp, currentOffset}); + } + } continue; } @@ -500,11 +555,24 @@ namespace ctrace::stack::analysis if (recordVisitedOffset(visited, Src, currentOffset)) worklist.push_back({Src, currentOffset}); } + else if (CE->getOpcode() == Instruction::IntToPtr) + { + SmallVector matches; + collectPtrToIntMatches(CE->getOperand(0), matches, budget); + for (const auto& match : matches) + { + if (!match.ptrOperand) + continue; + int64_t newOffset = currentOffset + match.offset; + if (recordVisitedOffset(visited, match.ptrOperand, newOffset)) + worklist.push_back({match.ptrOperand, newOffset}); + } + } } } } - static bool isPointerDereferencedOrUsed(const llvm::Value* V) + static bool isPointerDereferencedOrUsed(const llvm::Value* V, WorkBudget& budget) { using namespace llvm; @@ -514,6 +582,8 @@ namespace ctrace::stack::analysis while (!worklist.empty()) { + if (!budget.consume()) + return false; const Value* Cur = worklist.back(); worklist.pop_back(); if (!visited.insert(Cur).second) @@ -638,6 +708,60 @@ namespace ctrace::stack::analysis return std::nullopt; } + static const llvm::StructType* getAllocaStructType(const llvm::AllocaInst* AI) + { + if (!AI) + return nullptr; + return llvm::dyn_cast(AI->getAllocatedType()); + } + + static std::optional getStructMemberIndexAtOffset(const llvm::StructType* ST, + const llvm::DataLayout& DL, + uint64_t offset) + { + if (!ST) + return std::nullopt; + + auto* mutableST = const_cast(ST); + const llvm::StructLayout* layout = DL.getStructLayout(mutableST); + const unsigned memberCount = ST->getNumElements(); + for (unsigned i = 0; i < memberCount; ++i) + { + uint64_t memberOffset = layout->getElementOffset(i); + llvm::Type* memberTy = ST->getElementType(i); + uint64_t memberSize = DL.getTypeAllocSize(memberTy); + if (memberSize == 0) + { + if (offset == memberOffset) + return i; + continue; + } + if (offset >= memberOffset && offset < memberOffset + memberSize) + return i; + } + + return std::nullopt; + } + + static bool isOffsetWithinSameAllocaMember(int64_t originOffset, int64_t resultOffset, + const llvm::StructType* structType, + uint64_t allocaSize, const llvm::DataLayout& DL) + { + if (originOffset < 0 || resultOffset < 0) + return false; + if (!structType) + return false; + uint64_t uOrigin = static_cast(originOffset); + uint64_t uResult = static_cast(resultOffset); + if (uOrigin >= allocaSize || uResult >= allocaSize) + return false; + auto originMember = getStructMemberIndexAtOffset(structType, DL, uOrigin); + auto resultMember = getStructMemberIndexAtOffset(structType, DL, uResult); + if (!originMember.has_value() || !resultMember.has_value()) + return false; + return originMember.value() == resultMember.value(); + } + static void analyzeInvalidBaseReconstructionsInFunction( llvm::Function& F, const llvm::DataLayout& DL, std::vector& out) @@ -647,7 +771,14 @@ namespace ctrace::stack::analysis if (F.isDeclaration()) return; - std::map> allocaInfo; + WorkBudget budget; + struct AllocaInfo + { + std::string name; + uint64_t size = 0; + const StructType* structType = nullptr; + }; + std::map allocaInfo; for (BasicBlock& BB : F) { @@ -663,7 +794,11 @@ namespace ctrace::stack::analysis std::string varName = AI->hasName() ? AI->getName().str() : std::string(""); - allocaInfo[AI] = {varName, sizeOpt.value()}; + AllocaInfo info; + info.name = std::move(varName); + info.size = sizeOpt.value(); + info.structType = getAllocaStructType(AI); + allocaInfo[AI] = std::move(info); } } @@ -673,13 +808,13 @@ namespace ctrace::stack::analysis { if (auto* ITP = dyn_cast(&I)) { - if (!isPointerDereferencedOrUsed(ITP)) + if (!isPointerDereferencedOrUsed(ITP, budget)) continue; Value* IntVal = ITP->getOperand(0); SmallVector matches; - collectPtrToIntMatches(IntVal, matches); + collectPtrToIntMatches(IntVal, matches, budget); if (matches.empty()) continue; @@ -701,7 +836,7 @@ namespace ctrace::stack::analysis continue; SmallVector origins; - collectPointerOrigins(match.ptrOperand, DL, origins); + collectPointerOrigins(match.ptrOperand, DL, origins, budget); if (origins.empty()) continue; @@ -711,13 +846,17 @@ namespace ctrace::stack::analysis if (it == allocaInfo.end()) continue; - const std::string& varName = it->second.first; - uint64_t allocaSize = it->second.second; + const std::string& varName = it->second.name; + uint64_t allocaSize = it->second.size; + const StructType* structType = it->second.structType; int64_t resultOffset = origin.offset + match.offset; bool isOutOfBounds = (resultOffset < 0) || (static_cast(resultOffset) >= allocaSize); + bool isMemberOffset = isOffsetWithinSameAllocaMember( + origin.offset, resultOffset, structType, allocaSize, DL); + bool allowMemberSuppression = match.offset != 0; std::string targetType; Type* targetTy = ITP->getType(); @@ -731,7 +870,8 @@ namespace ctrace::stack::analysis auto& entry = agg[key]; entry.memberOffsets.insert(origin.offset); entry.anyOutOfBounds |= isOutOfBounds; - if (resultOffset != 0) + if (resultOffset != 0 && + !(allowMemberSuppression && isMemberOffset)) entry.anyNonZeroResult = true; entry.varName = varName; entry.allocaSize = allocaSize; @@ -781,7 +921,7 @@ namespace ctrace::stack::analysis if (auto* GEP = dyn_cast(&I)) { - if (!isPointerDereferencedOrUsed(GEP)) + if (!isPointerDereferencedOrUsed(GEP, budget)) continue; int64_t gepOffset = 0; @@ -789,8 +929,11 @@ namespace ctrace::stack::analysis if (!getGEPConstantOffsetAndBase(GEP, DL, gepOffset, PtrOp)) continue; + const Value* directBase = PtrOp ? PtrOp->stripPointerCasts() : nullptr; + const bool isDirectAllocaBase = directBase && isa(directBase); + SmallVector origins; - collectPointerOrigins(PtrOp, DL, origins); + collectPointerOrigins(PtrOp, DL, origins, budget); if (origins.empty()) continue; @@ -807,7 +950,7 @@ namespace ctrace::stack::analysis for (const auto& origin : origins) { - if (origin.offset == 0 && gepOffset >= 0) + if (origin.offset == 0 && gepOffset >= 0 && isDirectAllocaBase) { continue; } @@ -816,13 +959,17 @@ namespace ctrace::stack::analysis if (it == allocaInfo.end()) continue; - const std::string& varName = it->second.first; - uint64_t allocaSize = it->second.second; + const std::string& varName = it->second.name; + uint64_t allocaSize = it->second.size; + const StructType* structType = it->second.structType; int64_t resultOffset = origin.offset + gepOffset; bool isOutOfBounds = (resultOffset < 0) || (static_cast(resultOffset) >= allocaSize); + bool isMemberOffset = isOffsetWithinSameAllocaMember( + origin.offset, resultOffset, structType, allocaSize, DL); + bool allowMemberSuppression = gepOffset != 0; std::string targetType; Type* targetTy = GEP->getType(); @@ -832,7 +979,7 @@ namespace ctrace::stack::analysis auto& entry = agg[origin.alloca]; entry.memberOffsets.insert(origin.offset); entry.anyOutOfBounds |= isOutOfBounds; - if (resultOffset != 0) + if (resultOffset != 0 && !(allowMemberSuppression && isMemberOffset)) entry.anyNonZeroResult = true; entry.varName = varName; entry.targetType = targetType; From d74659192367931247d44db75de35a9ba40d70a5 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:21:26 +0100 Subject: [PATCH 06/11] fix: tighten stack buffer detection with debug-info array heuristics --- src/analysis/StackBufferAnalysis.cpp | 210 +++++++++++++++++++-------- 1 file changed, 150 insertions(+), 60 deletions(-) diff --git a/src/analysis/StackBufferAnalysis.cpp b/src/analysis/StackBufferAnalysis.cpp index 459673c..7099bdd 100644 --- a/src/analysis/StackBufferAnalysis.cpp +++ b/src/analysis/StackBufferAnalysis.cpp @@ -5,8 +5,12 @@ #include #include +#include +#include #include +#include #include +#include #include #include #include @@ -32,22 +36,24 @@ namespace ctrace::stack::analysis } }; - // Taille (en nombre d'éléments) pour une alloca de tableau sur la stack + // Size (in elements) for a stack array alloca static std::optional getAllocaElementCount(llvm::AllocaInst* AI) { using namespace llvm; Type* elemTy = AI->getAllocatedType(); StackSize count = 1; + bool hasArrayType = false; - // Cas "char test[10];" => alloca [10 x i8] + // Case "char test[10];" => alloca [10 x i8] if (auto* arrTy = dyn_cast(elemTy)) { + hasArrayType = true; count *= arrTy->getNumElements(); elemTy = arrTy->getElementType(); } - // Cas "alloca i8, i64 10" => alloca tableau avec taille constante + // Case "alloca i8, i64 10" => array alloca with constant size if (AI->isArrayAllocation()) { if (auto* C = dyn_cast(AI->getArraySize())) @@ -56,10 +62,15 @@ namespace ctrace::stack::analysis } else { - // taille non constante - analyse plus compliquée, on ignore pour l'instant + // non-constant size - more complex analysis, ignore for now return std::nullopt; } } + else if (!hasArrayType) + { + // Scalar alloca (struct/object), not an indexable buffer + return std::nullopt; + } return count; } @@ -82,10 +93,10 @@ namespace ctrace::stack::analysis auto isArrayAlloca = [](const AllocaInst* AI) -> bool { Type* T = AI->getAllocatedType(); - // On considère comme "buffer de stack" : - // - les vrais tableaux, - // - les allocas de type tableau (VLA côté IR), - // - les structs qui contiennent au moins un champ tableau. + // Consider a "stack buffer" as: + // - real arrays, + // - array-typed allocas (VLA in IR), + // - structs that contain at least one array field. if (T->isArrayTy() || AI->isArrayAllocation()) return true; @@ -100,7 +111,7 @@ namespace ctrace::stack::analysis return false; }; - // Pour éviter les boucles d'aliasing bizarres + // Avoid weird aliasing loops SmallPtrSet visited; const Value* cur = V; @@ -110,19 +121,19 @@ namespace ctrace::stack::analysis if (cur->hasName()) path.push_back(cur->getName().str()); - // Cas 1 : on tombe sur une alloca. + // Case 1: we hit an alloca. if (auto* AI = dyn_cast(cur)) { if (isArrayAlloca(AI)) { - // Alloca d'un buffer de stack (tableau) : cible finale. + // Stack buffer alloca (array): final target. return AI; } - // Sinon, c'est très probablement une variable locale de type pointeur - // (char *ptr; char **pp; etc.). On parcourt les stores vers cette - // variable pour voir quelles valeurs lui sont assignées, et on - // tente de remonter jusqu'à une vraie alloca de tableau. + // Otherwise, it's very likely a local pointer variable + // (char *ptr; char **pp; etc.). Walk stores into this variable + // to see what values get assigned, and try to trace back to + // a real array alloca. const AllocaInst* foundAI = nullptr; for (BasicBlock& BB : F) @@ -150,8 +161,8 @@ namespace ctrace::stack::analysis } else if (foundAI != cand) { - // Plusieurs bases différentes : aliasing ambigu, - // on préfère abandonner plutôt que de se tromper. + // Multiple different bases: ambiguous aliasing, + // prefer to stop rather than be wrong. return nullptr; } } @@ -159,37 +170,37 @@ namespace ctrace::stack::analysis return foundAI; } - // Cas 2 : bitcast -> on remonte l'opérande. + // Case 2: bitcast -> follow the operand. if (auto* BC = dyn_cast(cur)) { cur = BC->getOperand(0); continue; } - // Cas 3 : GEP -> on remonte sur le pointeur de base. + // Case 3: GEP -> follow the base pointer. if (auto* GEP = dyn_cast(cur)) { cur = GEP->getPointerOperand(); continue; } - // Cas 4 : load d'un pointeur. Exemple typique : + // Case 4: load of a pointer. Typical example: // char *ptr = test; // char *p2 = ptr; // char **pp = &ptr; // (*pp)[i] = ... // - // On remonte au "container" du pointeur (variable locale, ou autre valeur) - // en suivant l'opérande du load. + // Walk up to the pointer "container" (local variable, or other value) + // by following the load operand. if (auto* LI = dyn_cast(cur)) { cur = LI->getPointerOperand(); continue; } - // Cas 5 : PHI de pointeurs (fusion de plusieurs alias) : - // on tente de résoudre chaque incoming et on s'assure qu'ils - // pointent tous vers la même alloca de tableau. + // Case 5: PHI of pointers (merge of aliases): + // try to resolve each incoming and ensure they + // all point to the same array alloca. if (auto* PN = dyn_cast(cur)) { const AllocaInst* foundAI = nullptr; @@ -209,7 +220,7 @@ namespace ctrace::stack::analysis } else if (foundAI != cand) { - // PHI mélange plusieurs bases différentes : trop ambigu. + // PHI mixes multiple different bases: too ambiguous. return nullptr; } } @@ -217,13 +228,82 @@ namespace ctrace::stack::analysis return foundAI; } - // Autres cas (arguments, globales complexes, etc.) : on arrête l'heuristique. + // Other cases (arguments, complex globals, etc.): stop the heuristic. break; } return nullptr; } + static std::optional isAllocaArrayByDebugInfo(const llvm::AllocaInst* AI, + const llvm::Function& F) + { + using namespace llvm; + + for (const BasicBlock& BB : F) + { + for (const Instruction& I : BB) + { + auto* DVI = dyn_cast(&I); + if (!DVI) + continue; + + if (DVI->getNumVariableLocationOps() == 0) + continue; + + const Value* loc = DVI->getVariableLocationOp(0); + if (!loc) + continue; + + const Value* base = getUnderlyingObject(loc); + if (base != AI) + continue; + + const DILocalVariable* var = DVI->getVariable(); + if (!var) + return false; + + const DIType* type = var->getType(); + if (!type) + return false; + + if (auto* composite = dyn_cast(type)) + { + return composite->getTag() == dwarf::DW_TAG_array_type; + } + + return false; + } + } + + return std::nullopt; + } + + static bool shouldUseAllocaFallback(const llvm::AllocaInst* AI, llvm::Function& F) + { + if (auto debugArray = isAllocaArrayByDebugInfo(AI, F); debugArray.has_value()) + { + return *debugArray; + } + + llvm::Type* allocatedTy = AI->getAllocatedType(); + if (auto* arrTy = llvm::dyn_cast(allocatedTy)) + { + if (arrTy->getNumElements() <= 1 && !arrTy->getElementType()->isArrayTy()) + return false; + return true; + } + + if (AI->isArrayAllocation()) + { + if (auto* C = llvm::dyn_cast(AI->getArraySize())) + return C->getZExtValue() > 1; + return true; + } + + return false; + } + static const llvm::AllocaInst* resolveArrayAllocaFromPointer(const llvm::Value* V, llvm::Function& F, std::vector& path) @@ -248,17 +328,17 @@ namespace ctrace::stack::analysis if (!GEP) continue; - // 1) Trouver la base du pointeur (test, &test[0], ptr, etc.) + // 1) Find the pointer base (test, &test[0], ptr, etc.) const Value* basePtr = GEP->getPointerOperand(); std::vector aliasPath; const AllocaInst* AI = resolveArrayAllocaFromPointer(basePtr, F, aliasPath); if (!AI) continue; - // 2) Déterminer la taille logique du tableau ciblé et récupérer l'index - // On essaie d'abord de la déduire du type traversé par la GEP - // (cas struct S { char buf[10]; }; s.buf[i]) puis on retombe - // sur la taille de l'alloca pour les cas plus simples (char buf[10]). + // 2) Determine the logical target array size and retrieve the index. + // First try to infer it from the type traversed by the GEP + // (case struct S { char buf[10]; }; s.buf[i]), then fall back + // to the alloca size for simpler cases (char buf[10]). StackSize arraySize = 0; Value* idxVal = nullptr; @@ -266,29 +346,29 @@ namespace ctrace::stack::analysis if (auto* arrTy = dyn_cast(srcElemTy)) { - // Cas direct : alloca [N x T]; GEP indices [0, i] + // Direct case: alloca [N x T]; GEP indices [0, i] if (GEP->getNumIndices() < 2) continue; auto idxIt = GEP->idx_begin(); - ++idxIt; // saute le premier indice (souvent 0) + ++idxIt; // skip the first index (often 0) idxVal = idxIt->get(); arraySize = arrTy->getNumElements(); } else if (auto* ST = dyn_cast(srcElemTy)) { - // Cas struct avec champ tableau: + // Struct case with an array field: // %ptr = getelementptr inbounds %struct.S, %struct.S* %s, // i32 0, i32 , i64 %i // - // On attend donc au moins 3 indices: [0, field, i] + // Expect at least 3 indices: [0, field, i] if (GEP->getNumIndices() >= 3) { auto idxIt = GEP->idx_begin(); - // premier indice (souvent 0) + // first index (often 0) auto* idx0 = dyn_cast(idxIt->get()); ++idxIt; - // second indice: index de champ dans la struct + // second index: field index in the struct auto* fieldIdxC = dyn_cast(idxIt->get()); ++idxIt; @@ -302,7 +382,7 @@ namespace ctrace::stack::analysis if (auto* fieldArrTy = dyn_cast(fieldTy)) { arraySize = fieldArrTy->getNumElements(); - // Troisième indice = index dans le tableau interne + // Third index = index within the inner array idxVal = idxIt->get(); } } @@ -310,10 +390,13 @@ namespace ctrace::stack::analysis } } - // Si on n'a pas réussi à déduire une taille via la GEP, - // on retombe sur la taille dérivée de l'alloca (cas char buf[10]; ptr = buf; ptr[i]). + // If we could not infer a size via the GEP, + // fall back to the size derived from the alloca + // (case char buf[10]; ptr = buf; ptr[i]). if (arraySize == 0 || !idxVal) { + if (!shouldUseAllocaFallback(AI, F)) + continue; auto maybeCount = getAllocaElementCount(const_cast(AI)); if (!maybeCount) continue; @@ -321,7 +404,7 @@ namespace ctrace::stack::analysis if (arraySize == 0) continue; - // Pour ces cas-là, on considère le premier indice comme l'index logique. + // For these cases, treat the first index as the logical index. if (GEP->getNumIndices() < 1) continue; auto idxIt = GEP->idx_begin(); @@ -331,14 +414,14 @@ namespace ctrace::stack::analysis std::string varName = AI->hasName() ? AI->getName().str() : std::string(""); - // "baseIdxVal" = variable de boucle "i" sans les casts (sext/zext...) + // "baseIdxVal" = loop variable "i" without casts (sext/zext...) Value* baseIdxVal = idxVal; while (auto* cast = dyn_cast(baseIdxVal)) { baseIdxVal = cast->getOperand(0); } - // 4) Cas index constant : test[11] + // 4) Constant index case: test[11] if (auto* CIdx = dyn_cast(idxVal)) { auto idxValue = CIdx->getSExtValue(); @@ -401,12 +484,12 @@ namespace ctrace::stack::analysis continue; } - // 5) Cas index variable : test[i] / ptr[i] - // On regarde si on a un intervalle pour la valeur de base (i, pas le cast) + // 5) Variable index case: test[i] / ptr[i] + // Check whether we have a range for the base value (i, not the cast) const Value* key = baseIdxVal; - // Si l'index vient d'un load (pattern -O0 : load i, icmp, load i, gep), - // on utilise le pointeur sous-jacent comme clé (l'alloca de i). + // If the index comes from a load (O0 pattern: load i, icmp, load i, gep), + // use the underlying pointer as the key (alloca of i). if (auto* LI = dyn_cast(baseIdxVal)) { key = LI->getPointerOperand(); @@ -415,13 +498,13 @@ namespace ctrace::stack::analysis auto itRange = ranges.find(key); if (itRange == ranges.end()) { - // pas de borne connue => on ne dit rien ici + // no known bound => say nothing here continue; } const IntRange& R = itRange->second; - // 5.a) Borne supérieure hors bornes: UB >= arraySize + // 5.a) Upper bound out of range: UB >= arraySize if (R.hasUpper && R.upper >= 0 && static_cast(R.upper) >= arraySize) { StackSize ub = static_cast(R.upper); @@ -481,7 +564,7 @@ namespace ctrace::stack::analysis } } - // 5.b) Borne inférieure négative: LB < 0 => index potentiellement négatif + // 5.b) Negative lower bound: LB < 0 => potentially negative index if (R.hasLower && R.lower < 0) { for (User* GU : GEP->users()) @@ -540,8 +623,8 @@ namespace ctrace::stack::analysis } } } - // Si R.hasUpper && R.upper < arraySize et (pas de LB problématique), - // on considère l'accès comme probablement sûr. + // If R.hasUpper && R.upper < arraySize and (no problematic LB), + // treat the access as probably safe. } } } @@ -576,7 +659,7 @@ namespace ctrace::stack::analysis if (!GEP) continue; - // On remonte à la base pour trouver une alloca de tableau sur la stack. + // Walk back to the base to find a stack array alloca. const Value* basePtr = GEP->getPointerOperand(); std::vector dummyAliasPath; const AllocaInst* AI = @@ -584,22 +667,29 @@ namespace ctrace::stack::analysis if (!AI) continue; - // On récupère l'expression d'index utilisée dans le GEP. + // Retrieve the index expression used in the GEP. Value* idxVal = nullptr; Type* srcElemTy = GEP->getSourceElementType(); + bool isDirectArray = false; if (auto* arrTy = dyn_cast(srcElemTy)) { + isDirectArray = true; // Pattern [N x T]* -> indices [0, i] if (GEP->getNumIndices() < 2) continue; auto idxIt = GEP->idx_begin(); - ++idxIt; // saute le premier indice (souvent 0) + ++idxIt; // skip the first index (often 0) idxVal = idxIt->get(); } else { - // Pattern T* -> indice unique [i] (cas char *ptr = test; ptr[i]) + if (!shouldUseAllocaFallback(AI, F)) + continue; + auto maybeCount = getAllocaElementCount(const_cast(AI)); + if (!maybeCount || *maybeCount <= 1) + continue; + // Pattern T* -> single index [i] (case char *ptr = test; ptr[i]) if (GEP->getNumIndices() < 1) continue; auto idxIt = GEP->idx_begin(); @@ -609,7 +699,7 @@ namespace ctrace::stack::analysis if (!idxVal) continue; - // On normalise un peu la clé d'index en enlevant les casts SSA. + // Normalize the index key by stripping SSA casts. const Value* idxKey = idxVal; while (auto* cast = dyn_cast(const_cast(idxKey))) { @@ -623,14 +713,14 @@ namespace ctrace::stack::analysis } } - // Construction des warnings pour chaque buffer qui reçoit plusieurs stores. + // Build warnings for each buffer that receives multiple stores. for (auto& entry : infoMap) { const AllocaInst* AI = entry.first; const Info& info = entry.second; if (info.storeCount <= 1) - continue; // un seul store -> pas de warning + continue; // single store -> no warning MultipleStoreIssue issue; issue.funcName = F.getName().str(); From f505dcc3c59c59b7d5a3b4c6a5dd3eb5168711a1 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:21:44 +0100 Subject: [PATCH 07/11] docs: document compdb, dump-ir, and related CLI options --- README.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 92f1705..5d95751 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,10 @@ ./stack_usage_analyzer --mode=[abi/ir] test.[ll/c/cpp] other.[ll/c/cpp] ./stack_usage_analyzer main.cpp -I./include ./stack_usage_analyzer main.cpp -I./include --compile-arg=-I/opt/homebrew/opt/llvm@20/include +./stack_usage_analyzer main.cpp --compile-commands=build/compile_commands.json ./stack_usage_analyzer main.cpp -I./include --only-file=./main.cpp --only-function=main +./stack_usage_analyzer main.cpp --dump-ir=./debug/main.ll +./stack_usage_analyzer a.c b.c --dump-ir=./debug ``` ``` @@ -29,6 +32,11 @@ --warnings-only keeps only important diagnostics --stack-limit= overrides stack limit (bytes, or KiB/MiB/GiB) --compile-arg= passes an extra argument to the compiler +--compile-commands= uses compile_commands.json (file or directory) +--compdb= alias for --compile-commands +--compdb-fast drops heavy build flags for faster analysis +--timing prints compile/analysis timings to stderr +--dump-ir= writes LLVM IR to a file (or directory for multiple inputs) -I or -I adds an include directory -D[=value] or -D [=value] defines a macro --only-file= or --only-file filters by file @@ -38,6 +46,13 @@ --dump-filter prints filter decisions (stderr) ``` +To generate `compile_commands.json` with CMake, configure with +`-DCMAKE_EXPORT_COMPILE_COMMANDS=ON` and point to the resulting file +(often under `build/`). + +If analysis feels slow, `--compdb-fast` disables heavy flags (optimizations, +sanitizers, profiling) while keeping include paths and macros. + ### Example Given this code: @@ -142,7 +157,7 @@ Function: main Examples: ```c char buf[10]; -return buf; // renvoi pointeur vers stack → use-after-return +return buf; // returns pointer to stack -> use-after-return ``` Or storing: From b7ad381bfc082194fbf7aba1ee79c0ab626ea486 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:22:18 +0100 Subject: [PATCH 08/11] chore: translate comments --- test/bound-storage/ranges_test.c | 54 ++++++++++++++--------------- test/escape-stack/direct-callback.c | 2 +- test/escape-stack/global-buf.c | 2 +- test/escape-stack/out_param.c | 6 ++-- test/escape-stack/return-buf.c | 2 +- test/vla/deguised-constant.c | 2 +- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/test/bound-storage/ranges_test.c b/test/bound-storage/ranges_test.c index 2448c93..f064f05 100644 --- a/test/bound-storage/ranges_test.c +++ b/test/bound-storage/ranges_test.c @@ -1,10 +1,10 @@ #include /* - * 1) Cas simples : borne sup OK / pas OK + * 1) Simple cases: upper bound OK / not OK */ -// AUCUN WARNING attendu (UB = 9, taille = 10) +// NO WARNING expected (UB = 9, size = 10) void ub_ok(int i) { char buf[10]; @@ -13,7 +13,7 @@ void ub_ok(int i) buf[i] = 'A'; } -// WARNING UB attendu (UB = 10, taille = 10) +// WARNING UB expected (UB = 10, size = 10) void ub_overflow(int i) { char buf[10]; @@ -28,10 +28,10 @@ void ub_overflow(int i) } /* - * 2) Borne inf négative : index potentiellement < 0 + * 2) Negative lower bound: index potentially < 0 */ -// WARNING LB négatif attendu (i >= -3 && i < 5) +// WARNING negative LB expected (i >= -3 && i < 5) void lb_negative(int i) { char buf[10]; @@ -45,7 +45,7 @@ void lb_negative(int i) buf[i] = 'C'; } -// WARNING LB négatif + UB hors borne (i >= -3 && i <= 15) +// WARNING negative LB + UB out of bounds (i >= -3 && i <= 15) void lb_and_ub(int i) { char buf[10]; @@ -66,18 +66,18 @@ void lb_and_ub(int i) } /* - * 3) if imbriqués : affiner l’intervalle (LB & UB) + * 3) Nested ifs: refine the range (LB & UB) * * if (i <= 10) { * if (i > 5) * buf[i] = 'E'; * } * - * Ici, on sait que 6 <= i <= 10 - * avec buf[8] → UB hors borne + * Here we know that 6 <= i <= 10 + * with buf[8] -> UB out of bounds */ -// ATTENDU : UB hors borne (taille 8, i ∈ [6,10]) +// EXPECTED: UB out of bounds (size 8, i in [6,10]) void nested_if_overflow(int i) { char buf[8]; @@ -96,7 +96,7 @@ void nested_if_overflow(int i) } } -// Variante “safe” pour comparaison (taille 16, i ∈ [6,10]) → idéalement aucun warning +// “Safe” variant for comparison (size 16, i in [6,10]) -> ideally no warnings void nested_if_ok(int i) { char buf[16]; @@ -111,10 +111,10 @@ void nested_if_ok(int i) } /* - * 4) Boucles : patterns classiques de for + * 4) Loops: classic for patterns */ -// AUCUN WARNING attendu (0 <= i < 10, taille 10) +// NO WARNING expected (0 <= i < 10, size 10) void loop_ok(void) { char buf[10]; @@ -123,7 +123,7 @@ void loop_ok(void) buf[i] = 'G'; } -// WARNING UB attendu (0 <= i <= 10, taille = 10) +// WARNING UB expected (0 <= i <= 10, size = 10) void loop_ub_overflow(void) { char buf[10]; @@ -137,7 +137,7 @@ void loop_ub_overflow(void) buf[i] = 'H'; } -// WARNING LB négatif attendu (-3 <= i < 5, taille = 10) +// WARNING negative LB expected (-3 <= i < 5, size = 10) void loop_lb_negative(void) { char buf[10]; @@ -147,11 +147,11 @@ void loop_lb_negative(void) } /* - * 5) Cas unreachable mais avec accès hors borne - * (tu as déjà ce genre de logique, mais ça teste qu’on garde bien l’info) + * 5) Unreachable case with out-of-bounds access + * (you already have this logic, but this checks we keep the info) */ -// ATTENDU : warning overflow + [info] unreachable +// EXPECTED: overflow warning + [info] unreachable void unreachable_example(void) { int i = 1; @@ -164,16 +164,16 @@ void unreachable_example(void) // (this is a write access) // [info] this access appears unreachable at runtime (condition is always false for this branch) if (i > 10) - { // condition fausse à l’exécution + { // condition false at runtime buf[11] = 'J'; } } /* - * 6) Aliasing de pointeur + intervalle (LB & UB) + * 6) Pointer aliasing + range (LB & UB) */ -// ATTENDU : UB + LB négatif (p = buf) +// EXPECTED: UB + negative LB (p = buf) void alias_lb_ub(int i) { char buf[10]; @@ -194,7 +194,7 @@ void alias_lb_ub(int i) p[i] = 'K'; } -// ATTENDU : aucun warning (0 <= i < 10) +// EXPECTED: no warning (0 <= i < 10) void alias_ok(int i) { char buf[10]; @@ -205,8 +205,8 @@ void alias_ok(int i) } /* - * 7) Combinaison bizarre : bornes serrées, mais toujours safe - * i ∈ [2,7], buf[8] → normalement OK + * 7) Weird combination: tight bounds, but still safe + * i in [2,7], buf[8] -> normally OK */ void tight_range_ok(int i) @@ -218,8 +218,8 @@ void tight_range_ok(int i) } /* - * 8) Cas extrême : bornes très larges - * i >= -100 && i <= 100, buf[10] → LB négatif + UB hors borne + * 8) Extreme case: very wide bounds + * i >= -100 && i <= 100, buf[10] -> negative LB + UB out of bounds */ void huge_range(int i) @@ -242,7 +242,7 @@ void huge_range(int i) } /* - * main : juste pour que le compilateur ne vire pas tout si optimisation + * main: just to prevent the compiler from optimizing everything away */ int main(void) diff --git a/test/escape-stack/direct-callback.c b/test/escape-stack/direct-callback.c index 5a05bf7..1e02fa7 100644 --- a/test/escape-stack/direct-callback.c +++ b/test/escape-stack/direct-callback.c @@ -7,7 +7,7 @@ // // at line 10, column 5 // // [!!] stack pointer escape: address of variable 'buf' escapes this function // // address passed as argument to function 'sink' (callee may capture the pointer beyond this function) -// sink(buf); // le callee peut capturer le pointeur +// sink(buf); // callee may capture the pointer // } void temporary(void) diff --git a/test/escape-stack/global-buf.c b/test/escape-stack/global-buf.c index be91b83..413bbaa 100644 --- a/test/escape-stack/global-buf.c +++ b/test/escape-stack/global-buf.c @@ -7,7 +7,7 @@ void set_global(void) // at line 10, column 7 // [!!] stack pointer escape: address of variable 'buf' escapes this function // stored into global variable 'g' (pointer may be used after the function returns) - g = buf; // warning attendu: store_global + g = buf; // warning expected: store_global } int main(void) diff --git a/test/escape-stack/out_param.c b/test/escape-stack/out_param.c index 9b46005..e006deb 100644 --- a/test/escape-stack/out_param.c +++ b/test/escape-stack/out_param.c @@ -4,11 +4,11 @@ void leak_out_param(char** out) // at line 7, column 10 // [!!] stack pointer escape: address of variable 'buf' escapes this function // stored through a non-local pointer (e.g. via an out-parameter; pointer may outlive this function) - *out = buf; // fuite via paramètre de sortie + *out = buf; // leak via out-parameter } void safe_out_param(char** out) { - char* local = 0; // pointeur, mais pas de stack buffer derrière - *out = local; // pas une adresse de variable de stack + char* local = 0; // pointer, but no stack buffer behind it + *out = local; // not a stack variable address } diff --git a/test/escape-stack/return-buf.c b/test/escape-stack/return-buf.c index 75b82d4..00dd416 100644 --- a/test/escape-stack/return-buf.c +++ b/test/escape-stack/return-buf.c @@ -4,7 +4,7 @@ char* ret_buf(void) // at line 7, column 5 // [!!] stack pointer escape: address of variable 'buf' escapes this function // escape via return statement (pointer to stack returned to caller) - return buf; // warning attendu: return + return buf; // warning expected: return } int main(void) diff --git a/test/vla/deguised-constant.c b/test/vla/deguised-constant.c index 3575e03..54b45f3 100644 --- a/test/vla/deguised-constant.c +++ b/test/vla/deguised-constant.c @@ -1,7 +1,7 @@ void foo(void) { int n = 6; - char buf[n]; // techniquement VLA, mais bornée et triviale, patch car faux positif + char buf[n]; // technically a VLA, but bounded and trivial, patch for false positive } int main(int ac, char** av) From 0d14a33f82cfca2edbe6f347a9db66b645d5fcb3 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:22:43 +0100 Subject: [PATCH 09/11] chore: translate comments --- src/StackUsageAnalyzer.cpp | 99 ++++++++++++++++++------ src/analysis/DynamicAlloca.cpp | 26 +++---- src/analysis/IRValueUtils.cpp | 11 ++- src/analysis/IntRanges.cpp | 10 +-- src/analysis/StackComputation.cpp | 117 ++++++++++++++++++++++++++--- src/mangle.cpp | 14 ++-- src/report/ReportSerialization.cpp | 6 +- 7 files changed, 218 insertions(+), 65 deletions(-) diff --git a/src/StackUsageAnalyzer.cpp b/src/StackUsageAnalyzer.cpp index 153960f..f68cd22 100644 --- a/src/StackUsageAnalyzer.cpp +++ b/src/StackUsageAnalyzer.cpp @@ -1,5 +1,6 @@ #include "StackUsageAnalyzer.hpp" +#include #include #include #include @@ -410,8 +411,8 @@ namespace ctrace::stack { auto* BB = issue.inst->getParent(); - // Parcourt les prédécesseurs du bloc pour voir si certains - // ont une branche conditionnelle avec une condition constante. + // Walk block predecessors to see whether some + // have a conditional branch with a constant condition. for (auto* Pred : predecessors(BB)) { auto* BI = dyn_cast(Pred->getTerminator()); @@ -429,7 +430,7 @@ namespace ctrace::stack if (!C0 || !C1) continue; - // Évalue le résultat de l'ICmp pour ces constantes (implémentation maison). + // Evaluate the ICmp result for these constants (homegrown implementation). bool condTrue = false; auto pred = CI->getPredicate(); const auto& v0 = C0->getValue(); @@ -468,22 +469,22 @@ namespace ctrace::stack condTrue = v0.uge(v1); break; default: - // On ne traite pas d'autres prédicats exotiques ici + // Do not handle other exotic predicates here. continue; } - // Branchement du type: + // Branch of the form: // br i1 %cond, label %then, label %else - // Successeur 0 pris si condTrue == true - // Successeur 1 pris si condTrue == false + // Successor 0 taken if condTrue == true + // Successor 1 taken if condTrue == false if (BB == BI->getSuccessor(0) && condTrue == false) { - // Le bloc "then" n'est jamais atteint. + // The "then" block is never reached. isUnreachable = true; } else if (BB == BI->getSuccessor(1) && condTrue == true) { - // Le bloc "else" n'est jamais atteint. + // The "else" block is never reached. isUnreachable = true; } } @@ -1108,75 +1109,119 @@ namespace ctrace::stack AnalysisResult analyzeModule(llvm::Module& mod, const AnalysisConfig& config) { + using Clock = std::chrono::steady_clock; + auto logDuration = [&](const char* label, Clock::time_point start) + { + if (!config.timing) + return; + auto end = Clock::now(); + auto ms = std::chrono::duration_cast(end - start).count(); + std::cerr << label << " done in " << ms << " ms\n"; + }; + + auto t0 = Clock::now(); runFunctionAttrsPass(mod); + logDuration("Function attrs pass", t0); + t0 = Clock::now(); ModuleAnalysisContext ctx = buildContext(mod, config); + logDuration("Build context", t0); const llvm::DataLayout& DL = *ctx.dataLayout; auto shouldAnalyzeFunction = [&](const llvm::Function& F) -> bool { return ctx.shouldAnalyze(F); }; - // 1) Stack locale par fonction + // 1) Local stack per function + t0 = Clock::now(); LocalStackMap localStack = computeLocalStacks(ctx); + logDuration("Compute local stacks", t0); - // 2) Graphe d'appels + // 2) Call graph + t0 = Clock::now(); analysis::CallGraph CG = buildCallGraphFiltered(ctx); + logDuration("Build call graph", t0); - // 3) Propagation + détection de récursivité + // 3) Propagation + recursion detection + t0 = Clock::now(); analysis::InternalAnalysisState state = computeRecursionState(ctx, CG, localStack); + logDuration("Compute recursion state", t0); - // 4) Construction du résultat public + // 4) Build public result FunctionAuxData aux; + t0 = Clock::now(); AnalysisResult result = buildResults(ctx, localStack, state, CG, aux); + logDuration("Build results", t0); // 4b) Emit summary diagnostics for recursion/overflow flags (for JSON parity) + t0 = Clock::now(); emitSummaryDiagnostics(result, ctx, aux); + logDuration("Emit summary diagnostics", t0); + t0 = Clock::now(); StackSize allocaLargeThreshold = analysis::computeAllocaLargeThreshold(config); + logDuration("Compute alloca threshold", t0); - // 6) Détection des dépassements de buffer sur la stack (analyse intra-fonction) + // 6) Detect stack buffer overflows (intra-function analysis) + t0 = Clock::now(); std::vector bufferIssues = analysis::analyzeStackBufferOverflows(mod, shouldAnalyzeFunction); appendStackBufferDiagnostics(result, bufferIssues); + logDuration("Stack buffer overflows", t0); - // 8) Détection des allocations dynamiques sur la stack (VLA / alloca variable) + // 8) Detect dynamic stack allocations (VLA / variable alloca) + t0 = Clock::now(); std::vector dynAllocaIssues = analysis::analyzeDynamicAllocas(mod, shouldAnalyzeFunction); appendDynamicAllocaDiagnostics(result, dynAllocaIssues); + logDuration("Dynamic allocas", t0); - // 10) Analyse des usages d'alloca (tainted / taille excessive) + // 10) Analyze alloca usage (tainted / excessive size) + t0 = Clock::now(); std::vector allocaUsageIssues = analysis::analyzeAllocaUsage( mod, DL, state.RecursiveFuncs, state.InfiniteRecursionFuncs, shouldAnalyzeFunction); appendAllocaUsageDiagnostics(result, config, allocaLargeThreshold, allocaUsageIssues); + logDuration("Alloca usage", t0); - // 11) Détection des débordements via memcpy/memset sur des buffers de stack + // 11) Detect overflows via memcpy/memset on stack buffers + t0 = Clock::now(); std::vector memIssues = analysis::analyzeMemIntrinsicOverflows(mod, DL, shouldAnalyzeFunction); appendMemIntrinsicDiagnostics(result, memIssues); + logDuration("Mem intrinsic overflows", t0); - // 11b) Détection d'écritures avec longueur "size-k" + // 11b) Detect writes with "size-k" length + t0 = Clock::now(); std::vector sizeMinusKIssues = analysis::analyzeSizeMinusKWrites(mod, DL, shouldAnalyzeFunction); appendSizeMinusKDiagnostics(result, sizeMinusKIssues); + logDuration("Size-minus-k writes", t0); - // 12) Détection de plusieurs stores dans un même buffer de stack + // 12) Detect multiple stores into the same stack buffer + t0 = Clock::now(); std::vector multiStoreIssues = analysis::analyzeMultipleStores(mod, shouldAnalyzeFunction); appendMultipleStoreDiagnostics(result, multiStoreIssues); + logDuration("Multiple stores", t0); - // 13) Détection des reconstructions invalides de pointeur de base (offsetof/container_of) + // 13) Detect invalid base pointer reconstructions (offsetof/container_of) + t0 = Clock::now(); std::vector baseReconIssues = analysis::analyzeInvalidBaseReconstructions(mod, DL, shouldAnalyzeFunction); appendInvalidBaseReconstructionDiagnostics(result, baseReconIssues); + logDuration("Invalid base reconstructions", t0); - // 14) Détection de fuite de pointeurs de stack (use-after-return potentiel) + // 14) Detect stack pointer escapes (potential use-after-return) + t0 = Clock::now(); std::vector escapeIssues = analysis::analyzeStackPointerEscapes(mod, shouldAnalyzeFunction); appendStackPointerEscapeDiagnostics(result, escapeIssues); + logDuration("Stack pointer escapes", t0); // 15) Const-correctness: parameters that can be made const + t0 = Clock::now(); std::vector constParamIssues = analysis::analyzeConstParams(mod, shouldAnalyzeFunction); appendConstParamDiagnostics(result, constParamIssues); + logDuration("Const params", t0); return result; } @@ -1193,7 +1238,19 @@ namespace ctrace::stack return AnalysisResult{config, {}}; } + using Clock = std::chrono::steady_clock; + if (config.timing) + std::cerr << "Analyzing " << filename << "...\n"; + auto analyzeStart = Clock::now(); AnalysisResult result = analyzeModule(*load.module, config); + if (config.timing) + { + auto analyzeEnd = Clock::now(); + auto ms = + std::chrono::duration_cast(analyzeEnd - analyzeStart) + .count(); + std::cerr << "Analysis done in " << ms << " ms\n"; + } for (auto& f : result.functions) { if (f.filePath.empty()) diff --git a/src/analysis/DynamicAlloca.cpp b/src/analysis/DynamicAlloca.cpp index 1d4e93a..fb0c322 100644 --- a/src/analysis/DynamicAlloca.cpp +++ b/src/analysis/DynamicAlloca.cpp @@ -29,28 +29,28 @@ namespace ctrace::stack::analysis if (!AI) continue; - // Taille d'allocation : on distingue trois cas : - // - constante immédiate -> pas une VLA - // - dérivée d'une constante simple -> pas une VLA (heuristique) - // - vraiment dépendante d'une valeur -> VLA / alloca variable + // Allocation size: we distinguish three cases: + // - immediate constant -> not a VLA + // - derived from a simple constant -> not a VLA (heuristic) + // - truly value-dependent -> VLA / variable alloca Value* arraySizeVal = AI->getArraySize(); - // 1) Cas taille directement constante dans l'IR + // 1) Size is directly constant in the IR if (llvm::isa(arraySizeVal)) - continue; // taille connue à la compilation, OK + continue; // compile-time known size, OK - // 2) Heuristique "smart" : essayer de remonter à une constante - // via les stores dans une variable locale (tryGetConstFromValue). - // Exemple typique : + // 2) "Smart" heuristic: try to trace back to a constant + // via stores into a local variable (tryGetConstFromValue). + // Typical example: // int n = 6; - // char buf[n]; // en C : VLA, mais ici n est en fait constant + // char buf[n]; // in C: VLA, but here n is actually constant // - // Dans ce cas, on ne veut pas spammer avec un warning VLA : - // on traite ça comme une taille effectivement constante. + // In this case we don't want to spam with a VLA warning: + // treat it as an effectively constant size. if (tryGetConstFromValue(arraySizeVal, F) != nullptr) continue; - // 3) Ici, on considère que c'est une vraie VLA / alloca dynamique + // 3) Here we consider it a real VLA / dynamic alloca DynamicAllocaIssue issue; issue.funcName = F.getName().str(); issue.varName = deriveAllocaName(AI); diff --git a/src/analysis/IRValueUtils.cpp b/src/analysis/IRValueUtils.cpp index 7c7d15f..05ddb33 100644 --- a/src/analysis/IRValueUtils.cpp +++ b/src/analysis/IRValueUtils.cpp @@ -91,19 +91,18 @@ namespace ctrace::stack::analysis { using namespace llvm; - // On enlève d'abord les cast (sext/zext/trunc, etc.) pour arriver - // à la vraie valeur “de base”. + // First remove casts (sext/zext/trunc, etc.) to reach the real base value. const Value* cur = V; while (auto* cast = dyn_cast(cur)) { cur = cast->getOperand(0); } - // Cas trivial : c'est déjà une constante entière. + // Trivial case: already an integer constant. if (auto* C = dyn_cast(cur)) return C; - // Cas -O0 typique : on compare un load d'une variable locale. + // Typical -O0 case: comparing a load from a local variable. auto* LI = dyn_cast(cur); if (!LI) return nullptr; @@ -111,7 +110,7 @@ namespace ctrace::stack::analysis const Value* ptr = LI->getPointerOperand(); const ConstantInt* found = nullptr; - // Version ultra-simple : on cherche un store de constante dans la fonction. + // Ultra-simple version: look for a constant store in the function. for (const BasicBlock& BB : F) { for (const Instruction& I : BB) @@ -123,7 +122,7 @@ namespace ctrace::stack::analysis continue; if (auto* C = dyn_cast(SI->getValueOperand())) { - // On garde la dernière constante trouvée (si plusieurs stores, c'est naïf). + // Keep the last constant found (if multiple stores, this is naive). found = C; } } diff --git a/src/analysis/IntRanges.cpp b/src/analysis/IntRanges.cpp index 248bb2a..4f436dd 100644 --- a/src/analysis/IntRanges.cpp +++ b/src/analysis/IntRanges.cpp @@ -98,7 +98,7 @@ namespace ctrace::stack::analysis ub = c; break; case ICmpInst::ICMP_NE: - // approximation : V != C => V <= C (très conservateur) + // approximation: V != C => V <= C (very conservative) hasUB = true; ub = c; break; @@ -108,7 +108,7 @@ namespace ctrace::stack::analysis } else { - // C ? V <=> V ? C (inversé) + // C ? V <=> V ? C (reversed) switch (pred) { case ICmpInst::ICMP_SGT: // C > V => V < C => V <= C-1 @@ -219,7 +219,7 @@ namespace ctrace::stack::analysis bool valueIsOp0 = (V == op0); - // On choisit le groupe de prédicats + // Choose the predicate group if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE || pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_NE) @@ -235,10 +235,10 @@ namespace ctrace::stack::analysis if (!(hasLB || hasUB)) continue; - // Applique la contrainte sur V lui-même + // Apply the constraint to V itself applyConstraint(V, hasLB, lb, hasUB, ub); - // Et éventuellement sur le pointeur sous-jacent si V est un load + // And possibly to the underlying pointer if V is a load if (auto* LI = dyn_cast(V)) { const Value* ptr = LI->getPointerOperand(); diff --git a/src/analysis/StackComputation.cpp b/src/analysis/StackComputation.cpp index 28c7fb4..a197b46 100644 --- a/src/analysis/StackComputation.cpp +++ b/src/analysis/StackComputation.cpp @@ -1,5 +1,8 @@ #include "analysis/StackComputation.hpp" +#include +#include +#include #include #include #include @@ -42,7 +45,7 @@ namespace ctrace::stack::analysis if (Callee && !Callee->isDeclaration() && Callee != Self) { - return true; // appel vers une autre fonction + return true; // call to another function } } } @@ -109,7 +112,7 @@ namespace ctrace::stack::analysis LocalStackInfo info = computeLocalStackBase(F, DL); llvm::MaybeAlign MA = DL.getStackAlignment(); - unsigned stackAlign = MA ? MA->value() : 1u; // 16 sur beaucoup de cibles + unsigned stackAlign = MA ? MA->value() : 1u; // 16 on many targets StackSize frameSize = info.bytes; @@ -141,14 +144,6 @@ namespace ctrace::stack::analysis { if (itState->second == Visiting) { - // Cycle détecté : on marque tous les noeuds actuellement en "Visiting" - for (auto& p : State) - { - if (p.second == Visiting) - { - Res.RecursiveFuncs.insert(p.first); - } - } auto itLocal = LocalStack.find(F); if (itLocal != LocalStack.end()) { @@ -194,6 +189,102 @@ namespace ctrace::stack::analysis State[F] = Visited; return total; } + + static bool hasSelfCall(const llvm::Function* F, const CallGraph& CG) + { + auto it = CG.find(F); + if (it == CG.end()) + return false; + + for (const llvm::Function* Callee : it->second) + { + if (Callee == F) + return true; + } + return false; + } + + struct TarjanState + { + std::unordered_map index; + std::unordered_map lowlink; + std::vector stack; + std::unordered_set onStack; + int nextIndex = 0; + std::set recursive; + }; + + static void strongConnect(const llvm::Function* V, const CallGraph& CG, TarjanState& state) + { + state.index[V] = state.nextIndex; + state.lowlink[V] = state.nextIndex; + ++state.nextIndex; + state.stack.push_back(V); + state.onStack.insert(V); + + auto it = CG.find(V); + if (it != CG.end()) + { + for (const llvm::Function* W : it->second) + { + if (state.index.find(W) == state.index.end()) + { + strongConnect(W, CG, state); + state.lowlink[V] = std::min(state.lowlink[V], state.lowlink[W]); + } + else if (state.onStack.count(W)) + { + state.lowlink[V] = std::min(state.lowlink[V], state.index[W]); + } + } + } + + if (state.lowlink[V] == state.index[V]) + { + std::vector component; + const llvm::Function* W = nullptr; + do + { + W = state.stack.back(); + state.stack.pop_back(); + state.onStack.erase(W); + component.push_back(W); + } while (W != V); + + if (component.size() > 1) + { + for (const llvm::Function* Fn : component) + { + state.recursive.insert(Fn); + } + } + else if (hasSelfCall(V, CG)) + { + state.recursive.insert(V); + } + } + } + + static std::set + computeRecursiveFunctions(const CallGraph& CG, + const std::vector& nodes) + { + TarjanState state; + state.index.reserve(nodes.size()); + state.lowlink.reserve(nodes.size()); + state.stack.reserve(nodes.size()); + state.onStack.reserve(nodes.size()); + + for (const llvm::Function* V : nodes) + { + if (state.index.find(V) == state.index.end()) + { + strongConnect(V, CG, state); + } + } + + return state.recursive; + } } // namespace CallGraph buildCallGraph(llvm::Module& M) @@ -253,11 +344,17 @@ namespace ctrace::stack::analysis InternalAnalysisState Res; std::map State; + std::vector nodes; + nodes.reserve(LocalStack.size()); + for (auto& p : LocalStack) { State[p.first] = NotVisited; + nodes.push_back(p.first); } + Res.RecursiveFuncs = computeRecursiveFunctions(CG, nodes); + for (auto& p : LocalStack) { const llvm::Function* F = p.first; diff --git a/src/mangle.cpp b/src/mangle.cpp index 8c93e90..18a244f 100644 --- a/src/mangle.cpp +++ b/src/mangle.cpp @@ -8,20 +8,20 @@ namespace ctrace_tools { std::stringstream mangled; - // Préfixe standard pour les symboles C++ dans l'Itanium ABI + // Standard prefix for C++ symbols in the Itanium ABI. mangled << "_Z"; - // Si un namespace est présent, on utilise 'N' et on encode le nom + // If a namespace is present, use 'N' and encode the name. if (!namespaceName.empty()) { mangled << "N"; mangled << namespaceName.length() << namespaceName; } - // Ajouter le nom de la fonction avec sa longueur + // Add the function name with its length. mangled << functionName.length() << functionName; - // Encoder les types de paramètres + // Encode parameter types. for (const std::string& param : paramTypes) { if (param == "int") @@ -38,7 +38,7 @@ namespace ctrace_tools } else if (param == "std::string") { - mangled << "Ss"; // 'S' pour substitution, 's' pour std::string + mangled << "Ss"; // 'S' for substitution, 's' for std::string } else if (param == "float") { @@ -54,12 +54,12 @@ namespace ctrace_tools } else { - // Pour les types complexes ou non reconnus, encoder avec longueur + nom + // For complex or unknown types, encode as length + name. mangled << param.length() << param; } } - // Fermer le namespace avec 'E' si utilisé + // Close the namespace with 'E' if used. if (!namespaceName.empty()) { mangled << "E"; diff --git a/src/report/ReportSerialization.cpp b/src/report/ReportSerialization.cpp index 38f0b19..fd88046 100644 --- a/src/report/ReportSerialization.cpp +++ b/src/report/ReportSerialization.cpp @@ -10,7 +10,7 @@ namespace ctrace::stack namespace { - // Petit helper pour échapper les chaînes JSON. + // Small helper to escape JSON strings. static std::string jsonEscape(const std::string& s) { std::string out; @@ -113,7 +113,7 @@ namespace ctrace::stack os << " \"analysisTimeMs\": " << -1 << "\n"; os << " },\n"; - // Fonctions + // Functions os << " \"functions\": [\n"; for (std::size_t i = 0; i < result.functions.size(); ++i) { @@ -261,7 +261,7 @@ namespace ctrace::stack { const auto& d = result.diagnostics[i]; os << " {\n"; - // Pour le moment, un seul ruleId générique; tu pourras le spécialiser plus tard. + // For now, use a single generic ruleId; you can specialize it later. const std::string ruleId = d.ruleId.empty() ? std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId; os << " \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n"; From 510adef7b24e4260c4d732f284b70932c3edf835 Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:24:14 +0100 Subject: [PATCH 10/11] chore: enable compile_commands.json export in CMake --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7a14cf..5a90049 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,7 @@ endif() set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(LLVM_LINK_LLVM_DYLIB ON) @@ -50,6 +51,7 @@ set(STACK_ANALYZER_SOURCES src/StackUsageAnalyzer.cpp src/analysis/AllocaUsage.cpp src/analysis/AnalyzerUtils.cpp + src/analysis/CompileCommands.cpp src/analysis/ConstParamAnalysis.cpp src/analysis/DynamicAlloca.cpp src/analysis/FunctionFilter.cpp @@ -150,8 +152,8 @@ if(BUILD_CLI) target_link_libraries(stack_usage_analyzer PRIVATE stack_usage_analyzer_lib - # pas besoin de relinker cc::compilerlib_static ici, - # il est déjà dans la lib + # No need to relink cc::compilerlib_static here, + # it is already linked into the library. ) if(ENABLE_DEBUG_ASAN) From 9f0a09086a27c705c16de5f66962989687aa179e Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 7 Feb 2026 19:24:38 +0100 Subject: [PATCH 11/11] chore: translate comments --- extern-project/src/main.cpp | 2 +- include/StackUsageAnalyzer.hpp | 47 ++-- .../analysis/InvalidBaseReconstruction.hpp | 10 +- include/analysis/SizeMinusKWrites.hpp | 2 +- include/analysis/StackBufferAnalysis.hpp | 10 +- include/analysis/StackComputation.hpp | 6 +- include/analysis/StackPointerEscape.hpp | 2 +- include/helpers.hpp | 3 +- main.cpp | 213 +++++++++++++----- run_test.py | 40 ++-- 10 files changed, 219 insertions(+), 116 deletions(-) diff --git a/extern-project/src/main.cpp b/extern-project/src/main.cpp index 595b887..7a96c16 100644 --- a/extern-project/src/main.cpp +++ b/extern-project/src/main.cpp @@ -22,7 +22,7 @@ int main(int argc, char **argv) auto res = ctrace::stack::analyzeFile(filename, cfg, ctx, diag); - // Exemple : output SARIF sur stdout + // Example: SARIF output to stdout std::cout << ctrace::stack::toSarif(res, filename) << std::endl; return 0; diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp index 7fbba27..8715115 100644 --- a/include/StackUsageAnalyzer.hpp +++ b/include/StackUsageAnalyzer.hpp @@ -15,6 +15,11 @@ namespace llvm class SMDiagnostic; } // namespace llvm +namespace ctrace::stack::analysis +{ + class CompilationDatabase; +} // namespace ctrace::stack::analysis + namespace ctrace::stack { @@ -26,33 +31,39 @@ namespace ctrace::stack ABI }; - // Configuration de l'analyse (mode + limite de stack) + // Analysis configuration (mode + stack limit). struct AnalysisConfig { AnalysisMode mode = AnalysisMode::IR; - StackSize stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB par défaut + StackSize stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB default bool quiet = false; bool warningsOnly = false; std::vector extraCompileArgs; + std::shared_ptr compilationDatabase; + bool requireCompilationDatabase = false; + bool compdbFast = false; + bool timing = false; std::vector onlyFiles; std::vector onlyDirs; std::vector onlyFunctions; bool dumpFilter = false; + std::string dumpIRPath; + bool dumpIRIsDir = false; }; - // Résultat par fonction + // Per-function result struct FunctionResult { std::string filePath; std::string name; - StackSize localStack = 0; // taille frame locale (suivant le mode) - StackSize maxStack = 0; // max stack incluant les callees - bool localStackUnknown = false; // taille locale inconnue (alloca dynamique) - bool maxStackUnknown = false; // max stack inconnue (propagée via appels) - bool hasDynamicAlloca = false; // alloca dynamique détectée dans la fonction - - bool isRecursive = false; // dans un cycle F <-> G ... - bool hasInfiniteSelfRecursion = false; // heuristique DominatorTree + StackSize localStack = 0; // local frame size (depends on mode) + StackSize maxStack = 0; // max stack including callees + bool localStackUnknown = false; // unknown local size (dynamic alloca) + bool maxStackUnknown = false; // unknown max stack (propagated via calls) + bool hasDynamicAlloca = false; // dynamic alloca detected in the function + + bool isRecursive = false; // part of a cycle F <-> G ... + bool hasInfiniteSelfRecursion = false; // DominatorTree heuristic bool exceedsLimit = false; // maxStack > config.stackLimit }; @@ -151,7 +162,7 @@ namespace ctrace::stack std::string message; }; - // Résultat global pour un module + // Global result for a module struct AnalysisResult { AnalysisConfig config; @@ -162,22 +173,22 @@ namespace ctrace::stack std::vector diagnostics; }; - // Serialize an AnalysisResult to a simple JSON format (pour CI / GitHub Actions). - // `inputFile` : chemin du fichier analysé (celui que tu passes à analyzeFile). + // Serialize an AnalysisResult to a simple JSON format (for CI / GitHub Actions). + // `inputFile`: path of the analyzed file (the one you pass to analyzeFile). std::string toJson(const AnalysisResult& result, const std::string& inputFile); std::string toJson(const AnalysisResult& result, const std::vector& inputFiles); // Serialize an AnalysisResult to SARIF 2.1.0 (compatible GitHub Code Scanning). - // `inputFile` : chemin du fichier analysé. - // `toolName` / `toolVersion` : metadata du tool dans le SARIF. + // `inputFile`: path of the analyzed file. + // `toolName` / `toolVersion`: tool metadata in SARIF. std::string toSarif(const AnalysisResult& result, const std::string& inputFile, const std::string& toolName = "coretrace-stack-analyzer", const std::string& toolVersion = "0.1.0"); - // Analyse un module déjà chargé (tu peux réutiliser dans d'autres outils) + // Analyze an already loaded module (can be reused by other tools). AnalysisResult analyzeModule(llvm::Module& mod, const AnalysisConfig& config); - // Helper pratique : charge un .ll et appelle analyzeModule() + // Convenience helper: load a .ll and call analyzeModule() AnalysisResult analyzeFile(const std::string& filename, const AnalysisConfig& config, llvm::LLVMContext& ctx, llvm::SMDiagnostic& err); diff --git a/include/analysis/InvalidBaseReconstruction.hpp b/include/analysis/InvalidBaseReconstruction.hpp index 47bfa11..e9191cd 100644 --- a/include/analysis/InvalidBaseReconstruction.hpp +++ b/include/analysis/InvalidBaseReconstruction.hpp @@ -19,11 +19,11 @@ namespace ctrace::stack::analysis struct InvalidBaseReconstructionIssue { std::string funcName; - std::string varName; // nom de la variable alloca (stack object) - std::string sourceMember; // membre source (ex: "b") - int64_t offsetUsed = 0; // offset utilisé dans le calcul (peut être négatif) - std::string targetType; // type vers lequel on cast (ex: "struct A*") - bool isOutOfBounds = false; // true si on peut prouver que c'est hors bornes + std::string varName; // alloca variable name (stack object) + std::string sourceMember; // source member (e.g., "b") + int64_t offsetUsed = 0; // offset used in the calculation (can be negative) + std::string targetType; // target cast type (e.g., "struct A*") + bool isOutOfBounds = false; // true if we can prove it is out of bounds const llvm::Instruction* inst = nullptr; }; diff --git a/include/analysis/SizeMinusKWrites.hpp b/include/analysis/SizeMinusKWrites.hpp index e478f44..99b28d8 100644 --- a/include/analysis/SizeMinusKWrites.hpp +++ b/include/analysis/SizeMinusKWrites.hpp @@ -18,7 +18,7 @@ namespace ctrace::stack::analysis struct SizeMinusKWriteIssue { std::string funcName; - std::string sinkName; // nom de l'appel ou "store" + std::string sinkName; // call name or "store" bool ptrNonNull = false; bool sizeAboveK = false; bool hasPointerDest = true; diff --git a/include/analysis/StackBufferAnalysis.hpp b/include/analysis/StackBufferAnalysis.hpp index 1ee90b3..a7928e4 100644 --- a/include/analysis/StackBufferAnalysis.hpp +++ b/include/analysis/StackBufferAnalysis.hpp @@ -21,14 +21,14 @@ namespace ctrace::stack::analysis std::string funcName; std::string varName; StackSize arraySize = 0; - StackSize indexOrUpperBound = 0; // utilisé pour les bornes sup (UB) ou index constant + StackSize indexOrUpperBound = 0; // used for upper bounds (UB) or constant index bool isWrite = false; bool indexIsConstant = false; const llvm::Instruction* inst = nullptr; - // Violation basée sur une borne inférieure (index potentiellement négatif) + // Violation based on a lower bound (index potentially negative) bool isLowerBoundViolation = false; - long long lowerBound = 0; // borne inférieure déduite (signée) + long long lowerBound = 0; // deduced lower bound (signed) std::string aliasPath; // ex: "pp -> ptr -> buf" std::vector aliasPathVec; // {"pp", "ptr", "buf"} @@ -49,8 +49,8 @@ namespace ctrace::stack::analysis { std::string funcName; std::string varName; - std::size_t storeCount = 0; // nombre total de StoreInst vers ce buffer - std::size_t distinctIndexCount = 0; // nombre d'expressions d'index distinctes + std::size_t storeCount = 0; // total number of StoreInsts into this buffer + std::size_t distinctIndexCount = 0; // number of distinct index expressions const llvm::AllocaInst* allocaInst = nullptr; }; diff --git a/include/analysis/StackComputation.hpp b/include/analysis/StackComputation.hpp index 8b56401..9025b5c 100644 --- a/include/analysis/StackComputation.hpp +++ b/include/analysis/StackComputation.hpp @@ -33,9 +33,9 @@ namespace ctrace::stack::analysis struct InternalAnalysisState { - std::map TotalStack; // stack max, callees inclus - std::set RecursiveFuncs; // fonctions dans au moins un cycle - std::set InfiniteRecursionFuncs; // auto-récursion “infinie” + std::map TotalStack; // max stack, including callees + std::set RecursiveFuncs; // functions in at least one cycle + std::set InfiniteRecursionFuncs; // “infinite” self-recursion }; CallGraph buildCallGraph(llvm::Module& M); diff --git a/include/analysis/StackPointerEscape.hpp b/include/analysis/StackPointerEscape.hpp index 86f6438..43cb4a9 100644 --- a/include/analysis/StackPointerEscape.hpp +++ b/include/analysis/StackPointerEscape.hpp @@ -19,7 +19,7 @@ namespace ctrace::stack::analysis std::string varName; std::string escapeKind; // "return", "store_global", "store_unknown", "call_arg", "call_callback" - std::string targetName; // nom du global, si applicable + std::string targetName; // global name, if applicable const llvm::Instruction* inst = nullptr; }; diff --git a/include/helpers.hpp b/include/helpers.hpp index 2351fe4..06889c8 100644 --- a/include/helpers.hpp +++ b/include/helpers.hpp @@ -7,8 +7,7 @@ namespace ctrace::stack { - template - struct EnumTraits; // pas de définition générique -> erreur si non spécialisé + template struct EnumTraits; // no generic definition -> error if not specialized template concept EnumWithTraits = std::is_enum_v && requires { diff --git a/main.cpp b/main.cpp index 8a0c346..f952b43 100644 --- a/main.cpp +++ b/main.cpp @@ -7,11 +7,13 @@ #include #include #include +#include #include #include #include #include #include +#include "analysis/CompileCommands.hpp" #include "mangle.hpp" using namespace ctrace::stack; @@ -38,11 +40,16 @@ static void printHelp() << " -D[=value] Define macro for C/C++ inputs\n" << " -D [=value] Define macro for C/C++ inputs\n" << " --compile-arg= Pass extra compile argument (repeatable)\n" + << " --compile-commands= Use compile_commands.json (file or directory)\n" + << " --compdb= Alias for --compile-commands\n" + << " --compdb-fast Speed up compdb builds (drops heavy flags)\n" + << " --timing Print compilation/analysis timing to stderr\n" << " --only-file= Only report functions from this source file\n" << " --only-dir= Only report functions under this directory\n" << " --only-func= Only report functions with this name (comma-separated)\n" << " --stack-limit= Override stack size limit (bytes, or KiB/MiB/GiB)\n" << " --dump-filter Print filter decisions to stderr\n" + << " --dump-ir= Write LLVM IR to file (or directory for multiple inputs)\n" << " --quiet Suppress per-function diagnostics\n" << " --warnings-only Show warnings and errors only\n" << " -h, --help Show this help message and exit\n\n" @@ -51,66 +58,35 @@ static void printHelp() << " stack_usage_analyzer input1.ll input2.ll --format=json\n" << " stack_usage_analyzer main.cpp -I../include --format=json\n" << " stack_usage_analyzer main.cpp -I../include --only-dir=../src\n" + << " stack_usage_analyzer main.cpp --compile-commands=build/compile_commands.json\n" << " stack_usage_analyzer input.ll --mode=abi --format=json\n" << " stack_usage_analyzer input.ll --warnings-only\n"; } static std::string normalizePath(const std::string& input) { - std::string out = input; - for (char& c : out) + if (input.empty()) + return {}; + + std::string adjusted = input; + for (char& c : adjusted) { if (c == '\\') c = '/'; } - const bool isAbs = !out.empty() && out.front() == '/'; - std::vector parts; - std::string cur; - for (char c : out) - { - if (c == '/') - { - if (!cur.empty()) - { - if (cur == "..") - { - if (!parts.empty()) - parts.pop_back(); - } - else if (cur != ".") - { - parts.push_back(cur); - } - cur.clear(); - } - } - else - { - cur.push_back(c); - } - } - if (!cur.empty()) - { - if (cur == "..") - { - if (!parts.empty()) - parts.pop_back(); - } - else if (cur != ".") - { - parts.push_back(cur); - } - } - std::string norm = isAbs ? "/" : ""; - for (std::size_t i = 0; i < parts.size(); ++i) - { - norm += parts[i]; - if (i + 1 < parts.size()) - norm += "/"; - } - while (!norm.empty() && norm.back() == '/') - norm.pop_back(); - return norm; + + std::filesystem::path path(adjusted); + std::error_code ec; + std::filesystem::path absPath = std::filesystem::absolute(path, ec); + if (ec) + absPath = path; + + std::filesystem::path canonicalPath = std::filesystem::weakly_canonical(absPath, ec); + std::filesystem::path norm = ec ? absPath.lexically_normal() : canonicalPath; + std::string out = norm.generic_string(); + while (out.size() > 1 && out.back() == '/') + out.pop_back(); + return out; } static std::string basenameOf(const std::string& path) @@ -412,22 +388,17 @@ static AnalysisResult filterWarningsOnly(const AnalysisResult& result, const Ana return filtered; } -void toto(void) -{ - char test[974] = "Hello"; - return; -} - int main(int argc, char** argv) { - toto(); llvm::LLVMContext context; std::vector inputFilenames; OutputFormat outputFormat = OutputFormat::Human; - AnalysisConfig cfg; // mode = IR, stackLimit = 8MiB par défaut + AnalysisConfig cfg; // mode = IR, stackLimit = 8 MiB default cfg.quiet = false; cfg.warningsOnly = false; + std::string compileCommandsPath; + bool compileCommandsExplicit = false; // cfg.mode = AnalysisMode::IR; -> already set by default constructor // cfg.stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB -> already set by default constructor but needed to be set with args @@ -539,6 +510,21 @@ int main(int argc, char** argv) cfg.dumpFilter = true; continue; } + if (argStr == "--dump-ir") + { + if (i + 1 >= argc) + { + llvm::errs() << "Missing argument for --dump-ir\n"; + return 1; + } + cfg.dumpIRPath = argv[++i]; + continue; + } + if (argStr.rfind("--dump-ir=", 0) == 0) + { + cfg.dumpIRPath = argStr.substr(std::strlen("--dump-ir=")); + continue; + } if (argStr == "-I") { if (i + 1 >= argc) @@ -574,6 +560,39 @@ int main(int argc, char** argv) cfg.extraCompileArgs.emplace_back(argStr.substr(std::strlen("--compile-arg="))); continue; } + if (argStr == "--compdb-fast") + { + cfg.compdbFast = true; + continue; + } + if (argStr == "--timing") + { + cfg.timing = true; + continue; + } + if (argStr == "--compile-commands" || argStr == "--compdb") + { + if (i + 1 >= argc) + { + llvm::errs() << "Missing argument for " << argStr << "\n"; + return 1; + } + compileCommandsPath = argv[++i]; + compileCommandsExplicit = true; + continue; + } + if (argStr.rfind("--compile-commands=", 0) == 0) + { + compileCommandsPath = argStr.substr(std::strlen("--compile-commands=")); + compileCommandsExplicit = true; + continue; + } + if (argStr.rfind("--compdb=", 0) == 0) + { + compileCommandsPath = argStr.substr(std::strlen("--compdb=")); + compileCommandsExplicit = true; + continue; + } if (argStr == "--warnings-only") { cfg.warningsOnly = true; @@ -622,6 +641,52 @@ int main(int argc, char** argv) } } + if (compileCommandsExplicit) + { + if (compileCommandsPath.empty()) + { + llvm::errs() << "compile commands path is empty\n"; + return 1; + } + + std::filesystem::path compdbPath = compileCommandsPath; + std::error_code fsErr; + if (std::filesystem::is_directory(compdbPath, fsErr)) + { + compdbPath /= "compile_commands.json"; + } + else if (fsErr) + { + llvm::errs() << "Failed to inspect compile commands path: " << fsErr.message() << "\n"; + return 1; + } + + if (!std::filesystem::exists(compdbPath, fsErr)) + { + if (fsErr) + { + llvm::errs() << "Failed to inspect compile commands path: " << fsErr.message() + << "\n"; + } + else + { + llvm::errs() << "compile commands file not found: " << compdbPath.string() << "\n"; + } + return 1; + } + + std::string error; + auto db = + ctrace::stack::analysis::CompilationDatabase::loadFromFile(compdbPath.string(), error); + if (!db) + { + llvm::errs() << "Failed to load compile commands: " << error << "\n"; + return 1; + } + cfg.compilationDatabase = std::move(db); + cfg.requireCompilationDatabase = true; + } + if (inputFilenames.empty()) { llvm::errs() << "Usage: stack_usage_analyzer [file2.ll ...] [options]\n" @@ -629,6 +694,36 @@ int main(int argc, char** argv) return 1; } + if (!cfg.dumpIRPath.empty()) + { + const bool trailingSlash = !cfg.dumpIRPath.empty() && + (cfg.dumpIRPath.back() == '/' || cfg.dumpIRPath.back() == '\\'); + std::error_code fsErr; + std::filesystem::path dumpPath(cfg.dumpIRPath); + const bool exists = std::filesystem::exists(dumpPath, fsErr); + if (fsErr) + { + llvm::errs() << "Failed to inspect dump IR path: " << fsErr.message() << "\n"; + return 1; + } + bool isDir = false; + if (exists) + { + isDir = std::filesystem::is_directory(dumpPath, fsErr); + if (fsErr) + { + llvm::errs() << "Failed to inspect dump IR path: " << fsErr.message() << "\n"; + return 1; + } + } + if (inputFilenames.size() > 1 && !isDir && !trailingSlash) + { + llvm::errs() << "--dump-ir must point to a directory when analyzing multiple inputs\n"; + return 1; + } + cfg.dumpIRIsDir = isDir || trailingSlash || inputFilenames.size() > 1; + } + std::sort(inputFilenames.begin(), inputFilenames.end()); std::vector> results; results.reserve(inputFilenames.size()); @@ -740,7 +835,7 @@ int main(int argc, char** argv) std::vector param_types; // param_types.reserve(issue.inst->getFunction()->arg_size()); param_types.push_back( - "void"); // dummy to avoid empty vector issue // refaire avec les paramèters réels + "void"); // dummy to avoid empty vector issue // replace with real parameters llvm::outs() << "Function: " << f.name << " " << ((ctrace_tools::isMangled(f.name)) diff --git a/run_test.py b/run_test.py index 241ff50..f2fe6de 100755 --- a/run_test.py +++ b/run_test.py @@ -5,17 +5,17 @@ import re from pathlib import Path -# Chemin vers ton binaire d'analyse -ANALYZER = Path("./build/stack_usage_analyzer") # à adapter si besoin -TEST_DIR = Path("test") # dossier contenant les .c +# Path to the analyzer binary +ANALYZER = Path("./build/stack_usage_analyzer") # adjust if needed +TEST_DIR = Path("test") # folder containing the .c files def normalize(s: str) -> str: """ - Normalise les espaces pour rendre les comparaisons plus robustes : - - supprime les espaces inutiles en début/fin de ligne - - remplace les séquences d'espaces par un seul espace - - garde les sauts de lignes + Normalize spacing to make comparisons more robust: + - remove unnecessary leading/trailing spaces per line + - replace runs of spaces with a single space + - keep line breaks """ lines = [] for line in s.splitlines(): @@ -34,10 +34,9 @@ def normalize(s: str) -> str: def extract_expectations(c_path: Path): """ - Extrait les blocs de commentaires d'attendus dans un fichier .c. + Extract expected comment blocks from a .c file. - On cherche les commentaires qui commencent par "// at line". - On prend toutes les lignes de commentaires qui suivent. + Look for comments that start with "// at line" and take all following comment lines. """ expectations = [] negative_expectations = [] @@ -64,21 +63,21 @@ def extract_expectations(c_path: Path): i += 1 continue - # Début d'un bloc d'attendu + # Start of an expectation block if stripped.startswith("// at line"): comment_block = [raw] i += 1 - # Récupère toutes les lignes "// ..." qui suivent + # Collect all following "// ..." lines while i < n and lines[i].lstrip().startswith("//"): comment_block.append(lines[i]) i += 1 - # Nettoyage : retirer les "//" et les indentations + # Cleanup: remove "//" and indentation cleaned_lines = [] for c in comment_block: s = c.lstrip() if s.startswith("//"): - s = s[2:] # enlève "//" + s = s[2:] # remove "//" cleaned_lines.append(s.lstrip()) expectation_text = "\n".join(cleaned_lines) @@ -91,7 +90,7 @@ def extract_expectations(c_path: Path): def run_analyzer_on_file(c_path: Path, stack_limit=None) -> str: """ - Lance ton analyseur sur un fichier C et récupère stdout+stderr. + Run the analyzer on a C file and capture stdout+stderr. """ args = [str(ANALYZER), str(c_path)] if stack_limit: @@ -495,7 +494,7 @@ def has_json_recursion_diag(func_name: str, needle: str) -> bool: def check_help_flags() -> bool: """ - Vérifie que -h et --help affichent l'aide sur stdout et retournent 0. + Check that -h and --help print help to stdout and return 0. """ print("=== Testing help flags ===") ok = True @@ -525,7 +524,7 @@ def check_help_flags() -> bool: def check_multi_file_json() -> bool: """ - Vérifie que l'analyse accepte plusieurs fichiers et que le JSON agrège correctement. + Check that analysis accepts multiple files and JSON aggregates correctly. """ print("=== Testing multi-file JSON ===") file_a = TEST_DIR / "test.ll" @@ -598,7 +597,7 @@ def matches_input(input_path: str) -> bool: def check_multi_file_failure() -> bool: """ - Vérifie le comportement fail-fast quand un fichier est invalide. + Check fail-fast behavior when a file is invalid. """ print("=== Testing multi-file failure ===") valid_file = TEST_DIR / "test.ll" @@ -628,7 +627,7 @@ def check_multi_file_failure() -> bool: def check_cli_parsing_and_filters() -> bool: """ - Vérifie parsing CLI (erreurs) + filtres principaux. + Check CLI parsing (errors) + main filters. """ print("=== Testing CLI parsing & filters ===") ok = True @@ -724,8 +723,7 @@ def check_cli_parsing_and_filters() -> bool: def check_file(c_path: Path): """ - Vérifie qu'avec ce fichier, toutes les attentes sont présentes - dans la sortie de l'analyseur. + Check that, for this file, all expectations are present in the analyzer output. """ print(f"=== Testing {c_path} ===") expectations, negative_expectations, stack_limit = extract_expectations(c_path)