diff --git a/.github/workflows/commit-check.yml b/.github/workflows/commit-check.yml new file mode 100644 index 0000000..5d75c9a --- /dev/null +++ b/.github/workflows/commit-check.yml @@ -0,0 +1,16 @@ +name: Commit conventions + +on: + push: + +jobs: + commit_check: + name: Check conventional commits + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Validate commit messages + run: | + python3 scripts/ci/commit_checker.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d352530 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Build directories +/build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..bad34cd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,9 @@ +repos: + - repo: local + hooks: + - id: conventional-commit-msg + name: conventional commit message + entry: python3 scripts/git/commit_msg_checker.py + language: system + stages: [commit-msg] + pass_filenames: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f07e0c..c7a14cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,7 +48,23 @@ option(BUILD_SHARED_LIB "Build shared library variant" ON) # =========================== set(STACK_ANALYZER_SOURCES src/StackUsageAnalyzer.cpp + src/analysis/AllocaUsage.cpp + src/analysis/AnalyzerUtils.cpp + src/analysis/ConstParamAnalysis.cpp + src/analysis/DynamicAlloca.cpp + src/analysis/FunctionFilter.cpp + src/analysis/IRValueUtils.cpp + src/analysis/IntRanges.cpp + src/analysis/InputPipeline.cpp + src/analysis/InvalidBaseReconstruction.cpp + src/analysis/MemIntrinsicOverflow.cpp + src/analysis/SizeMinusKWrites.cpp + 
src/analysis/StackBufferAnalysis.cpp + src/analysis/StackComputation.cpp + src/analysis/StackPointerEscape.cpp + src/report/ReportSerialization.cpp src/mangle.cpp + src/passes/ModulePasses.cpp ) include_directories(${LLVM_INCLUDE_DIRS}) diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp index 4a3bdf3..7fbba27 100644 --- a/include/StackUsageAnalyzer.hpp +++ b/include/StackUsageAnalyzer.hpp @@ -106,12 +106,13 @@ namespace ctrace::stack AllocaTooLarge = 8, AllocaUsageWarning = 9, InvalidBaseReconstruction = 10, - ConstParameterNotModified = 11 + ConstParameterNotModified = 11, + SizeMinusOneWrite = 12 }; template <> struct EnumTraits { - static constexpr std::array names = {"None", + static constexpr std::array names = {"None", "StackBufferOverflow", "NegativeStackIndex", "VLAUsage", @@ -122,7 +123,8 @@ namespace ctrace::stack "AllocaTooLarge", "AllocaUsageWarning", "InvalidBaseReconstruction", - "ConstParameterNotModified"}; + "ConstParameterNotModified", + "SizeMinusOneWrite"}; }; /* diff --git a/include/analysis/AllocaUsage.hpp b/include/analysis/AllocaUsage.hpp new file mode 100644 index 0000000..1d50512 --- /dev/null +++ b/include/analysis/AllocaUsage.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include +#include + +#include "StackUsageAnalyzer.hpp" + +namespace llvm +{ + class AllocaInst; + class DataLayout; + class Function; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + struct AllocaUsageIssue + { + std::string funcName; + std::string varName; + const llvm::AllocaInst* allocaInst = nullptr; + + bool userControlled = false; // size derived from argument / non-local value + bool sizeIsConst = false; // size known exactly + bool hasUpperBound = false; // bounded size (from ICmp-derived range) + bool isRecursive = false; // function participates in a recursion cycle + bool isInfiniteRecursive = false; // unconditional self recursion + + StackSize sizeBytes = 0; // exact size in bytes (if 
sizeIsConst) + StackSize upperBoundBytes = 0; // upper bound in bytes (if hasUpperBound) + }; + + std::vector + analyzeAllocaUsage(llvm::Module& mod, const llvm::DataLayout& DL, + const std::set& recursiveFuncs, + const std::set& infiniteRecursionFuncs, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/AnalyzerUtils.hpp b/include/analysis/AnalyzerUtils.hpp new file mode 100644 index 0000000..99fe3e5 --- /dev/null +++ b/include/analysis/AnalyzerUtils.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include "StackUsageAnalyzer.hpp" +#include "analysis/StackComputation.hpp" + +namespace llvm +{ + class Function; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + std::string formatFunctionNameForMessage(const std::string& name); + + std::string getFunctionSourcePath(const llvm::Function& F); + + bool getFunctionSourceLocation(const llvm::Function& F, unsigned& line, unsigned& column); + + std::string buildMaxStackCallPath(const llvm::Function* F, const CallGraph& CG, + const InternalAnalysisState& state); + + bool shouldIncludePath(const std::string& path, const AnalysisConfig& config); + + bool functionNameMatches(const llvm::Function& F, const AnalysisConfig& config); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/ConstParamAnalysis.hpp b/include/analysis/ConstParamAnalysis.hpp new file mode 100644 index 0000000..ca1ee14 --- /dev/null +++ b/include/analysis/ConstParamAnalysis.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + struct ConstParamIssue + { + std::string funcName; + std::string paramName; + std::string currentType; + std::string suggestedType; + std::string suggestedTypeAlt; + bool pointerConstOnly = false; // ex: T * const param + bool isReference = false; + bool isRvalueRef = false; + unsigned line = 0; + unsigned column = 0; + 
    // Internal report for a dynamic stack allocation (VLA / variable-size alloca).
    struct DynamicAllocaIssue
    {
        std::string funcName; // NOTE(review): presumably the function containing the alloca — confirm
        std::string varName;  // NOTE(review): presumably the name derived for the alloca — confirm
        std::string typeName; // NOTE(review): presumably a printable form of the allocated type — confirm
        // Instruction this report refers to; stays null until a producer fills it in.
        const llvm::AllocaInst* allocaInst = nullptr;
    };
    // Integer interval for a value: signed lower / upper bounds, each optional.
    struct IntRange
    {
        // NOTE(review): hasLower / hasUpper look like validity flags for the
        // matching bound below — confirm against computeIntRangesFromICmps.
        bool hasLower = false;
        long long lower = 0;
        bool hasUpper = false;
        long long upper = 0;
    };
    // One finding produced by analyzeInvalidBaseReconstructions.
    struct InvalidBaseReconstructionIssue
    {
        std::string funcName;
        std::string varName;      // name of the alloca variable (stack object)
        std::string sourceMember; // source member (e.g. "b")
        int64_t offsetUsed = 0;   // offset used in the computation (may be negative)
        std::string targetType;   // type being cast to (e.g. "struct A*")
        bool isOutOfBounds = false; // true if it can be proven to be out of bounds
        const llvm::Instruction* inst = nullptr;
    };
llvm + +namespace ctrace::stack::analysis +{ + struct SizeMinusKWriteIssue + { + std::string funcName; + std::string sinkName; // nom de l'appel ou "store" + bool ptrNonNull = false; + bool sizeAboveK = false; + bool hasPointerDest = true; + int64_t k = 1; + const llvm::Instruction* inst = nullptr; + }; + + std::vector analyzeSizeMinusKWrites( + llvm::Module& mod, const llvm::DataLayout& DL, + const std::function& shouldAnalyzeFunction); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/StackBufferAnalysis.hpp b/include/analysis/StackBufferAnalysis.hpp new file mode 100644 index 0000000..1ee90b3 --- /dev/null +++ b/include/analysis/StackBufferAnalysis.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include +#include +#include + +#include "StackUsageAnalyzer.hpp" + +namespace llvm +{ + class AllocaInst; + class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + struct StackBufferOverflowIssue + { + std::string funcName; + std::string varName; + StackSize arraySize = 0; + StackSize indexOrUpperBound = 0; // utilisé pour les bornes sup (UB) ou index constant + bool isWrite = false; + bool indexIsConstant = false; + const llvm::Instruction* inst = nullptr; + + // Violation basée sur une borne inférieure (index potentiellement négatif) + bool isLowerBoundViolation = false; + long long lowerBound = 0; // borne inférieure déduite (signée) + + std::string aliasPath; // ex: "pp -> ptr -> buf" + std::vector aliasPathVec; // {"pp", "ptr", "buf"} + // Optional : helper for sync string <- vector + void rebuildAliasPathString(const std::string& sep = " -> ") + { + aliasPath.clear(); + for (size_t i = 0; i < aliasPathVec.size(); ++i) + { + aliasPath += aliasPathVec[i]; + if (i + 1 < aliasPathVec.size()) + aliasPath += sep; + } + } + }; + + struct MultipleStoreIssue + { + std::string funcName; + std::string varName; + std::size_t storeCount = 0; // nombre total de StoreInst vers ce buffer + std::size_t 
distinctIndexCount = 0; // nombre d'expressions d'index distinctes + const llvm::AllocaInst* allocaInst = nullptr; + }; + + std::vector + analyzeStackBufferOverflows(llvm::Module& mod, + const std::function& shouldAnalyze); + + std::vector + analyzeMultipleStores(llvm::Module& mod, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/StackComputation.hpp b/include/analysis/StackComputation.hpp new file mode 100644 index 0000000..8b56401 --- /dev/null +++ b/include/analysis/StackComputation.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +#include "StackUsageAnalyzer.hpp" + +namespace llvm +{ + class DataLayout; + class Function; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + using CallGraph = std::map>; + + struct StackEstimate + { + StackSize bytes = 0; + bool unknown = false; + }; + + struct LocalStackInfo + { + StackSize bytes = 0; + bool unknown = false; + bool hasDynamicAlloca = false; + std::vector> localAllocas; + }; + + struct InternalAnalysisState + { + std::map TotalStack; // stack max, callees inclus + std::set RecursiveFuncs; // fonctions dans au moins un cycle + std::set InfiniteRecursionFuncs; // auto-récursion “infinie” + }; + + CallGraph buildCallGraph(llvm::Module& M); + + LocalStackInfo computeLocalStack(llvm::Function& F, const llvm::DataLayout& DL, + AnalysisMode mode); + + InternalAnalysisState + computeGlobalStackUsage(const CallGraph& CG, + const std::map& LocalStack); + + bool detectInfiniteSelfRecursion(llvm::Function& F); + + StackSize computeAllocaLargeThreshold(const AnalysisConfig& config); +} // namespace ctrace::stack::analysis diff --git a/include/analysis/StackPointerEscape.hpp b/include/analysis/StackPointerEscape.hpp new file mode 100644 index 0000000..86f6438 --- /dev/null +++ b/include/analysis/StackPointerEscape.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; + 
    // One finding produced by analyzeStackPointerEscapes.
    struct StackPointerEscapeIssue
    {
        std::string funcName;
        std::string varName;
        std::string
            escapeKind; // "return", "store_global", "store_unknown", "call_arg", "call_callback"
        std::string targetName; // name of the global, if applicable
        const llvm::Instruction* inst = nullptr;
    };
def get_event_range() -> Optional[str]:
    """Return the commit range implied by the CI event, or None if unknown.

    Resolution order:
      1. ``CHECK_RANGE`` environment variable (explicit override).
      2. ``pull_request`` events: ``<base sha>..<head sha>`` from the event
         payload file named by ``GITHUB_EVENT_PATH``.
      3. ``push`` events: ``<before>..<head>``. ``GITHUB_BEFORE`` is honoured
         when a caller exports it, but it is not one of the variables GitHub
         Actions sets by default, so the ``before`` / ``after`` fields of the
         push payload are used as the fallback source.

    Returns None when no range can be determined (unknown event, missing
    payload, or an all-zero ``before`` SHA such as a newly created ref), so
    the caller can fall back to its branch-based heuristic.
    """
    override = os.environ.get("CHECK_RANGE")
    if override:
        return override

    event = os.environ.get("GITHUB_EVENT_NAME", "")
    if event not in ("pull_request", "push"):
        return None

    # Load the webhook payload once; both supported events may need it.
    payload = {}
    event_path = os.environ.get("GITHUB_EVENT_PATH")
    if event_path and os.path.exists(event_path):
        with open(event_path, "r", encoding="utf-8") as fh:
            loaded = json.load(fh)
        if isinstance(loaded, dict):
            payload = loaded

    if event == "pull_request":
        # `or {}` guards against keys that are present but null.
        pull_request = payload.get("pull_request") or {}
        base_sha = (pull_request.get("base") or {}).get("sha")
        head_sha = (pull_request.get("head") or {}).get("sha")
        if base_sha and head_sha:
            return f"{base_sha}..{head_sha}"
        return None

    # push event
    before = os.environ.get("GITHUB_BEFORE") or payload.get("before")
    head = os.environ.get("GITHUB_SHA") or payload.get("after")
    if before and head and not is_zero_sha(before):
        return f"{before}..{head}"
    return None
def validate_commits(commits: Iterable[Commit]) -> List[InvalidCommit]:
    """Return one InvalidCommit per commit whose subject breaks the rules.

    Git-generated subjects are exempt: merge commits (per is_merge_commit)
    and default ``git revert`` subjects (``Revert "..."``). The revert
    exemption matches scripts/git/commit_msg_checker.py, so a commit the
    local commit-msg hook accepts is not rejected later in CI.

    For every other commit the subject must be at most MAX_SUBJECT_LEN
    characters and match CONVENTIONAL_RE. Only the first failing rule is
    reported per commit.
    """
    invalid: List[InvalidCommit] = []
    for commit in commits:
        subject = commit.subject
        if is_merge_commit(commit) or subject.startswith("Revert "):
            continue
        if len(subject) > MAX_SUBJECT_LEN:
            invalid.append(
                InvalidCommit(
                    commit=commit,
                    reason=f"subject too long ({len(subject)} > {MAX_SUBJECT_LEN})",
                )
            )
            continue
        if not CONVENTIONAL_RE.match(subject):
            invalid.append(InvalidCommit(commit=commit, reason="invalid conventional format"))
    return invalid
def parse_version(tag: Optional[str]) -> Tuple[int, int, int]:
    """Convert a ``vMAJOR.MINOR.PATCH`` tag into an integer triple.

    A missing/empty tag, or one that does not have exactly that shape,
    yields ``(0, 0, 0)``.
    """
    parsed = re.match(r"^v(\d+)\.(\d+)\.(\d+)$", tag) if tag else None
    if parsed is None:
        return (0, 0, 0)
    return tuple(int(component) for component in parsed.groups())
def bump_version(base: Tuple[int, int, int], bump: str) -> Tuple[int, int, int]:
    """Apply a semantic-version bump to ``base``.

    ``bump`` is one of ``"major"``, ``"minor"`` or ``"patch"``; the bumped
    component is incremented and every lower component is reset to zero.
    Any other value returns ``base`` unchanged.
    """
    major, minor, patch = base
    bumped_by_kind = {
        "major": (major + 1, 0, 0),
        "minor": (major, minor + 1, 0),
        "patch": (major, minor, patch + 1),
    }
    return bumped_by_kind.get(bump, base)
def is_merge_message(subject: str) -> bool:
    """Return True when ``subject`` looks like a git-generated merge subject.

    Any subject starting with ``"Merge "`` qualifies. This already covers
    ``"Merge pull request ..."``, so no separate check is needed for it.
    """
    return subject.startswith("Merge ")
command -v python3 >/dev/null 2>&1; then + echo "python3 is required to install pre-commit" >&2 + exit 1 +fi + +ROOT_DIR=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +VENV_DIR="${ROOT_DIR}/.venv-pre-commit" + +python3 -m venv "${VENV_DIR}" +source "${VENV_DIR}/bin/activate" +python -m pip install --upgrade pip >/dev/null 2>&1 +python -m pip install pre-commit >/dev/null 2>&1 +python -m pre_commit install --hook-type commit-msg + +echo "Commit-msg hook installed using ${VENV_DIR}." diff --git a/src/StackUsageAnalyzer.cpp b/src/StackUsageAnalyzer.cpp index 33eefb3..153960f 100644 --- a/src/StackUsageAnalyzer.cpp +++ b/src/StackUsageAnalyzer.cpp @@ -1,5126 +1,1199 @@ #include "StackUsageAnalyzer.hpp" #include -#include #include -#include #include #include +#include #include #include #include -#include // std::snprintf - -#include -#include -#include -#include +#include #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include #include -#include #include -#include -#include -#include -#include -#include -#include "compilerlib/compiler.h" -#include "mangle.hpp" +#include "analysis/AllocaUsage.hpp" +#include "analysis/AnalyzerUtils.hpp" +#include "analysis/ConstParamAnalysis.hpp" +#include "analysis/DynamicAlloca.hpp" +#include "analysis/FunctionFilter.hpp" +#include "analysis/InputPipeline.hpp" +#include "analysis/IRValueUtils.hpp" +#include "analysis/InvalidBaseReconstruction.hpp" +#include "analysis/MemIntrinsicOverflow.hpp" +#include "analysis/SizeMinusKWrites.hpp" +#include "analysis/StackBufferAnalysis.hpp" +#include "analysis/StackComputation.hpp" +#include "analysis/StackPointerEscape.hpp" +#include "passes/ModulePasses.hpp" namespace ctrace::stack { - // ============================================================================ - // Types internes - // ============================================================================ - - using CallGraph = std::map>; - - enum VisitState - { - NotVisited 
= 0, - Visiting = 1, - Visited = 2 - }; - - struct StackEstimate - { - StackSize bytes = 0; - bool unknown = false; - }; - - struct LocalStackInfo - { - StackSize bytes = 0; - bool unknown = false; - bool hasDynamicAlloca = false; - std::vector> localAllocas; - }; - - // État interne pour la propagation - struct InternalAnalysisState - { - std::map TotalStack; // stack max, callees inclus - std::set RecursiveFuncs; // fonctions dans au moins un cycle - std::set InfiniteRecursionFuncs; // auto-récursion “infinie” - }; - - // Rapport interne pour les dépassements de buffer sur la stack - struct StackBufferOverflow + namespace { - std::string funcName; - std::string varName; - StackSize arraySize = 0; - StackSize indexOrUpperBound = 0; // utilisé pour les bornes sup (UB) ou index constant - bool isWrite = false; - bool indexIsConstant = false; - const llvm::Instruction* inst = nullptr; + struct SourceLocation + { + unsigned line = 0; + unsigned column = 0; + }; - // Nouveau : violation basée sur une borne inférieure (index potentiellement négatif) - bool isLowerBoundViolation = false; - long long lowerBound = 0; // borne inférieure déduite (signée) + struct FunctionAuxData + { + llvm::DenseMap locations; + llvm::DenseMap callPaths; + llvm::DenseMap>> + localAllocas; + llvm::DenseMap indices; + }; - std::string aliasPath; // ex: "pp -> ptr -> buf" - std::vector aliasPathVec; // {"pp", "ptr", "buf"} - // Optional : helper for sync string <- vector - void rebuildAliasPathString(const std::string& sep = " -> ") + struct ModuleAnalysisContext { - aliasPath.clear(); - for (size_t i = 0; i < aliasPathVec.size(); ++i) + llvm::Module& mod; + const AnalysisConfig& config; + const llvm::DataLayout* dataLayout = nullptr; + analysis::FunctionFilter filter; + std::vector functions; + std::unordered_set functionSet; + std::vector allDefinedFunctions; + std::unordered_set allDefinedSet; + + bool shouldAnalyze(const llvm::Function& F) const { - aliasPath += aliasPathVec[i]; - if (i + 
1 < aliasPathVec.size()) - aliasPath += sep; + return functionSet.find(&F) != functionSet.end(); } - } - }; - - // Intervalle d'entier pour une valeur : borne inférieure / supérieure (signées) - struct IntRange - { - bool hasLower = false; - long long lower = 0; - bool hasUpper = false; - long long upper = 0; - }; - - // Rapport interne pour les allocations dynamiques sur la stack (VLA / alloca variable) - struct DynamicAllocaIssue - { - std::string funcName; - std::string varName; - std::string typeName; - const llvm::AllocaInst* allocaInst = nullptr; - }; - - // Internal report for alloca/VLA usages with potentially risky sizes - struct AllocaUsageIssue - { - std::string funcName; - std::string varName; - const llvm::AllocaInst* allocaInst = nullptr; - - bool userControlled = false; // size derived from argument / non-local value - bool sizeIsConst = false; // size known exactly - bool hasUpperBound = false; // bounded size (from ICmp-derived range) - bool isRecursive = false; // function participates in a recursion cycle - bool isInfiniteRecursive = false; // unconditional self recursion - - StackSize sizeBytes = 0; // exact size in bytes (if sizeIsConst) - StackSize upperBoundBytes = 0; // upper bound in bytes (if hasUpperBound) - }; - - // Rapport interne pour les usages dangereux de memcpy/memset sur la stack - struct MemIntrinsicIssue - { - std::string funcName; - std::string varName; - std::string intrinsicName; // "memcpy" / "memset" / "memmove" - StackSize destSizeBytes = 0; - StackSize lengthBytes = 0; - const llvm::Instruction* inst = nullptr; - }; - // Rapport interne pour plusieurs stores dans un même buffer de stack - struct MultipleStoreIssue - { - std::string funcName; - std::string varName; - std::size_t storeCount = 0; // nombre total de StoreInst vers ce buffer - std::size_t distinctIndexCount = 0; // nombre d'expressions d'index distinctes - const llvm::AllocaInst* allocaInst = nullptr; - }; + bool isDefined(const llvm::Function& F) const + { + 
return allDefinedSet.find(&F) != allDefinedSet.end(); + } + }; - // Rapport interne pour les fuites de pointeurs vers la stack - struct StackPointerEscapeIssue - { - std::string funcName; - std::string varName; - std::string - escapeKind; // "return", "store_global", "store_unknown", "call_arg", "call_callback" - std::string targetName; // nom du global, si applicable - const llvm::Instruction* inst = nullptr; - }; + using LocalStackMap = std::map; - // Rapport interne pour les reconstructions invalides de pointeur de base (offsetof/container_of) - struct InvalidBaseReconstructionIssue - { - std::string funcName; - std::string varName; // nom de la variable alloca (stack object) - std::string sourceMember; // membre source (ex: "b") - int64_t offsetUsed = 0; // offset utilisé dans le calcul (peut être négatif) - std::string targetType; // type vers lequel on cast (ex: "struct A*") - bool isOutOfBounds = false; // true si on peut prouver que c'est hors bornes - const llvm::Instruction* inst = nullptr; - }; + static ModuleAnalysisContext buildContext(llvm::Module& mod, const AnalysisConfig& config) + { + ModuleAnalysisContext ctx{mod, config, &mod.getDataLayout(), + analysis::buildFunctionFilter(mod, config)}; - // Rapport interne pour les paramètres qui peuvent être rendus const (pointee/referent) - struct ConstParamIssue - { - std::string funcName; - std::string paramName; - std::string currentType; - std::string suggestedType; - std::string suggestedTypeAlt; - bool pointerConstOnly = false; // ex: T * const param - bool isReference = false; - bool isRvalueRef = false; - unsigned line = 0; - unsigned column = 0; - }; - // Analyse intra-fonction pour détecter les "fuites" de pointeurs de stack : - // - retour d'une adresse de variable locale (return buf;) - // - stockage de l'adresse d'une variable locale dans un global (global = buf;) - // - // Heuristique : pour chaque AllocaInst, on remonte son graphe d'utilisation - // en suivant les bitcast, GEP, PHI, select de 
type pointeur, et on marque - // comme "escape" : - // - tout return qui renvoie une valeur dérivée de cette alloca - // - tout store qui écrit cette valeur dans une GlobalVariable. - static void analyzeStackPointerEscapesInFunction(llvm::Function& F, - std::vector& out) - { - using namespace llvm; + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + ctx.allDefinedFunctions.push_back(&F); + if (ctx.filter.shouldAnalyze(F)) + ctx.functions.push_back(&F); + } + ctx.allDefinedSet.reserve(ctx.allDefinedFunctions.size()); + for (const llvm::Function* F : ctx.allDefinedFunctions) + { + ctx.allDefinedSet.insert(F); + } + ctx.functionSet.reserve(ctx.functions.size()); + for (const llvm::Function* F : ctx.functions) + { + ctx.functionSet.insert(F); + } - if (F.isDeclaration()) - return; + return ctx; + } - for (BasicBlock& BB : F) + static LocalStackMap computeLocalStacks(const ModuleAnalysisContext& ctx) { - for (Instruction& I : BB) + LocalStackMap localStack; + for (llvm::Function* F : ctx.allDefinedFunctions) { - auto* AI = dyn_cast(&I); - if (!AI) - continue; + analysis::LocalStackInfo info = + analysis::computeLocalStack(*F, *ctx.dataLayout, ctx.config.mode); + localStack[F] = info; + } + return localStack; + } - // On limite l'analyse aux slots "classiques" de stack (tout alloca) - SmallPtrSet visited; - SmallVector worklist; - worklist.push_back(AI); + static analysis::CallGraph buildCallGraphFiltered(const ModuleAnalysisContext& ctx) + { + analysis::CallGraph CG; + for (llvm::Function* F : ctx.allDefinedFunctions) + { + auto& vec = CG[F]; - while (!worklist.empty()) + for (llvm::BasicBlock& BB : *F) { - const Value* V = worklist.back(); - worklist.pop_back(); - if (visited.contains(V)) - continue; - visited.insert(V); - - for (const Use& U : V->uses()) + for (llvm::Instruction& I : BB) { - const User* Usr = U.getUser(); - - // 1) Retour direct ou via chaîne d'alias : return - if (auto* RI = dyn_cast(Usr)) - { - StackPointerEscapeIssue issue; - 
issue.funcName = F.getName().str(); - issue.varName = - AI->hasName() ? AI->getName().str() : std::string(""); - issue.escapeKind = "return"; - issue.targetName = {}; - issue.inst = RI; - out.push_back(std::move(issue)); - continue; - } - - // 2) Stockage de l'adresse : global = ; ou *out = ; - if (auto* SI = dyn_cast(Usr)) + const llvm::Function* Callee = nullptr; + if (auto* CI = llvm::dyn_cast(&I)) { - // Si la valeur stockée est notre pointeur (ou un alias de celui-ci) - if (SI->getValueOperand() == V) - { - const Value* dstRaw = SI->getPointerOperand(); - const Value* dst = dstRaw->stripPointerCasts(); - - // 2.a) Stockage direct dans une variable globale : fuite évidente - if (auto* GV = dyn_cast(dst)) - { - StackPointerEscapeIssue issue; - issue.funcName = F.getName().str(); - issue.varName = AI->hasName() ? AI->getName().str() - : std::string(""); - issue.escapeKind = "store_global"; - issue.targetName = - GV->hasName() ? GV->getName().str() : std::string{}; - issue.inst = SI; - out.push_back(std::move(issue)); - continue; - } - - // 2.b) Stockage via un pointeur non local (ex: *out = buf;) - // On ne connaît pas la durée de vie de la mémoire pointée par dst, - // mais si ce n'est pas une alloca de cette fonction, on considère - // que le pointeur de stack peut s'échapper (paramètre, heap, etc.). - if (!isa(dst)) - { - StackPointerEscapeIssue issue; - issue.funcName = F.getName().str(); - issue.varName = AI->hasName() ? AI->getName().str() - : std::string(""); - issue.escapeKind = "store_unknown"; - issue.targetName = - dst->hasName() ? dst->getName().str() : std::string{}; - issue.inst = SI; - out.push_back(std::move(issue)); - continue; - } - - // 2.c) Stockage dans une alloca locale : on laisse l'alias - // continuer à être exploré via la boucle de travail. On ne - // considère pas cela comme une fuite immédiate. 
- const AllocaInst* dstAI = cast(dst); - worklist.push_back(dstAI); - } - // Sinon, c'est un store vers la stack ou un autre emplacement local - // qui ne contient pas directement notre pointeur, pas une fuite en soi. - continue; + Callee = CI->getCalledFunction(); } - - // 3) Passage de l'adresse à un appel de fonction : cb(buf); ou f(buf); - if (auto* CB = dyn_cast(Usr)) + else if (auto* II = llvm::dyn_cast(&I)) { - // On inspecte tous les arguments; si l'un d'eux est V (ou un alias direct), - // on considère que l'adresse de la variable locale est transmise. - for (unsigned argIndex = 0; argIndex < CB->arg_size(); ++argIndex) - { - if (CB->getArgOperand(argIndex) != V) - continue; - - const Value* calledVal = CB->getCalledOperand(); - const Value* calledStripped = - calledVal ? calledVal->stripPointerCasts() : nullptr; - const Function* directCallee = - calledStripped ? dyn_cast(calledStripped) : nullptr; - if (CB->paramHasAttr(argIndex, llvm::Attribute::NoCapture) || - CB->paramHasAttr(argIndex, llvm::Attribute::ByVal) || - CB->paramHasAttr(argIndex, llvm::Attribute::ByRef)) - { - continue; - } - if (directCallee) - { - llvm::StringRef calleeName = directCallee->getName(); - if (calleeName.contains("unique_ptr") || - calleeName.contains("make_unique")) - { - continue; - } - } - - StackPointerEscapeIssue issue; - issue.funcName = F.getName().str(); - issue.varName = - AI->hasName() ? AI->getName().str() : std::string(""); - issue.inst = cast(CB); - - if (!directCallee) - { - // Appel indirect via pointeur de fonction : callback typique. - issue.escapeKind = "call_callback"; - issue.targetName.clear(); - } - else - { - // Appel direct : on n'a pas de connaissance précise de la sémantique - // de la fonction appelée; on marque ça comme une fuite potentielle - // plus permissive. - issue.escapeKind = "call_arg"; - issue.targetName = directCallee->hasName() - ? 
directCallee->getName().str() - : std::string{}; - } - - out.push_back(std::move(issue)); - } - - // On ne propage pas l'alias via l'appel, mais on considère que - // l'adresse peut être capturée par la fonction appelée. - continue; + Callee = II->getCalledFunction(); } - // 4) Propagation des alias de pointeurs : - if (auto* BC = dyn_cast(Usr)) - { - if (BC->getType()->isPointerTy()) - worklist.push_back(BC); - continue; - } - if (auto* GEP = dyn_cast(Usr)) - { - worklist.push_back(GEP); - continue; - } - if (auto* PN = dyn_cast(Usr)) - { - if (PN->getType()->isPointerTy()) - worklist.push_back(PN); - continue; - } - if (auto* Sel = dyn_cast(Usr)) + if (Callee && !Callee->isDeclaration() && ctx.isDefined(*Callee)) { - if (Sel->getType()->isPointerTy()) - worklist.push_back(Sel); - continue; + vec.push_back(Callee); } - - // Autres usages (load, comparaison, etc.) : pas une fuite, - // et on ne propage pas davantage. } } } - } - } - - // -------------------------------------------------------------------------- - // Détection des reconstructions invalides de pointeur de base (offsetof/container_of) - // -------------------------------------------------------------------------- - - // Forward declaration - static std::optional getAllocaTotalSizeBytes(const llvm::AllocaInst* AI, - const llvm::DataLayout& DL); - static const llvm::Value* getPtrToIntOperand(const llvm::Value* V); - - // Pointer origin (base alloca + offset from base) - struct PtrOrigin - { - const llvm::AllocaInst* alloca = nullptr; - int64_t offset = 0; - }; - - static bool recordVisitedOffset(std::map>& visited, - const llvm::Value* V, int64_t offset) - { - auto& setRef = visited[V]; - return setRef.insert(offset).second; - } - - static bool getGEPConstantOffsetAndBase(const llvm::Value* V, const llvm::DataLayout& DL, - int64_t& outOffset, const llvm::Value*& outBase) - { - using namespace llvm; - if (auto* GEP = dyn_cast(V)) - { - APInt offset(64, 0); - if (!GEP->accumulateConstantOffset(DL, 
offset)) - return false; - outOffset = offset.getSExtValue(); - outBase = GEP->getPointerOperand(); - return true; + return CG; } - if (auto* CE = dyn_cast(V)) + static analysis::InternalAnalysisState + computeRecursionState(const ModuleAnalysisContext& ctx, const analysis::CallGraph& CG, + const LocalStackMap& localStack) { - if (CE->getOpcode() == Instruction::GetElementPtr) - { - auto* GEP = cast(CE); - APInt offset(64, 0); - if (!GEP->accumulateConstantOffset(DL, offset)) - return false; - outOffset = offset.getSExtValue(); - outBase = GEP->getPointerOperand(); - return true; - } - } - - return false; - } - - static const llvm::Value* stripIntCasts(const llvm::Value* V) - { - using namespace llvm; + analysis::InternalAnalysisState state = + analysis::computeGlobalStackUsage(CG, localStack); - const Value* Cur = V; - while (Cur) - { - if (auto* CI = dyn_cast(Cur)) + for (llvm::Function* F : ctx.allDefinedFunctions) { - const Value* Op = CI->getOperand(0); - if (CI->getType()->isIntegerTy() && Op->getType()->isIntegerTy()) - { - Cur = Op; + const llvm::Function* Fn = F; + if (!state.RecursiveFuncs.count(Fn)) continue; - } - } - else if (auto* CE = dyn_cast(Cur)) - { - if (CE->isCast()) + + if (analysis::detectInfiniteSelfRecursion(*F)) { - const Value* Op = CE->getOperand(0); - if (CE->getType()->isIntegerTy() && Op->getType()->isIntegerTy()) - { - Cur = Op; - continue; - } + state.InfiniteRecursionFuncs.insert(Fn); } } - break; - } - return Cur; - } - - static bool isLoadFromAlloca(const llvm::Value* V, const llvm::AllocaInst* AI) - { - using namespace llvm; - if (auto* LI = dyn_cast(V)) - { - const Value* PtrOp = LI->getPointerOperand()->stripPointerCasts(); - return PtrOp == AI; - } - return false; - } - - static bool valueDependsOnAlloca(const llvm::Value* V, const llvm::AllocaInst* AI, - llvm::SmallPtrSet& visited) - { - using namespace llvm; - - if (!V) - return false; - if (!visited.insert(V).second) - return false; - if (isLoadFromAlloca(V, AI)) - return 
true; - - if (auto* I = dyn_cast(V)) - { - for (const Value* Op : I->operands()) - { - if (valueDependsOnAlloca(Op, AI, visited)) - return true; - } - } - else if (auto* CE = dyn_cast(V)) - { - for (const Value* Op : CE->operands()) - { - if (valueDependsOnAlloca(Op, AI, visited)) - return true; - } + return state; } - return false; - } - - static bool matchAllocaLoadAddSub(const llvm::Value* V, const llvm::AllocaInst* AI, - int64_t& deltaOut) - { - using namespace llvm; - - const Value* lhs = nullptr; - const Value* rhs = nullptr; - unsigned opcode = 0; - - if (auto* BO = dyn_cast(V)) - { - opcode = BO->getOpcode(); - lhs = BO->getOperand(0); - rhs = BO->getOperand(1); - } - else if (auto* CE = dyn_cast(V)) - { - opcode = CE->getOpcode(); - lhs = CE->getOperand(0); - rhs = CE->getOperand(1); - } - else + static AnalysisResult buildResults(const ModuleAnalysisContext& ctx, + const LocalStackMap& localStack, + const analysis::InternalAnalysisState& state, + const analysis::CallGraph& CG, FunctionAuxData& aux) { - return false; - } + AnalysisResult result; + result.config = ctx.config; - if (opcode != Instruction::Add && opcode != Instruction::Sub) - return false; - - const auto* lhsC = dyn_cast(lhs); - const auto* rhsC = dyn_cast(rhs); - bool lhsIsLoad = isLoadFromAlloca(lhs, AI); - bool rhsIsLoad = isLoadFromAlloca(rhs, AI); - - if (opcode == Instruction::Add) - { - if (lhsIsLoad && rhsC) - { - deltaOut = rhsC->getSExtValue(); - return true; - } - if (rhsIsLoad && lhsC) - { - deltaOut = lhsC->getSExtValue(); - return true; - } - } - else if (opcode == Instruction::Sub) - { - if (lhsIsLoad && rhsC) + for (llvm::Function* F : ctx.functions) { - deltaOut = -rhsC->getSExtValue(); - return true; - } - // Do not accept C - load - } - - return false; - } - - struct PtrIntMatch - { - const llvm::Value* ptrOperand = nullptr; - int64_t offset = 0; - bool sawOffset = false; - }; - - static void collectPtrToIntMatches(const llvm::Value* V, - llvm::SmallVectorImpl& out) - { - 
using namespace llvm; + const llvm::Function* Fn = F; - struct IntWorkItem - { - const Value* val = nullptr; - int64_t offset = 0; - bool sawOffset = false; - }; + analysis::LocalStackInfo localInfo; + analysis::StackEstimate totalInfo; - SmallVector worklist; - std::map> visited; + auto itLocal = localStack.find(Fn); + if (itLocal != localStack.end()) + localInfo = itLocal->second; - auto recordVisited = [&](const Value* Val, int64_t offset, bool sawOffset) - { - unsigned bit = sawOffset ? 2u : 1u; - unsigned& flags = visited[Val][offset]; - if (flags & bit) - return false; - flags |= bit; - return true; - }; + auto itTotal = state.TotalStack.find(Fn); + if (itTotal != state.TotalStack.end()) + totalInfo = itTotal->second; - worklist.push_back({V, 0, false}); - recordVisited(V, 0, false); + FunctionResult fr; + fr.name = F->getName().str(); + fr.filePath = analysis::getFunctionSourcePath(*F); + if (fr.filePath.empty() && !ctx.filter.moduleSourcePath.empty()) + fr.filePath = ctx.filter.moduleSourcePath; + fr.localStack = localInfo.bytes; + fr.localStackUnknown = localInfo.unknown; + fr.maxStack = totalInfo.bytes; + fr.maxStackUnknown = totalInfo.unknown; + fr.hasDynamicAlloca = localInfo.hasDynamicAlloca; + fr.isRecursive = state.RecursiveFuncs.count(Fn) != 0; + fr.hasInfiniteSelfRecursion = state.InfiniteRecursionFuncs.count(Fn) != 0; + fr.exceedsLimit = (!fr.maxStackUnknown && totalInfo.bytes > ctx.config.stackLimit); - while (!worklist.empty()) - { - const Value* Cur = stripIntCasts(worklist.back().val); - int64_t curOffset = worklist.back().offset; - bool curSawOffset = worklist.back().sawOffset; - worklist.pop_back(); + unsigned line = 0; + unsigned column = 0; + if (analysis::getFunctionSourceLocation(*F, line, column)) + { + aux.locations[Fn] = {line, column}; + } + if (!fr.isRecursive && totalInfo.bytes > localInfo.bytes) + { + std::string path = analysis::buildMaxStackCallPath(Fn, CG, state); + if (!path.empty()) + aux.callPaths[Fn] = path; + } + if 
(!localInfo.localAllocas.empty()) + { + aux.localAllocas[Fn] = localInfo.localAllocas; + } - if (const Value* PtrOp = getPtrToIntOperand(Cur)) - { - out.push_back({PtrOp, curOffset, curSawOffset}); - continue; + result.functions.push_back(std::move(fr)); + aux.indices[Fn] = result.functions.size() - 1; } - const Value* lhs = nullptr; - const Value* rhs = nullptr; - unsigned opcode = 0; - - if (auto* BO = dyn_cast(Cur)) - { - opcode = BO->getOpcode(); - lhs = BO->getOperand(0); - rhs = BO->getOperand(1); - } - else if (auto* CE = dyn_cast(Cur)) - { - opcode = CE->getOpcode(); - lhs = CE->getOperand(0); - rhs = CE->getOperand(1); - } + return result; + } - if (opcode == Instruction::Add || opcode == Instruction::Sub) + static void emitSummaryDiagnostics(AnalysisResult& result, const ModuleAnalysisContext& ctx, + const FunctionAuxData& aux) + { + for (const llvm::Function* Fn : ctx.functions) { - const auto* lhsC = dyn_cast(lhs); - const auto* rhsC = dyn_cast(rhs); - if (rhsC) - { - int64_t delta = rhsC->getSExtValue(); - if (opcode == Instruction::Sub) - delta = -delta; - int64_t newOffset = curOffset + delta; - if (recordVisited(lhs, newOffset, true)) - worklist.push_back({lhs, newOffset, true}); + auto itIndex = aux.indices.find(Fn); + if (itIndex == aux.indices.end()) continue; - } - if (lhsC && opcode == Instruction::Add) - { - int64_t delta = lhsC->getSExtValue(); - int64_t newOffset = curOffset + delta; - if (recordVisited(rhs, newOffset, true)) - worklist.push_back({rhs, newOffset, true}); + const std::size_t index = itIndex->second; + if (index >= result.functions.size()) continue; + const FunctionResult& fr = result.functions[index]; + + if (fr.isRecursive) + { + Diagnostic diag; + diag.funcName = fr.name; + diag.filePath = fr.filePath; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::None; + diag.message = " [!] 
recursive or mutually recursive function detected\n"; + result.diagnostics.push_back(std::move(diag)); } - // sub with constant on LHS is not a valid reconstruction - } - if (auto* PN = dyn_cast(Cur)) - { - for (const Value* In : PN->incoming_values()) + if (fr.hasInfiniteSelfRecursion) { - if (recordVisited(In, curOffset, curSawOffset)) - worklist.push_back({In, curOffset, curSawOffset}); + Diagnostic diag; + diag.funcName = fr.name; + diag.filePath = fr.filePath; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::None; + diag.message = " [!!!] unconditional self recursion detected (no base case)\n" + " this will eventually overflow the stack at runtime\n"; + result.diagnostics.push_back(std::move(diag)); } - continue; - } - if (auto* Sel = dyn_cast(Cur)) - { - const Value* T = Sel->getTrueValue(); - const Value* F = Sel->getFalseValue(); - if (recordVisited(T, curOffset, curSawOffset)) - worklist.push_back({T, curOffset, curSawOffset}); - if (recordVisited(F, curOffset, curSawOffset)) - worklist.push_back({F, curOffset, curSawOffset}); - continue; - } - if (auto* LI = dyn_cast(Cur)) - { - const Value* PtrOp = LI->getPointerOperand()->stripPointerCasts(); - if (auto* AI = dyn_cast(PtrOp)) + if (fr.exceedsLimit) { - Type* allocTy = AI->getAllocatedType(); - if (allocTy && allocTy->isIntegerTy()) + Diagnostic diag; + diag.funcName = fr.name; + diag.filePath = fr.filePath; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::None; + auto itLoc = aux.locations.find(Fn); + if (itLoc != aux.locations.end()) { - SmallVector seeds; - SmallVector deltas; - - for (const User* Usr : AI->users()) + diag.line = itLoc->second.line; + diag.column = itLoc->second.column; + } + std::string message; + bool suppressLocation = false; + StackSize maxCallee = + (fr.maxStack > fr.localStack) ? 
(fr.maxStack - fr.localStack) : 0; + auto itLocals = aux.localAllocas.find(Fn); + std::string aliasLine; + if (fr.localStack >= maxCallee && itLocals != aux.localAllocas.end()) + { + std::string localsDetails; + std::string singleName; + StackSize singleSize = 0; + for (const auto& entry : itLocals->second) { - auto* SI = dyn_cast(Usr); - if (!SI) - continue; - if (SI->getPointerOperand()->stripPointerCasts() != AI) - continue; - const Value* StoredVal = SI->getValueOperand(); - - int64_t delta = 0; - if (matchAllocaLoadAddSub(StoredVal, AI, delta)) - { - deltas.push_back(delta); + if (entry.first == "") continue; - } - - llvm::SmallPtrSet depVisited; - if (!valueDependsOnAlloca(StoredVal, AI, depVisited)) + if (entry.second >= ctx.config.stackLimit && entry.second > singleSize) { - seeds.push_back(StoredVal); + singleName = entry.first; + singleSize = entry.second; } } - - if (!seeds.empty()) + if (!singleName.empty()) { - for (const Value* Seed : seeds) - { - if (recordVisited(Seed, curOffset, curSawOffset)) - worklist.push_back({Seed, curOffset, curSawOffset}); - for (int64_t delta : deltas) - { - int64_t newOffset = curOffset + delta; - if (recordVisited(Seed, newOffset, true)) - worklist.push_back({Seed, newOffset, true}); - } - } + aliasLine = " alias path: " + singleName + "\n"; } - else + else if (!itLocals->second.empty()) { - // Fallback: explore stored values directly (may be imprecise). 
- for (const User* Usr : AI->users()) + localsDetails += + " locals: " + std::to_string(itLocals->second.size()) + + " variables (total " + std::to_string(fr.localStack) + " bytes)\n"; + + std::vector> named = itLocals->second; + named.erase(std::remove_if(named.begin(), named.end(), [](const auto& v) + { return v.first == ""; }), + named.end()); + std::sort(named.begin(), named.end(), + [](const auto& a, const auto& b) + { + if (a.second != b.second) + return a.second > b.second; + return a.first < b.first; + }); + if (!named.empty()) { - auto* SI = dyn_cast(Usr); - if (!SI) - continue; - if (SI->getPointerOperand()->stripPointerCasts() != AI) - continue; - const Value* StoredVal = SI->getValueOperand(); - if (recordVisited(StoredVal, curOffset, curSawOffset)) - worklist.push_back({StoredVal, curOffset, curSawOffset}); + constexpr std::size_t kMaxLocalsForLocation = 5; + if (named.size() > kMaxLocalsForLocation) + suppressLocation = true; + std::string listLine = " locals list: "; + for (std::size_t idx = 0; idx < named.size(); ++idx) + { + if (idx > 0) + listLine += ", "; + listLine += named[idx].first + "(" + + std::to_string(named[idx].second) + ")"; + } + localsDetails += listLine + "\n"; } } - continue; + if (!localsDetails.empty()) + message += localsDetails; + } + auto itPath = aux.callPaths.find(Fn); + std::string suffix; + if (itPath != aux.callPaths.end()) + { + suffix += " path: " + itPath->second + "\n"; + } + std::string mainLine = " [!] 
potential stack overflow: exceeds limit of " + + std::to_string(ctx.config.stackLimit) + " bytes\n"; + message = mainLine + aliasLine + suffix + message; + if (suppressLocation) + { + diag.line = 0; + diag.column = 0; } + diag.message = std::move(message); + result.diagnostics.push_back(std::move(diag)); } } } - } - static void collectPointerOrigins(const llvm::Value* V, const llvm::DataLayout& DL, - llvm::SmallVectorImpl& out) - { - using namespace llvm; - - SmallVector, 16> worklist; - std::map> visited; - - worklist.push_back({V, 0}); - recordVisitedOffset(visited, V, 0); - - while (!worklist.empty()) + static void appendStackBufferDiagnostics( + AnalysisResult& result, + const std::vector& bufferIssues) { - const Value* Cur = worklist.back().first; - int64_t currentOffset = worklist.back().second; - worklist.pop_back(); - - if (auto* AI = dyn_cast(Cur)) + for (const auto& issue : bufferIssues) { - Type* allocaTy = AI->getAllocatedType(); - if (allocaTy->isPointerTy()) + unsigned line = 0; + unsigned column = 0; + unsigned startLine = 0; + unsigned startColumn = 0; + unsigned endLine = 0; + unsigned endColumn = 0; + bool haveLoc = false; + + if (issue.inst) { - // Pointer slot: follow what gets stored there. 
- for (const User* Usr : AI->users()) + llvm::DebugLoc DL = issue.inst->getDebugLoc(); + if (DL) { - if (auto* SI = dyn_cast(Usr)) + line = DL.getLine(); + startLine = DL.getLine(); + + startColumn = DL.getCol(); + column = DL.getCol(); + + // By default, same as start + endLine = DL.getLine(); + endColumn = DL.getCol(); + haveLoc = true; + if (auto* loc = DL.get()) { - if (SI->getPointerOperand() != AI) - continue; - const Value* StoredVal = SI->getValueOperand(); - if (recordVisitedOffset(visited, StoredVal, currentOffset)) + if (auto* scope = llvm::dyn_cast(loc)) { - worklist.push_back({StoredVal, currentOffset}); + if (scope->getColumn() != 0) + { + endColumn = scope->getColumn() + 1; + } } } } - continue; } - out.push_back({AI, currentOffset}); - continue; - } + bool isUnreachable = false; + { + using namespace llvm; - if (auto* BC = dyn_cast(Cur)) - { - const Value* Src = BC->getOperand(0); - if (recordVisitedOffset(visited, Src, currentOffset)) - worklist.push_back({Src, currentOffset}); - continue; - } + if (issue.inst) + { + auto* BB = issue.inst->getParent(); - if (auto* ASC = dyn_cast(Cur)) - { - const Value* Src = ASC->getOperand(0); - if (recordVisitedOffset(visited, Src, currentOffset)) - worklist.push_back({Src, currentOffset}); - continue; - } + // Parcourt les prédécesseurs du bloc pour voir si certains + // ont une branche conditionnelle avec une condition constante. 
+ for (auto* Pred : predecessors(BB)) + { + auto* BI = dyn_cast(Pred->getTerminator()); + if (!BI || !BI->isConditional()) + continue; - int64_t gepOffset = 0; - const Value* gepBase = nullptr; - if (getGEPConstantOffsetAndBase(Cur, DL, gepOffset, gepBase)) - { - int64_t newOffset = currentOffset + gepOffset; - if (recordVisitedOffset(visited, gepBase, newOffset)) - worklist.push_back({gepBase, newOffset}); - continue; - } + auto* CI = dyn_cast(BI->getCondition()); + if (!CI) + continue; - if (auto* LI = dyn_cast(Cur)) - { - const Value* PtrOp = LI->getPointerOperand(); - if (recordVisitedOffset(visited, PtrOp, currentOffset)) - worklist.push_back({PtrOp, currentOffset}); - continue; - } + const llvm::Function& Func = *issue.inst->getFunction(); - if (auto* PN = dyn_cast(Cur)) - { - for (const Value* In : PN->incoming_values()) - { - if (recordVisitedOffset(visited, In, currentOffset)) - worklist.push_back({In, currentOffset}); - } - continue; - } + auto* C0 = analysis::tryGetConstFromValue(CI->getOperand(0), Func); + auto* C1 = analysis::tryGetConstFromValue(CI->getOperand(1), Func); + if (!C0 || !C1) + continue; - if (auto* Sel = dyn_cast(Cur)) - { - const Value* T = Sel->getTrueValue(); - const Value* F = Sel->getFalseValue(); - if (recordVisitedOffset(visited, T, currentOffset)) - worklist.push_back({T, currentOffset}); - if (recordVisitedOffset(visited, F, currentOffset)) - worklist.push_back({F, currentOffset}); - continue; - } - - if (auto* CE = dyn_cast(Cur)) - { - if (CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::AddrSpaceCast) - { - const Value* Src = CE->getOperand(0); - if (recordVisitedOffset(visited, Src, currentOffset)) - worklist.push_back({Src, currentOffset}); - } - } - } - } - - static bool isPointerDereferencedOrUsed(const llvm::Value* V) - { - using namespace llvm; - - SmallVector worklist; - SmallPtrSet visited; - worklist.push_back(V); - - while (!worklist.empty()) - { - const Value* Cur = worklist.back(); - 
worklist.pop_back(); - if (!visited.insert(Cur).second) - continue; - - for (const Use& U : Cur->uses()) - { - const User* Usr = U.getUser(); + // Évalue le résultat de l'ICmp pour ces constantes (implémentation maison). + bool condTrue = false; + auto pred = CI->getPredicate(); + const auto& v0 = C0->getValue(); + const auto& v1 = C1->getValue(); - if (auto* LI = dyn_cast(Usr)) - { - if (LI->getPointerOperand() == Cur) - return true; - continue; - } - if (auto* SI = dyn_cast(Usr)) - { - if (SI->getPointerOperand() == Cur) - return true; - if (SI->getValueOperand() == Cur) - { - const Value* dst = SI->getPointerOperand()->stripPointerCasts(); - if (auto* AI = dyn_cast(dst)) - { - Type* allocTy = AI->getAllocatedType(); - if (allocTy && allocTy->isPointerTy()) + switch (pred) { - for (const User* AUser : AI->users()) - { - if (auto* LI = dyn_cast(AUser)) - { - if (LI->getPointerOperand()->stripPointerCasts() == AI) - { - worklist.push_back(LI); - } - } - } - } - } - } - continue; - } - if (auto* RMW = dyn_cast(Usr)) - { - if (RMW->getPointerOperand() == Cur) - return true; - continue; - } - if (auto* CX = dyn_cast(Usr)) - { - if (CX->getPointerOperand() == Cur) - return true; - continue; - } - if (auto* MI = dyn_cast(Usr)) - { - if (MI->getRawDest() == Cur) - return true; - if (auto* MTI = dyn_cast(MI)) - { - if (MTI->getRawSource() == Cur) - return true; - } - continue; - } - - if (auto* BC = dyn_cast(Usr)) - { - worklist.push_back(BC); - continue; - } - if (auto* ASC = dyn_cast(Usr)) - { - worklist.push_back(ASC); - continue; - } - if (auto* GEP = dyn_cast(Usr)) - { - worklist.push_back(GEP); - continue; - } - if (auto* PN = dyn_cast(Usr)) - { - worklist.push_back(PN); - continue; - } - if (auto* Sel = dyn_cast(Usr)) - { - worklist.push_back(Sel); - continue; - } - if (auto* CE = dyn_cast(Usr)) - { - worklist.push_back(CE); - continue; - } - } - } - - return false; - } - - static const llvm::Value* getPtrToIntOperand(const llvm::Value* V) - { - using namespace 
llvm; - - if (auto* PTI = dyn_cast(V)) - return PTI->getOperand(0); - if (auto* CE = dyn_cast(V)) - { - if (CE->getOpcode() == Instruction::PtrToInt) - return CE->getOperand(0); - } - return nullptr; - } - - static bool matchPtrToIntAddSub(const llvm::Value* V, const llvm::Value*& outPtrOperand, - int64_t& outOffset) - { - using namespace llvm; - - const Value* lhs = nullptr; - const Value* rhs = nullptr; - unsigned opcode = 0; - - if (auto* BO = dyn_cast(V)) - { - opcode = BO->getOpcode(); - lhs = BO->getOperand(0); - rhs = BO->getOperand(1); - } - else if (auto* CE = dyn_cast(V)) - { - opcode = CE->getOpcode(); - lhs = CE->getOperand(0); - rhs = CE->getOperand(1); - } - else - { - return false; - } - - if (opcode != Instruction::Add && opcode != Instruction::Sub) - return false; - - const Value* lhsPtr = getPtrToIntOperand(lhs); - const Value* rhsPtr = getPtrToIntOperand(rhs); - - const ConstantInt* lhsC = dyn_cast(lhs); - const ConstantInt* rhsC = dyn_cast(rhs); - - if (lhsPtr && rhsC) - { - outPtrOperand = lhsPtr; - outOffset = rhsC->getSExtValue(); - if (opcode == Instruction::Sub) - outOffset = -outOffset; - return true; - } - - if (rhsPtr && lhsC) - { - if (opcode == Instruction::Sub) - { - // Pattern is C - ptrtoint(P): not a base reconstruction. - return false; - } - outPtrOperand = rhsPtr; - outOffset = lhsC->getSExtValue(); - return true; - } - - return false; - } - - // Détecte les patterns de type: - // inttoptr(ptrtoint(P) +/- C) - // ou (char*)P +/- C - // où P est un membre d'une structure sur la stack et C est un offset constant, - // et le résultat est utilisé comme pointeur vers la structure de base. - // - // Ce pattern est typiquement utilisé dans container_of() / offsetof() mais peut - // être incorrect si l'offset ne correspond pas au membre réel. 
- static void - analyzeInvalidBaseReconstructionsInFunction(llvm::Function& F, const llvm::DataLayout& DL, - std::vector& out) - { - using namespace llvm; - - if (F.isDeclaration()) - return; - - // Recherche des allocas (objets stack) - std::map> allocaInfo; - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - auto* AI = dyn_cast(&I); - if (!AI) - continue; - - // Calcul de la taille de l'objet alloué - std::optional sizeOpt = getAllocaTotalSizeBytes(AI, DL); - if (!sizeOpt.has_value()) - continue; // Taille dynamique, on ne peut pas analyser - - std::string varName = - AI->hasName() ? AI->getName().str() : std::string(""); - allocaInfo[AI] = {varName, sizeOpt.value()}; - } - } - - // Maintenant, recherchons les patterns de reconstruction de pointeur de base - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - - // Pattern 1: inttoptr(ptrtoint(P) +/- C) - if (auto* ITP = dyn_cast(&I)) - { - if (!isPointerDereferencedOrUsed(ITP)) - continue; - - Value* IntVal = ITP->getOperand(0); - - SmallVector matches; - collectPtrToIntMatches(IntVal, matches); - if (matches.empty()) - continue; - - struct AggEntry - { - std::set memberOffsets; - bool anyOutOfBounds = false; - bool anyNonZeroResult = false; - std::string varName; - uint64_t allocaSize = 0; - std::string targetType; - }; - - std::map, AggEntry> agg; - - for (const auto& match : matches) - { - if (!match.sawOffset) - continue; - - SmallVector origins; - collectPointerOrigins(match.ptrOperand, DL, origins); - if (origins.empty()) - continue; - - for (const auto& origin : origins) - { - auto it = allocaInfo.find(origin.alloca); - if (it == allocaInfo.end()) + case ICmpInst::ICMP_EQ: + condTrue = (v0 == v1); + break; + case ICmpInst::ICMP_NE: + condTrue = (v0 != v1); + break; + case ICmpInst::ICMP_SLT: + condTrue = v0.slt(v1); + break; + case ICmpInst::ICMP_SLE: + condTrue = v0.sle(v1); + break; + case ICmpInst::ICMP_SGT: + condTrue = v0.sgt(v1); + break; + case ICmpInst::ICMP_SGE: + 
condTrue = v0.sge(v1); + break; + case ICmpInst::ICMP_ULT: + condTrue = v0.ult(v1); + break; + case ICmpInst::ICMP_ULE: + condTrue = v0.ule(v1); + break; + case ICmpInst::ICMP_UGT: + condTrue = v0.ugt(v1); + break; + case ICmpInst::ICMP_UGE: + condTrue = v0.uge(v1); + break; + default: + // On ne traite pas d'autres prédicats exotiques ici continue; - - const std::string& varName = it->second.first; - uint64_t allocaSize = it->second.second; - - int64_t resultOffset = origin.offset + match.offset; - bool isOutOfBounds = - (resultOffset < 0) || - (static_cast(resultOffset) >= allocaSize); - - std::string targetType; - Type* targetTy = ITP->getType(); - if (auto* PtrTy = dyn_cast(targetTy)) - { - raw_string_ostream rso(targetType); - PtrTy->print(rso); } - auto key = std::make_pair(origin.alloca, match.offset); - auto& entry = agg[key]; - entry.memberOffsets.insert(origin.offset); - entry.anyOutOfBounds |= isOutOfBounds; - if (resultOffset != 0) - entry.anyNonZeroResult = true; - entry.varName = varName; - entry.allocaSize = allocaSize; - entry.targetType = targetType.empty() ? "" : targetType; - } - } - - for (auto& kv : agg) - { - const auto& entry = kv.second; - if (entry.memberOffsets.empty()) - continue; - if (!entry.anyOutOfBounds && !entry.anyNonZeroResult) - continue; - - std::ostringstream memberStr; - if (entry.memberOffsets.size() == 1) - { - int64_t mo = *entry.memberOffsets.begin(); - memberStr << (mo != 0 ? "offset +" + std::to_string(mo) : "base"); - } - else - { - memberStr << "offsets "; - bool first = true; - for (int64_t mo : entry.memberOffsets) + // Branchement du type: + // br i1 %cond, label %then, label %else + // Successeur 0 pris si condTrue == true + // Successeur 1 pris si condTrue == false + if (BB == BI->getSuccessor(0) && condTrue == false) { - if (!first) - memberStr << ", "; - memberStr << (mo != 0 ? "+" + std::to_string(mo) : "base"); - first = false; + // Le bloc "then" n'est jamais atteint. 
+ isUnreachable = true; } - } - - InvalidBaseReconstructionIssue issue; - issue.funcName = F.getName().str(); - issue.varName = entry.varName; - issue.sourceMember = memberStr.str(); - issue.offsetUsed = kv.first.second; - issue.targetType = entry.targetType; - issue.isOutOfBounds = entry.anyOutOfBounds; - issue.inst = &I; - - out.push_back(std::move(issue)); - } - } - - // Pattern 2: GEP avec offset constant sur un membre - // (équivalent à (char*)ptr + offset) - if (auto* GEP = dyn_cast(&I)) - { - if (!isPointerDereferencedOrUsed(GEP)) - continue; - - int64_t gepOffset = 0; - const Value* PtrOp = nullptr; - if (!getGEPConstantOffsetAndBase(GEP, DL, gepOffset, PtrOp)) - continue; - - SmallVector origins; - collectPointerOrigins(PtrOp, DL, origins); - if (origins.empty()) - continue; - - struct AggEntry - { - std::set memberOffsets; - bool anyOutOfBounds = false; - bool anyNonZeroResult = false; - std::string varName; - std::string targetType; - }; - - std::map agg; - - for (const auto& origin : origins) - { - if (origin.offset == 0 && gepOffset >= 0) - { - // Likely a normal member access from the base object. 
- continue; - } - - auto it = allocaInfo.find(origin.alloca); - if (it == allocaInfo.end()) - continue; - - const std::string& varName = it->second.first; - uint64_t allocaSize = it->second.second; - - int64_t resultOffset = origin.offset + gepOffset; - bool isOutOfBounds = (resultOffset < 0) || - (static_cast(resultOffset) >= allocaSize); - - std::string targetType; - Type* targetTy = GEP->getType(); - raw_string_ostream rso(targetType); - targetTy->print(rso); - - auto& entry = agg[origin.alloca]; - entry.memberOffsets.insert(origin.offset); - entry.anyOutOfBounds |= isOutOfBounds; - if (resultOffset != 0) - entry.anyNonZeroResult = true; - entry.varName = varName; - entry.targetType = targetType; - } - - for (auto& kv : agg) - { - const auto& entry = kv.second; - if (entry.memberOffsets.empty()) - continue; - if (!entry.anyOutOfBounds && !entry.anyNonZeroResult) - continue; - - std::ostringstream memberStr; - if (entry.memberOffsets.size() == 1) - { - int64_t mo = *entry.memberOffsets.begin(); - memberStr << (mo != 0 ? "offset +" + std::to_string(mo) : "base"); - } - else - { - memberStr << "offsets "; - bool first = true; - for (int64_t mo : entry.memberOffsets) + else if (BB == BI->getSuccessor(1) && condTrue == true) { - if (!first) - memberStr << ", "; - memberStr << (mo != 0 ? 
"+" + std::to_string(mo) : "base"); - first = false; - } - } - - InvalidBaseReconstructionIssue issue; - issue.funcName = F.getName().str(); - issue.varName = entry.varName; - issue.sourceMember = memberStr.str(); - issue.offsetUsed = gepOffset; - issue.targetType = entry.targetType; - issue.isOutOfBounds = entry.anyOutOfBounds; - issue.inst = &I; - - out.push_back(std::move(issue)); - } - } - } - } - } - - // -------------------------------------------------------------------------- - // Helpers pour analyser les allocas et les bornes d'index - // -------------------------------------------------------------------------- - - // Taille (en nombre d'éléments) pour une alloca de tableau sur la stack - static std::optional getAllocaElementCount(llvm::AllocaInst* AI) - { - using namespace llvm; - - Type* elemTy = AI->getAllocatedType(); - StackSize count = 1; - - // Cas "char test[10];" => alloca [10 x i8] - if (auto* arrTy = dyn_cast(elemTy)) - { - count *= arrTy->getNumElements(); - elemTy = arrTy->getElementType(); - } - - // Cas "alloca i8, i64 10" => alloca tableau avec taille constante - if (AI->isArrayAllocation()) - { - if (auto* C = dyn_cast(AI->getArraySize())) - { - count *= C->getZExtValue(); - } - else - { - // taille non constante - analyse plus compliquée, on ignore pour l'instant - return std::nullopt; - } - } - - return count; - } - - // Taille totale en octets pour une alloca sur la stack. - // Retourne std::nullopt si la taille dépend d'une valeur non constante (VLA). 
- static std::optional getAllocaTotalSizeBytes(const llvm::AllocaInst* AI, - const llvm::DataLayout& DL) - { - using namespace llvm; - - Type* allocatedTy = AI->getAllocatedType(); - - // Cas alloca [N x T] (taille connue dans le type) - if (!AI->isArrayAllocation()) - { - return DL.getTypeAllocSize(allocatedTy); - } - - // Cas alloca T, i64 (taille passée séparément) - if (auto* C = dyn_cast(AI->getArraySize())) - { - uint64_t count = C->getZExtValue(); - uint64_t elemSize = DL.getTypeAllocSize(allocatedTy); - return count * elemSize; - } - - // Taille dynamique - traitée par l'analyse DynamicAllocaIssue - return std::nullopt; - } - - // Analyse des comparaisons ICmp pour déduire les intervalles d'entiers (bornes inf/sup) - static std::map computeIntRangesFromICmps(llvm::Function& F) - { - using namespace llvm; - - std::map ranges; - - auto applyConstraint = - [&ranges](const Value* V, bool hasLB, long long newLB, bool hasUB, long long newUB) - { - auto& R = ranges[V]; - if (hasLB) - { - if (!R.hasLower || newLB > R.lower) - { - R.hasLower = true; - R.lower = newLB; - } - } - if (hasUB) - { - if (!R.hasUpper || newUB < R.upper) - { - R.hasUpper = true; - R.upper = newUB; - } - } - }; - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - auto* icmp = dyn_cast(&I); - if (!icmp) - continue; - - Value* op0 = icmp->getOperand(0); - Value* op1 = icmp->getOperand(1); - - ConstantInt* C = nullptr; - Value* V = nullptr; - - // On cherche un pattern "V ? C" ou "C ? 
V" - if ((C = dyn_cast(op1)) && !isa(op0)) - { - V = op0; - } - else if ((C = dyn_cast(op0)) && !isa(op1)) - { - V = op1; - } - else - { - continue; - } - - auto pred = icmp->getPredicate(); - - bool hasLB = false, hasUB = false; - long long lb = 0, ub = 0; - - auto updateForSigned = [&](bool valueIsOp0) - { - long long c = C->getSExtValue(); - if (valueIsOp0) - { - switch (pred) - { - case ICmpInst::ICMP_SLT: // V < C => V <= C-1 - hasUB = true; - ub = c - 1; - break; - case ICmpInst::ICMP_SLE: // V <= C => V <= C - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_SGT: // V > C => V >= C+1 - hasLB = true; - lb = c + 1; - break; - case ICmpInst::ICMP_SGE: // V >= C => V >= C - hasLB = true; - lb = c; - break; - case ICmpInst::ICMP_EQ: // V == C => [C, C] - hasLB = true; - lb = c; - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_NE: - // approximation : V != C => V <= C (très conservateur) - hasUB = true; - ub = c; - break; - default: - break; - } - } - else - { - // C ? V <=> V ? 
C (inversé) - switch (pred) - { - case ICmpInst::ICMP_SGT: // C > V => V < C => V <= C-1 - hasUB = true; - ub = c - 1; - break; - case ICmpInst::ICMP_SGE: // C >= V => V <= C - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_SLT: // C < V => V > C => V >= C+1 - hasLB = true; - lb = c + 1; - break; - case ICmpInst::ICMP_SLE: // C <= V => V >= C - hasLB = true; - lb = c; - break; - case ICmpInst::ICMP_EQ: // C == V => [C, C] - hasLB = true; - lb = c; - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_NE: - hasUB = true; - ub = c; - break; - default: - break; - } - } - }; - - auto updateForUnsigned = [&](bool valueIsOp0) - { - unsigned long long cu = C->getZExtValue(); - long long c = static_cast(cu); - if (valueIsOp0) - { - switch (pred) - { - case ICmpInst::ICMP_ULT: // V < C => V <= C-1 - hasUB = true; - ub = c - 1; - break; - case ICmpInst::ICMP_ULE: // V <= C - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_UGT: // V > C => V >= C+1 - hasLB = true; - lb = c + 1; - break; - case ICmpInst::ICMP_UGE: // V >= C - hasLB = true; - lb = c; - break; - case ICmpInst::ICMP_EQ: - hasLB = true; - lb = c; - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_NE: - hasUB = true; - ub = c; - break; - default: - break; - } - } - else - { - switch (pred) - { - case ICmpInst::ICMP_UGT: // C > V => V < C - hasUB = true; - ub = c - 1; - break; - case ICmpInst::ICMP_UGE: // C >= V => V <= C - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_ULT: // C < V => V > C - hasLB = true; - lb = c + 1; - break; - case ICmpInst::ICMP_ULE: // C <= V => V >= C - hasLB = true; - lb = c; - break; - case ICmpInst::ICMP_EQ: - hasLB = true; - lb = c; - hasUB = true; - ub = c; - break; - case ICmpInst::ICMP_NE: - hasUB = true; - ub = c; - break; - default: - break; - } - } - }; - - bool valueIsOp0 = (V == op0); - - // On choisit le groupe de prédicats - if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE || - pred == ICmpInst::ICMP_SGT || pred == 
ICmpInst::ICMP_SGE || - pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_NE) - { - updateForSigned(valueIsOp0); - } - else if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE || - pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) - { - updateForUnsigned(valueIsOp0); - } - - if (!(hasLB || hasUB)) - continue; - - // Applique la contrainte sur V lui-même - applyConstraint(V, hasLB, lb, hasUB, ub); - - // Et éventuellement sur le pointeur sous-jacent si V est un load - if (auto* LI = dyn_cast(V)) - { - const Value* ptr = LI->getPointerOperand(); - applyConstraint(ptr, hasLB, lb, hasUB, ub); - } - } - } - - return ranges; - } - - // Heuristic: determine if a Value is user-controlled - // (function argument, load from a non-local pointer, call result, etc.). - static bool isValueUserControlledImpl(const llvm::Value* V, const llvm::Function& F, - llvm::SmallPtrSet& visited, - int depth = 0) - { - using namespace llvm; - - if (!V || depth > 20) - return false; - if (visited.contains(V)) - return false; - visited.insert(V); - - if (isa(V)) - return true; // function argument -> considered user-provided - - if (isa(V)) - return false; - - if (auto* LI = dyn_cast(V)) - { - const Value* ptr = LI->getPointerOperand()->stripPointerCasts(); - if (isa(ptr)) - return true; // load through pointer passed as argument - if (!isa(ptr)) - { - return true; // load from non-local memory (global / heap / unknown) - } - // If it's a local alloca, inspect what gets stored there. - const AllocaInst* AI = cast(ptr); - for (const Use& U : AI->uses()) - { - if (auto* SI = dyn_cast(U.getUser())) - { - if (SI->getPointerOperand()->stripPointerCasts() != ptr) - continue; - if (isValueUserControlledImpl(SI->getValueOperand(), F, visited, depth + 1)) - return true; - } - } - } - - if (auto* CB = dyn_cast(V)) - { - // Value produced by a call: conservatively treat as external/user input. 
- (void)F; - (void)CB; - return true; - } - - if (auto* I = dyn_cast(V)) - { - for (const Value* Op : I->operands()) - { - if (isValueUserControlledImpl(Op, F, visited, depth + 1)) - return true; - } - } - else if (auto* CE = dyn_cast(V)) - { - for (const Value* Op : CE->operands()) - { - if (isValueUserControlledImpl(Op, F, visited, depth + 1)) - return true; - } - } - - return false; - } - - static bool isValueUserControlled(const llvm::Value* V, const llvm::Function& F) - { - llvm::SmallPtrSet visited; - return isValueUserControlledImpl(V, F, visited, 0); - } - - // Try to recover a human-friendly name for an alloca even when the instruction - // itself is unnamed (typical IR for "char *buf = alloca(n);"). - static std::string deriveAllocaName(const llvm::AllocaInst* AI) - { - using namespace llvm; - - if (!AI) - return std::string(""); - if (AI->hasName()) - return AI->getName().str(); - - SmallPtrSet visited; - SmallVector worklist; - worklist.push_back(AI); - - while (!worklist.empty()) - { - const Value* V = worklist.back(); - worklist.pop_back(); - if (!visited.insert(V).second) - continue; - - for (const Use& U : V->uses()) - { - const User* Usr = U.getUser(); - - if (auto* DVI = dyn_cast(Usr)) - { - if (auto* var = DVI->getVariable()) - { - if (!var->getName().empty()) - return var->getName().str(); - } - continue; - } - - if (auto* SI = dyn_cast(Usr)) - { - if (SI->getValueOperand() != V) - continue; - const Value* dst = SI->getPointerOperand()->stripPointerCasts(); - if (auto* dstAI = dyn_cast(dst)) - { - if (dstAI->hasName()) - return dstAI->getName().str(); - } - worklist.push_back(dst); - continue; - } - - if (auto* BC = dyn_cast(Usr)) - { - worklist.push_back(BC); - continue; - } - if (auto* GEP = dyn_cast(Usr)) - { - worklist.push_back(GEP); - continue; - } - if (auto* PN = dyn_cast(Usr)) - { - if (PN->getType()->isPointerTy()) - worklist.push_back(PN); - continue; - } - if (auto* Sel = dyn_cast(Usr)) - { - if (Sel->getType()->isPointerTy()) - 
worklist.push_back(Sel); - continue; - } - } - } - - return std::string(""); - } - - static std::string formatFunctionNameForMessage(const std::string& name) - { - if (ctrace_tools::isMangled(name)) - return ctrace_tools::demangle(name.c_str()); - return name; - } - - struct TypeQualifiers - { - bool isConst = false; - bool isVolatile = false; - bool isRestrict = false; - }; - - struct StrippedDIType - { - const llvm::DIType* type = nullptr; - TypeQualifiers quals; - }; - - struct ParamDebugInfo - { - std::string name; - const llvm::DIType* type = nullptr; - unsigned line = 0; - unsigned column = 0; - }; - - struct ParamTypeInfo - { - const llvm::DIType* originalType = nullptr; - const llvm::DIType* pointeeType = nullptr; // unqualified, typedefs stripped - const llvm::DIType* pointeeDisplayType = nullptr; // unqualified, typedefs preserved - bool isPointer = false; - bool isReference = false; - bool isRvalueReference = false; - bool pointerConst = false; - bool pointerVolatile = false; - bool pointerRestrict = false; - bool pointeeConst = false; - bool pointeeVolatile = false; - bool pointeeRestrict = false; - bool isDoublePointer = false; - bool isVoid = false; - bool isFunctionPointer = false; - }; - - static const llvm::DIType* stripTypedefs(const llvm::DIType* type) - { - using namespace llvm; - const DIType* cur = type; - while (cur) - { - auto* DT = dyn_cast(cur); - if (!DT) - break; - auto tag = DT->getTag(); - if (tag == dwarf::DW_TAG_typedef) - { - cur = DT->getBaseType(); - continue; - } - break; - } - return cur; - } - - static StrippedDIType stripQualifiers(const llvm::DIType* type) - { - using namespace llvm; - StrippedDIType out; - out.type = type; - - while (out.type) - { - auto* DT = dyn_cast(out.type); - if (!DT) - break; - auto tag = DT->getTag(); - if (tag == dwarf::DW_TAG_const_type) - { - out.quals.isConst = true; - out.type = DT->getBaseType(); - continue; - } - if (tag == dwarf::DW_TAG_volatile_type) - { - out.quals.isVolatile = true; - 
out.type = DT->getBaseType(); - continue; - } - if (tag == dwarf::DW_TAG_restrict_type) - { - out.quals.isRestrict = true; - out.type = DT->getBaseType(); - continue; - } - break; - } - - return out; - } - - static std::string formatDITypeName(const llvm::DIType* type) - { - using namespace llvm; - if (!type) - return std::string(""); - - if (auto* BT = dyn_cast(type)) - { - if (!BT->getName().empty()) - return BT->getName().str(); - } - - if (auto* CT = dyn_cast(type)) - { - if (!CT->getName().empty()) - return CT->getName().str(); - if (!CT->getIdentifier().empty()) - return CT->getIdentifier().str(); - } - - if (auto* DT = dyn_cast(type)) - { - auto tag = DT->getTag(); - if (tag == dwarf::DW_TAG_typedef && !DT->getName().empty()) - { - return DT->getName().str(); - } - if ((tag == dwarf::DW_TAG_const_type) || (tag == dwarf::DW_TAG_volatile_type) || - (tag == dwarf::DW_TAG_restrict_type)) - { - return formatDITypeName(DT->getBaseType()); - } - if (!DT->getName().empty()) - return DT->getName().str(); - } - - if (auto* ST = dyn_cast(type)) - { - (void)ST; - return std::string(""); - } - - return std::string(""); - } - - static bool buildParamTypeInfo(const llvm::DIType* type, ParamTypeInfo& info) - { - using namespace llvm; - if (!type) - return false; - - info.originalType = type; - - StrippedDIType top = stripQualifiers(type); - info.pointerConst = top.quals.isConst; - info.pointerVolatile = top.quals.isVolatile; - info.pointerRestrict = top.quals.isRestrict; - - const DIType* topType = stripTypedefs(top.type); - auto* derived = dyn_cast(topType); - if (!derived) - return false; - - auto tag = derived->getTag(); - if (tag == dwarf::DW_TAG_pointer_type) - { - info.isPointer = true; - } - else if (tag == dwarf::DW_TAG_reference_type) - { - info.isReference = true; - } - else if (tag == dwarf::DW_TAG_rvalue_reference_type) - { - info.isReference = true; - info.isRvalueReference = true; - } - else - { - return false; - } - - const DIType* baseType = 
derived->getBaseType(); - StrippedDIType base = stripQualifiers(baseType); - info.pointeeConst = base.quals.isConst; - info.pointeeVolatile = base.quals.isVolatile; - info.pointeeRestrict = base.quals.isRestrict; - info.pointeeDisplayType = base.type ? base.type : baseType; - - const DIType* baseNoTypedef = stripTypedefs(base.type); - info.pointeeType = baseNoTypedef; - - if (!baseNoTypedef) - return true; - - if (auto* baseDerived = dyn_cast(baseNoTypedef)) - { - auto baseTag = baseDerived->getTag(); - if (baseTag == dwarf::DW_TAG_pointer_type || baseTag == dwarf::DW_TAG_reference_type || - baseTag == dwarf::DW_TAG_rvalue_reference_type) - { - info.isDoublePointer = true; - } - } - - if (isa(baseNoTypedef)) - info.isFunctionPointer = true; - - if (auto* basic = dyn_cast(baseNoTypedef)) - { - if (basic->getName() == "void") - info.isVoid = true; - } - - return true; - } - - static std::string buildTypeString(const ParamTypeInfo& info, const std::string& baseName, - bool addPointeeConst, bool includePointerConst, - const std::string& paramName) - { - std::string out; - if (info.pointeeConst || addPointeeConst) - out += "const "; - if (info.pointeeVolatile) - out += "volatile "; - out += baseName.empty() ? std::string("") : baseName; - - if (info.isReference) - { - out += info.isRvalueReference ? 
" &&" : " &"; - if (!paramName.empty()) - { - out += paramName; - } - return out; - } - - if (info.isPointer) - { - out += " *"; - if (includePointerConst && info.pointerConst) - out += " const"; - if (info.pointerVolatile) - out += " volatile"; - if (info.pointerRestrict) - out += " restrict"; - } - - if (!paramName.empty()) - { - if (!out.empty() && (out.back() == '*' || out.back() == '&')) - out += paramName; - else - out += " " + paramName; - } - - return out; - } - - static std::string buildPointeeQualPrefix(const ParamTypeInfo& info, bool addConst) - { - std::string out; - if (addConst) - out += "const "; - if (info.pointeeVolatile) - out += "volatile "; - if (info.pointeeRestrict) - out += "restrict "; - return out; - } - - static ParamDebugInfo getParamDebugInfo(const llvm::Function& F, const llvm::Argument& Arg) - { - using namespace llvm; - ParamDebugInfo info; - info.name = Arg.getName().str(); - - if (auto* SP = F.getSubprogram()) - { - for (DINode* node : SP->getRetainedNodes()) - { - auto* var = dyn_cast(node); - if (!var || !var->isParameter()) - continue; - if (var->getArg() != Arg.getArgNo() + 1) - continue; - if (!var->getName().empty()) - info.name = var->getName().str(); - info.type = var->getType(); - if (var->getLine() != 0) - info.line = var->getLine(); - break; - } - - if (!info.type) - { - if (auto* subTy = SP->getType()) - { - auto types = subTy->getTypeArray(); - if (types.size() > Arg.getArgNo() + 1) - info.type = types[Arg.getArgNo() + 1]; - } - } - - if (info.line == 0) - info.line = SP->getLine(); - } - - return info; - } - - static bool calleeParamIsReadOnly(const llvm::Function* callee, unsigned argIndex) - { - if (!callee || argIndex >= callee->arg_size()) - return false; - - const llvm::Argument& param = *callee->getArg(argIndex); - ParamDebugInfo dbg = getParamDebugInfo(*callee, param); - if (!dbg.type) - return false; - - ParamTypeInfo typeInfo; - if (!buildParamTypeInfo(dbg.type, typeInfo)) - return false; - - if 
(typeInfo.isDoublePointer || typeInfo.isVoid || typeInfo.isFunctionPointer) - return false; - - if (!typeInfo.isPointer && !typeInfo.isReference) - return false; - - return typeInfo.pointeeConst; - } - - static bool callArgMayWriteThrough(const llvm::CallBase& CB, unsigned argIndex) - { - using namespace llvm; - - const Function* callee = CB.getCalledFunction(); - if (!callee) - { - const Value* called = CB.getCalledOperand(); - if (called) - called = called->stripPointerCasts(); - callee = dyn_cast(called); - } - - if (!callee) - return true; - - if (auto* MI = dyn_cast(&CB)) - { - if (isa(MI)) - return argIndex == 0; - if (isa(MI)) - return argIndex == 0; - } - - if (callee->isIntrinsic()) - { - switch (callee->getIntrinsicID()) - { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::assume: - return false; - default: - break; - } - } - - if (callee->doesNotAccessMemory()) - return false; - if (callee->onlyReadsMemory()) - return false; - - if (argIndex >= callee->arg_size()) - return true; // varargs or unknown - - const AttributeList& attrs = callee->getAttributes(); - if (attrs.hasParamAttr(argIndex, Attribute::ReadOnly) || - attrs.hasParamAttr(argIndex, Attribute::ReadNone)) - { - return false; - } - if (attrs.hasParamAttr(argIndex, Attribute::WriteOnly)) - return true; - - if (calleeParamIsReadOnly(callee, argIndex)) - return false; - - return true; - } - - static bool valueMayBeWrittenThrough(const llvm::Value* root, const llvm::Function& F) - { - using namespace llvm; - (void)F; - - SmallPtrSet visited; - SmallVector worklist; - worklist.push_back(root); - - while (!worklist.empty()) - { - const Value* V = worklist.pop_back_val(); - if (!visited.insert(V).second) - continue; - - for (const Use& U : V->uses()) - { - const User* Usr = U.getUser(); - - if (auto* SI = 
dyn_cast(Usr)) - { - if (SI->getPointerOperand() == V) - return true; - if (SI->getValueOperand() == V) - { - const Value* dst = SI->getPointerOperand()->stripPointerCasts(); - if (auto* AI = dyn_cast(dst)) - { - for (const Use& AU : AI->uses()) - { - if (auto* LI = dyn_cast(AU.getUser())) - { - if (LI->getPointerOperand()->stripPointerCasts() == AI) - worklist.push_back(LI); - } - } - } - else - { - return true; // pointer escapes to non-local memory - } - } - continue; - } - - if (auto* AI = dyn_cast(Usr)) - { - if (AI->getPointerOperand() == V) - return true; - continue; - } - - if (auto* CX = dyn_cast(Usr)) - { - if (CX->getPointerOperand() == V) - return true; - continue; - } - - if (auto* CB = dyn_cast(Usr)) - { - for (unsigned i = 0; i < CB->arg_size(); ++i) - { - if (CB->getArgOperand(i) == V) - { - if (callArgMayWriteThrough(*CB, i)) - return true; - } - } - continue; - } - - if (auto* GEP = dyn_cast(Usr)) - { - worklist.push_back(GEP); - continue; - } - if (auto* BC = dyn_cast(Usr)) - { - worklist.push_back(BC); - continue; - } - if (auto* ASC = dyn_cast(Usr)) - { - worklist.push_back(ASC); - continue; - } - if (auto* PN = dyn_cast(Usr)) - { - if (PN->getType()->isPointerTy()) - worklist.push_back(PN); - continue; - } - if (auto* Sel = dyn_cast(Usr)) - { - if (Sel->getType()->isPointerTy()) - worklist.push_back(Sel); - continue; - } - if (auto* CI = dyn_cast(Usr)) - { - if (CI->getType()->isPointerTy()) - worklist.push_back(CI); - continue; - } - if (isa(Usr)) - return true; // unknown aliasing, be conservative - } - } - - return false; - } - - static void analyzeConstParamsInFunction(llvm::Function& F, std::vector& out) - { - using namespace llvm; - - if (F.isDeclaration()) - return; - - for (Argument& Arg : F.args()) - { - ParamDebugInfo dbg = getParamDebugInfo(F, Arg); - if (!dbg.type) - continue; - - ParamTypeInfo typeInfo; - if (!buildParamTypeInfo(dbg.type, typeInfo)) - continue; - - if (!typeInfo.isPointer && !typeInfo.isReference) - continue; - if 
(typeInfo.isDoublePointer || typeInfo.isVoid || typeInfo.isFunctionPointer) - continue; - if (typeInfo.pointeeConst) - continue; - - if (valueMayBeWrittenThrough(&Arg, F)) - continue; - - ConstParamIssue issue; - issue.funcName = F.getName().str(); - issue.paramName = dbg.name.empty() ? Arg.getName().str() : dbg.name; - issue.line = dbg.line; - issue.column = dbg.column; - issue.pointerConstOnly = - typeInfo.isPointer && typeInfo.pointerConst && !typeInfo.pointeeConst; - issue.isReference = typeInfo.isReference; - issue.isRvalueRef = typeInfo.isRvalueReference; - - std::string baseName = formatDITypeName(typeInfo.pointeeDisplayType); - issue.currentType = buildTypeString(typeInfo, baseName, false, true, issue.paramName); - if (typeInfo.isRvalueReference) - { - std::string valuePrefix = buildPointeeQualPrefix(typeInfo, false); - std::string constRefPrefix = buildPointeeQualPrefix(typeInfo, true); - issue.suggestedType = valuePrefix + baseName + " " + issue.paramName; - issue.suggestedTypeAlt = constRefPrefix + baseName + " &" + issue.paramName; - } - else - { - issue.suggestedType = - buildTypeString(typeInfo, baseName, true, false, issue.paramName); - } - - out.push_back(std::move(issue)); - } - } - - // Forward declaration : essaie de retrouver une constante derrière une Value - static const llvm::ConstantInt* tryGetConstFromValue(const llvm::Value* V, - const llvm::Function& F); - - // Analyse intra-fonction pour détecter les allocations dynamiques sur la stack - // (par exemple : int n = read(); char buf[n];) - static void analyzeDynamicAllocasInFunction(llvm::Function& F, - std::vector& out) - { - using namespace llvm; - - if (F.isDeclaration()) - return; - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - auto* AI = dyn_cast(&I); - if (!AI) - continue; - - // Taille d'allocation : on distingue trois cas : - // - constante immédiate -> pas une VLA - // - dérivée d'une constante simple -> pas une VLA (heuristique) - // - vraiment dépendante d'une 
valeur -> VLA / alloca variable - Value* arraySizeVal = AI->getArraySize(); - - // 1) Cas taille directement constante dans l'IR - if (llvm::isa(arraySizeVal)) - continue; // taille connue à la compilation, OK - - // 2) Heuristique "smart" : essayer de remonter à une constante - // via les stores dans une variable locale (tryGetConstFromValue). - // Exemple typique : - // int n = 6; - // char buf[n]; // en C : VLA, mais ici n est en fait constant - // - // Dans ce cas, on ne veut pas spammer avec un warning VLA : - // on traite ça comme une taille effectivement constante. - if (tryGetConstFromValue(arraySizeVal, F) != nullptr) - continue; - - // 3) Ici, on considère que c'est une vraie VLA / alloca dynamique - DynamicAllocaIssue issue; - issue.funcName = F.getName().str(); - issue.varName = deriveAllocaName(AI); - if (AI->getAllocatedType()) - { - std::string tyStr; - llvm::raw_string_ostream rso(tyStr); - AI->getAllocatedType()->print(rso); - issue.typeName = rso.str(); - } - else - { - issue.typeName = ""; - } - issue.allocaInst = AI; - out.push_back(std::move(issue)); - } - } - } - - // Heuristic: compute an upper bound (if any) from an IntRange - static std::optional - getAllocaUpperBoundBytes(const llvm::AllocaInst* AI, const llvm::DataLayout& DL, - const std::map& ranges) - { - using namespace llvm; - - const Value* sizeVal = AI->getArraySize(); - auto findRange = [&ranges](const Value* V) -> const IntRange* - { - auto it = ranges.find(V); - if (it != ranges.end()) - return &it->second; - return nullptr; - }; - - const IntRange* r = findRange(sizeVal); - if (!r) - { - if (auto* LI = dyn_cast(sizeVal)) - { - const Value* ptr = LI->getPointerOperand(); - r = findRange(ptr); - } - } - - if (r && r->hasUpper && r->upper > 0) - { - StackSize elemSize = DL.getTypeAllocSize(AI->getAllocatedType()); - return static_cast(r->upper) * elemSize; - } - - return std::nullopt; - } - - // Analyze alloca/VLA uses whose size depends on a runtime value. 
- static void analyzeAllocaUsageInFunction(llvm::Function& F, const llvm::DataLayout& DL, - bool isRecursive, bool isInfiniteRecursive, - std::vector& out) - { - using namespace llvm; - - if (F.isDeclaration()) - return; - - auto ranges = computeIntRangesFromICmps(F); - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - auto* AI = dyn_cast(&I); - if (!AI) - continue; - - // Only consider dynamic allocas: alloca(T, size) or VLA. - if (!AI->isArrayAllocation()) - continue; - - AllocaUsageIssue issue; - issue.funcName = F.getName().str(); - issue.varName = deriveAllocaName(AI); - issue.allocaInst = AI; - issue.userControlled = isValueUserControlled(AI->getArraySize(), F); - issue.isRecursive = isRecursive; - issue.isInfiniteRecursive = isInfiniteRecursive; - - StackSize elemSize = DL.getTypeAllocSize(AI->getAllocatedType()); - const Value* arraySizeVal = AI->getArraySize(); - - if (auto* C = dyn_cast(arraySizeVal)) - { - issue.sizeIsConst = true; - issue.sizeBytes = C->getZExtValue() * elemSize; - } - else if (auto* C = tryGetConstFromValue(arraySizeVal, F)) - { - issue.sizeIsConst = true; - issue.sizeBytes = C->getZExtValue() * elemSize; - } - else if (auto upper = getAllocaUpperBoundBytes(AI, DL, ranges)) - { - issue.hasUpperBound = true; - issue.upperBoundBytes = *upper; - } - - out.push_back(std::move(issue)); - } - } - } - - // Forward declaration pour la résolution d'alloca de tableau depuis un pointeur - static const llvm::AllocaInst* resolveArrayAllocaFromPointer(const llvm::Value* V, - llvm::Function& F, - std::vector& path); - - // Analyse intra-fonction pour détecter des accès potentiellement hors bornes - // sur des buffers alloués sur la stack (alloca). 
- static void analyzeStackBufferOverflowsInFunction(llvm::Function& F, - std::vector& out) - { - using namespace llvm; - - auto ranges = computeIntRangesFromICmps(F); - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - auto* GEP = dyn_cast(&I); - if (!GEP) - continue; - - // 1) Trouver la base du pointeur (test, &test[0], ptr, etc.) - const Value* basePtr = GEP->getPointerOperand(); - std::vector aliasPath; - const AllocaInst* AI = resolveArrayAllocaFromPointer(basePtr, F, aliasPath); - if (!AI) - continue; - - // 2) Déterminer la taille logique du tableau ciblé et récupérer l'index - // On essaie d'abord de la déduire du type traversé par la GEP - // (cas struct S { char buf[10]; }; s.buf[i]) puis on retombe - // sur la taille de l'alloca pour les cas plus simples (char buf[10]). - StackSize arraySize = 0; - Value* idxVal = nullptr; - - Type* srcElemTy = GEP->getSourceElementType(); - - if (auto* arrTy = dyn_cast(srcElemTy)) - { - // Cas direct : alloca [N x T]; GEP indices [0, i] - if (GEP->getNumIndices() < 2) - continue; - auto idxIt = GEP->idx_begin(); - ++idxIt; // saute le premier indice (souvent 0) - idxVal = idxIt->get(); - arraySize = arrTy->getNumElements(); - } - else if (auto* ST = dyn_cast(srcElemTy)) - { - // Cas struct avec champ tableau: - // %ptr = getelementptr inbounds %struct.S, %struct.S* %s, - // i32 0, i32 , i64 %i - // - // On attend donc au moins 3 indices: [0, field, i] - if (GEP->getNumIndices() >= 3) - { - auto idxIt = GEP->idx_begin(); - - // premier indice (souvent 0) - auto* idx0 = dyn_cast(idxIt->get()); - ++idxIt; - // second indice: index de champ dans la struct - auto* fieldIdxC = dyn_cast(idxIt->get()); - ++idxIt; - - if (idx0 && fieldIdxC) - { - unsigned fieldIdx = static_cast(fieldIdxC->getZExtValue()); - if (fieldIdx < ST->getNumElements()) - { - Type* fieldTy = ST->getElementType(fieldIdx); - if (auto* fieldArrTy = dyn_cast(fieldTy)) - { - arraySize = fieldArrTy->getNumElements(); - // Troisième indice = index 
dans le tableau interne - idxVal = idxIt->get(); - } - } - } - } - } - - // Si on n'a pas réussi à déduire une taille via la GEP, - // on retombe sur la taille dérivée de l'alloca (cas char buf[10]; ptr = buf; ptr[i]). - if (arraySize == 0 || !idxVal) - { - auto maybeCount = getAllocaElementCount(const_cast(AI)); - if (!maybeCount) - continue; - arraySize = *maybeCount; - if (arraySize == 0) - continue; - - // Pour ces cas-là, on considère le premier indice comme l'index logique. - if (GEP->getNumIndices() < 1) - continue; - auto idxIt = GEP->idx_begin(); - idxVal = idxIt->get(); - } - - std::string varName = - AI->hasName() ? AI->getName().str() : std::string(""); - - // "baseIdxVal" = variable de boucle "i" sans les casts (sext/zext...) - Value* baseIdxVal = idxVal; - while (auto* cast = dyn_cast(baseIdxVal)) - { - baseIdxVal = cast->getOperand(0); - } - - // 4) Cas index constant : test[11] - if (auto* CIdx = dyn_cast(idxVal)) - { - auto idxValue = CIdx->getSExtValue(); - if (idxValue < 0 || static_cast(idxValue) >= arraySize) - { - - for (User* GU : GEP->users()) - { - if (auto* S = dyn_cast(GU)) - { - StackBufferOverflow report; - report.funcName = F.getName().str(); - report.varName = varName; - report.arraySize = arraySize; - report.indexOrUpperBound = static_cast(idxValue); - report.isWrite = true; - report.indexIsConstant = true; - report.inst = S; - report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } - out.push_back(std::move(report)); - } - else if (auto* L = dyn_cast(GU)) - { - StackBufferOverflow report; - report.funcName = F.getName().str(); - report.varName = varName; - report.arraySize = arraySize; - report.indexOrUpperBound = static_cast(idxValue); - report.isWrite = false; - report.indexIsConstant = 
true; - report.inst = L; - report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } - out.push_back(std::move(report)); - } - } - } - continue; - } - - // 5) Cas index variable : test[i] / ptr[i] - // On regarde si on a un intervalle pour la valeur de base (i, pas le cast) - const Value* key = baseIdxVal; - - // Si l'index vient d'un load (pattern -O0 : load i, icmp, load i, gep), - // on utilise le pointeur sous-jacent comme clé (l'alloca de i). - if (auto* LI = dyn_cast(baseIdxVal)) - { - key = LI->getPointerOperand(); - } - - auto itRange = ranges.find(key); - if (itRange == ranges.end()) - { - // pas de borne connue => on ne dit rien ici - continue; - } - - const IntRange& R = itRange->second; - - // 5.a) Borne supérieure hors bornes: UB >= arraySize - if (R.hasUpper && R.upper >= 0 && static_cast(R.upper) >= arraySize) - { - - StackSize ub = static_cast(R.upper); - - for (User* GU : GEP->users()) - { - if (auto* S = dyn_cast(GU)) - { - StackBufferOverflow report; - report.funcName = F.getName().str(); - report.varName = varName; - report.arraySize = arraySize; - report.indexOrUpperBound = ub; - report.isWrite = true; - report.indexIsConstant = false; - report.inst = S; - report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } - out.push_back(std::move(report)); - } - else if (auto* L = dyn_cast(GU)) - { - StackBufferOverflow report; - report.funcName = F.getName().str(); - report.varName = varName; - report.arraySize = arraySize; - report.indexOrUpperBound = ub; - report.isWrite = 
false; - report.indexIsConstant = false; - report.inst = L; - report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } - out.push_back(std::move(report)); - } - } - } - - // 5.b) Borne inférieure négative: LB < 0 => index potentiellement négatif - if (R.hasLower && R.lower < 0) - { - for (User* GU : GEP->users()) - { - if (auto* S = dyn_cast(GU)) - { - StackBufferOverflow report; - report.funcName = F.getName().str(); - report.varName = varName; - report.arraySize = arraySize; - report.isWrite = true; - report.indexIsConstant = false; - report.inst = S; - report.isLowerBoundViolation = true; - report.lowerBound = R.lower; - report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } - out.push_back(std::move(report)); - } - else if (auto* L = dyn_cast(GU)) - { - StackBufferOverflow report; - report.funcName = F.getName().str(); - report.varName = varName; - report.arraySize = arraySize; - report.isWrite = false; - report.indexIsConstant = false; - report.inst = L; - report.isLowerBoundViolation = true; - report.lowerBound = R.lower; - report.aliasPathVec = aliasPath; - if (!aliasPath.empty()) - { - std::reverse(aliasPath.begin(), aliasPath.end()); - std::string chain; - for (size_t i = 0; i < aliasPath.size(); ++i) - { - chain += aliasPath[i]; - if (i + 1 < aliasPath.size()) - chain += " -> "; - } - report.aliasPath = chain; - } - out.push_back(std::move(report)); - } - } - } - // Si R.hasUpper && R.upper < arraySize et (pas de LB problématique), - // on considère l'accès comme probablement 
sûr. - } - } - } - - // ============================================================================ - // Helpers - // ============================================================================ - - static void analyzeMemIntrinsicOverflowsInFunction(llvm::Function& F, - const llvm::DataLayout& DL, - std::vector& out) - { - using namespace llvm; - - if (F.isDeclaration()) - return; - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - - // On s'intéresse uniquement aux appels (intrinsics ou libc) - auto* CB = dyn_cast(&I); - if (!CB) - continue; - - Function* callee = CB->getCalledFunction(); - if (!callee) - continue; - - StringRef name = callee->getName(); - - enum class MemKind - { - None, - MemCpy, - MemSet, - MemMove - }; - MemKind kind = MemKind::None; - - // 1) Cas intrinsics LLVM: llvm.memcpy.*, llvm.memset.*, llvm.memmove.* - if (auto* II = dyn_cast(CB)) - { - switch (II->getIntrinsicID()) - { - case Intrinsic::memcpy: - kind = MemKind::MemCpy; - break; - case Intrinsic::memset: - kind = MemKind::MemSet; - break; - case Intrinsic::memmove: - kind = MemKind::MemMove; - break; - default: - break; - } - } - - // 2) Cas appels libc classiques ou symboles similaires - if (kind == MemKind::None) - { - if (name == "memcpy" || name.contains("memcpy")) - kind = MemKind::MemCpy; - else if (name == "memset" || name.contains("memset")) - kind = MemKind::MemSet; - else if (name == "memmove" || name.contains("memmove")) - kind = MemKind::MemMove; - } - - if (kind == MemKind::None) - continue; - - // On attend au moins 3 arguments: dest, src/val, len - if (CB->arg_size() < 3) - continue; - - Value* dest = CB->getArgOperand(0); - - // Résolution heuristique : on enlève les casts/GEPI de surface - // et on remonte jusqu'à une alloca éventuelle. 
- const Value* cur = dest->stripPointerCasts(); - if (auto* GEP = dyn_cast(cur)) - { - cur = GEP->getPointerOperand(); - } - const AllocaInst* AI = dyn_cast(cur); - if (!AI) - continue; - - auto maybeSize = getAllocaTotalSizeBytes(AI, DL); - if (!maybeSize) - continue; - StackSize destBytes = *maybeSize; - - Value* lenV = CB->getArgOperand(2); - auto* lenC = dyn_cast(lenV); - if (!lenC) - continue; // pour l'instant, on ne traite que les tailles constantes - - uint64_t len = lenC->getZExtValue(); - if (len <= destBytes) - continue; // pas de débordement évident - - MemIntrinsicIssue issue; - issue.funcName = F.getName().str(); - issue.varName = AI->hasName() ? AI->getName().str() : std::string(""); - issue.destSizeBytes = destBytes; - issue.lengthBytes = len; - issue.inst = &I; - - switch (kind) - { - case MemKind::MemCpy: - issue.intrinsicName = "memcpy"; - break; - case MemKind::MemSet: - issue.intrinsicName = "memset"; - break; - case MemKind::MemMove: - issue.intrinsicName = "memmove"; - break; - default: - break; - } - - out.push_back(std::move(issue)); - } - } - } - - // Appelle-t-on une autre fonction que soi-même ? 
- static bool hasNonSelfCall(const llvm::Function& F) - { - const llvm::Function* Self = &F; - - for (const llvm::BasicBlock& BB : F) - { - for (const llvm::Instruction& I : BB) - { - - const llvm::Function* Callee = nullptr; - - if (auto* CI = llvm::dyn_cast(&I)) - { - Callee = CI->getCalledFunction(); - } - else if (auto* II = llvm::dyn_cast(&I)) - { - Callee = II->getCalledFunction(); - } - - if (Callee && !Callee->isDeclaration() && Callee != Self) - { - return true; // appel vers une autre fonction - } - } - } - return false; - } - - // ============================================================================ - // Analyse locale de la stack (deux variantes) - // ============================================================================ - - static std::string deriveAllocaName(const llvm::AllocaInst* AI); - - static LocalStackInfo computeLocalStackBase(llvm::Function& F, const llvm::DataLayout& DL) - { - LocalStackInfo info; - - for (llvm::BasicBlock& BB : F) - { - for (llvm::Instruction& I : BB) - { - auto* alloca = llvm::dyn_cast(&I); - if (!alloca) - continue; - - llvm::Type* ty = alloca->getAllocatedType(); - StackSize count = 1; - - if (auto* CI = llvm::dyn_cast(alloca->getArraySize())) - { - count = CI->getZExtValue(); - } - else if (auto* C = tryGetConstFromValue(alloca->getArraySize(), F)) - { - count = C->getZExtValue(); - } - else - { - info.hasDynamicAlloca = true; - info.unknown = true; - continue; - } - - StackSize size = DL.getTypeAllocSize(ty) * count; - info.bytes += size; - info.localAllocas.emplace_back(deriveAllocaName(alloca), size); - } - } - - return info; - } - - // Mode IR pur : somme des allocas, alignée - static LocalStackInfo computeLocalStackIR(llvm::Function& F, const llvm::DataLayout& DL) - { - LocalStackInfo info = computeLocalStackBase(F, DL); - - if (info.bytes == 0) - return info; - - llvm::MaybeAlign MA = DL.getStackAlignment(); - unsigned stackAlign = MA ? 
MA->value() : 1u; - - if (stackAlign > 1) - info.bytes = llvm::alignTo(info.bytes, stackAlign); - - return info; - } - - // Mode ABI heuristique : frame minimale + overhead sur calls - static LocalStackInfo computeLocalStackABI(llvm::Function& F, const llvm::DataLayout& DL) - { - LocalStackInfo info = computeLocalStackBase(F, DL); - - llvm::MaybeAlign MA = DL.getStackAlignment(); - unsigned stackAlign = MA ? MA->value() : 1u; // 16 sur beaucoup de cibles - - StackSize frameSize = info.bytes; - - if (stackAlign > 1) - frameSize = llvm::alignTo(frameSize, stackAlign); - - if (!F.isDeclaration() && stackAlign > 1 && frameSize < stackAlign) - { - frameSize = stackAlign; - } - - if (stackAlign > 1 && hasNonSelfCall(F)) - { - frameSize = llvm::alignTo(frameSize + stackAlign, stackAlign); - } - - info.bytes = frameSize; - return info; - } - - // Wrapper qui sélectionne le mode - static LocalStackInfo computeLocalStack(llvm::Function& F, const llvm::DataLayout& DL, - AnalysisMode mode) - { - switch (mode) - { - case AnalysisMode::IR: - return computeLocalStackIR(F, DL); - case AnalysisMode::ABI: - return computeLocalStackABI(F, DL); - } - return {}; - } - - // Threshold heuristic: consider an alloca "too large" if it consumes at least - // 1/8 of the configured stack budget (8 MiB default), with a 64 KiB floor for - // small limits. - static StackSize computeAllocaLargeThreshold(const AnalysisConfig& config) - { - const StackSize defaultStack = 8ull * 1024ull * 1024ull; - const StackSize minThreshold = 64ull * 1024ull; // 64 KiB - - StackSize base = config.stackLimit ? 
config.stackLimit : defaultStack; - StackSize derived = base / 8; - - if (derived < minThreshold) - derived = minThreshold; - - return derived; - } - - // ============================================================================ - // Construction du graphe d'appels (CallInst / InvokeInst) - // ============================================================================ - - static CallGraph buildCallGraph(llvm::Module& M) - { - CallGraph CG; - - for (llvm::Function& F : M) - { - if (F.isDeclaration()) - continue; - - auto& vec = CG[&F]; - - for (llvm::BasicBlock& BB : F) - { - for (llvm::Instruction& I : BB) - { - - const llvm::Function* Callee = nullptr; - - if (auto* CI = llvm::dyn_cast(&I)) - { - Callee = CI->getCalledFunction(); - } - else if (auto* II = llvm::dyn_cast(&I)) - { - Callee = II->getCalledFunction(); - } - - if (Callee && !Callee->isDeclaration()) - { - vec.push_back(Callee); - } - } - } - } - - return CG; - } - - // ============================================================================ - // Propagation de la stack + détection de cycles - // ============================================================================ - - static StackEstimate - dfsComputeStack(const llvm::Function* F, const CallGraph& CG, - const std::map& LocalStack, - std::map& State, InternalAnalysisState& Res) - { - auto itState = State.find(F); - if (itState != State.end()) - { - if (itState->second == Visiting) - { - // Cycle détecté : on marque tous les noeuds actuellement en "Visiting" - for (auto& p : State) - { - if (p.second == Visiting) - { - Res.RecursiveFuncs.insert(p.first); - } - } - auto itLocal = LocalStack.find(F); - if (itLocal != LocalStack.end()) - { - return StackEstimate{itLocal->second.bytes, itLocal->second.unknown}; - } - return {}; - } - else if (itState->second == Visited) - { - auto itTotal = Res.TotalStack.find(F); - return (itTotal != Res.TotalStack.end()) ? 
itTotal->second : StackEstimate{}; - } - } - - State[F] = Visiting; - - auto itLocal = LocalStack.find(F); - StackEstimate local = {}; - if (itLocal != LocalStack.end()) - { - local.bytes = itLocal->second.bytes; - local.unknown = itLocal->second.unknown; - } - StackEstimate maxCallee = {}; - - auto itCG = CG.find(F); - if (itCG != CG.end()) - { - for (const llvm::Function* Callee : itCG->second) - { - StackEstimate calleeStack = dfsComputeStack(Callee, CG, LocalStack, State, Res); - if (calleeStack.bytes > maxCallee.bytes) - maxCallee.bytes = calleeStack.bytes; - if (calleeStack.unknown) - maxCallee.unknown = true; - } - } - - StackEstimate total; - total.bytes = local.bytes + maxCallee.bytes; - total.unknown = local.unknown || maxCallee.unknown; - Res.TotalStack[F] = total; - State[F] = Visited; - return total; - } - - static InternalAnalysisState - computeGlobalStackUsage(const CallGraph& CG, - const std::map& LocalStack) - { - InternalAnalysisState Res; - std::map State; - - for (auto& p : LocalStack) - { - State[p.first] = NotVisited; - } - - for (auto& p : LocalStack) - { - const llvm::Function* F = p.first; - if (State[F] == NotVisited) - { - dfsComputeStack(F, CG, LocalStack, State, Res); - } - } - - return Res; - } - - // ============================================================================ - // Détection d’auto-récursion “infinie” (heuristique DominatorTree) - // ============================================================================ - - static bool detectInfiniteSelfRecursion(llvm::Function& F) - { - if (F.isDeclaration()) - return false; - - const llvm::Function* Self = &F; - - std::vector SelfCallBlocks; - - for (llvm::BasicBlock& BB : F) - { - for (llvm::Instruction& I : BB) - { - const llvm::Function* Callee = nullptr; - - if (auto* CI = llvm::dyn_cast(&I)) - { - Callee = CI->getCalledFunction(); - } - else if (auto* II = llvm::dyn_cast(&I)) - { - Callee = II->getCalledFunction(); - } - - if (Callee == Self) - { - 
SelfCallBlocks.push_back(&BB); - break; - } - } - } - - if (SelfCallBlocks.empty()) - return false; - - llvm::DominatorTree DT(F); - - bool hasReturn = false; - - for (llvm::BasicBlock& BB : F) - { - for (llvm::Instruction& I : BB) - { - if (llvm::isa(&I)) - { - hasReturn = true; - - bool dominatedBySelfCall = false; - for (llvm::BasicBlock* SCB : SelfCallBlocks) - { - if (DT.dominates(SCB, &BB)) - { - dominatedBySelfCall = true; - break; - } - } - - if (!dominatedBySelfCall) - return false; - } - } - } - - if (!hasReturn) - { - return true; - } - - return true; - } - - // HELPERS - // Essaie de retrouver une alloca de tableau à partir d'un pointeur, - // en suivant les bitcast, GEP(0,0), et un pattern simple de pointeur local : - // char test[10]; - // char *ptr = test; - // ... load ptr ... ; gep -> ptr[i] - namespace - { - struct RecursionGuard - { - llvm::SmallPtrSetImpl& set; - const llvm::Value* value; - RecursionGuard(llvm::SmallPtrSetImpl& s, const llvm::Value* v) - : set(s), value(v) - { - set.insert(value); - } - ~RecursionGuard() - { - set.erase(value); - } - }; - } // namespace - - static const llvm::AllocaInst* resolveArrayAllocaFromPointerInternal( - const llvm::Value* V, llvm::Function& F, std::vector& path, - llvm::SmallPtrSetImpl& recursionStack, int depth) - { - using namespace llvm; - - if (!V) - return nullptr; - if (depth > 64) - return nullptr; - if (recursionStack.contains(V)) - return nullptr; - - RecursionGuard guard(recursionStack, V); - - auto isArrayAlloca = [](const AllocaInst* AI) -> bool - { - Type* T = AI->getAllocatedType(); - // On considère comme "buffer de stack" : - // - les vrais tableaux, - // - les allocas de type tableau (VLA côté IR), - // - les structs qui contiennent au moins un champ tableau. 
- if (T->isArrayTy() || AI->isArrayAllocation()) - return true; - - if (auto* ST = llvm::dyn_cast(T)) - { - for (unsigned i = 0; i < ST->getNumElements(); ++i) - { - if (ST->getElementType(i)->isArrayTy()) - return true; - } - } - return false; - }; - - // Pour éviter les boucles d'aliasing bizarres - SmallPtrSet visited; - const Value* cur = V; - - while (cur && !visited.contains(cur)) - { - visited.insert(cur); - if (cur->hasName()) - path.push_back(cur->getName().str()); - - // Cas 1 : on tombe sur une alloca. - if (auto* AI = dyn_cast(cur)) - { - if (isArrayAlloca(AI)) - { - // Alloca d'un buffer de stack (tableau) : cible finale. - return AI; - } - - // Sinon, c'est très probablement une variable locale de type pointeur - // (char *ptr; char **pp; etc.). On parcourt les stores vers cette - // variable pour voir quelles valeurs lui sont assignées, et on - // tente de remonter jusqu'à une vraie alloca de tableau. - const AllocaInst* foundAI = nullptr; - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - auto* SI = dyn_cast(&I); - if (!SI) - continue; - if (SI->getPointerOperand() != AI) - continue; - - const Value* storedPtr = SI->getValueOperand(); - std::vector subPath; - const AllocaInst* cand = resolveArrayAllocaFromPointerInternal( - storedPtr, F, subPath, recursionStack, depth + 1); - if (!cand) - continue; - - if (!foundAI) - { - foundAI = cand; - // Append subPath to path - path.insert(path.end(), subPath.begin(), subPath.end()); - } - else if (foundAI != cand) - { - // Plusieurs bases différentes : aliasing ambigu, - // on préfère abandonner plutôt que de se tromper. - return nullptr; - } - } - } - return foundAI; - } - - // Cas 2 : bitcast -> on remonte l'opérande. - if (auto* BC = dyn_cast(cur)) - { - cur = BC->getOperand(0); - continue; - } - - // Cas 3 : GEP -> on remonte sur le pointeur de base. - if (auto* GEP = dyn_cast(cur)) - { - cur = GEP->getPointerOperand(); - continue; - } - - // Cas 4 : load d'un pointeur. 
Exemple typique : - // char *ptr = test; - // char *p2 = ptr; - // char **pp = &ptr; - // (*pp)[i] = ... - // - // On remonte au "container" du pointeur (variable locale, ou autre valeur) - // en suivant l'opérande du load. - if (auto* LI = dyn_cast(cur)) - { - cur = LI->getPointerOperand(); - continue; - } - - // Cas 5 : PHI de pointeurs (fusion de plusieurs alias) : - // on tente de résoudre chaque incoming et on s'assure qu'ils - // pointent tous vers la même alloca de tableau. - if (auto* PN = dyn_cast(cur)) - { - const AllocaInst* foundAI = nullptr; - std::vector phiPath; - for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) - { - const Value* inV = PN->getIncomingValue(i); - std::vector subPath; - const AllocaInst* cand = resolveArrayAllocaFromPointerInternal( - inV, F, subPath, recursionStack, depth + 1); - if (!cand) - continue; - if (!foundAI) - { - foundAI = cand; - phiPath = subPath; - } - else if (foundAI != cand) - { - // PHI mélange plusieurs bases différentes : trop ambigu. - return nullptr; - } - } - path.insert(path.end(), phiPath.begin(), phiPath.end()); - return foundAI; - } - - // Autres cas (arguments, globales complexes, etc.) : on arrête l'heuristique. - break; - } - - return nullptr; - } - - static const llvm::AllocaInst* resolveArrayAllocaFromPointer(const llvm::Value* V, - llvm::Function& F, - std::vector& path) - { - llvm::SmallPtrSet recursionStack; - return resolveArrayAllocaFromPointerInternal(V, F, path, recursionStack, 0); - } - - // Analyse intra-fonction pour détecter plusieurs stores dans un même buffer de stack. - // Heuristique : on compte le nombre de StoreInst qui écrivent dans un GEP basé sur - // une alloca de tableau sur la stack. Si une même alloca reçoit plus d'un store, - // on émet un warning. 
- static void analyzeMultipleStoresInFunction(llvm::Function& F, - std::vector& out) - { - using namespace llvm; - - if (F.isDeclaration()) - return; - - struct Info - { - std::size_t storeCount = 0; - llvm::SmallPtrSet indexKeys; - const AllocaInst* AI = nullptr; - }; - - std::map infoMap; - - for (BasicBlock& BB : F) - { - for (Instruction& I : BB) - { - auto* S = dyn_cast(&I); - if (!S) - continue; - - Value* ptr = S->getPointerOperand(); - auto* GEP = dyn_cast(ptr); - if (!GEP) - continue; - - // On remonte à la base pour trouver une alloca de tableau sur la stack. - const Value* basePtr = GEP->getPointerOperand(); - std::vector dummyAliasPath; - const AllocaInst* AI = resolveArrayAllocaFromPointer(basePtr, F, dummyAliasPath); - if (!AI) - continue; - - // On récupère l'expression d'index utilisée dans le GEP. - Value* idxVal = nullptr; - Type* srcElemTy = GEP->getSourceElementType(); - - if (auto* arrTy = dyn_cast(srcElemTy)) - { - // Pattern [N x T]* -> indices [0, i] - if (GEP->getNumIndices() < 2) - continue; - auto idxIt = GEP->idx_begin(); - ++idxIt; // saute le premier indice (souvent 0) - idxVal = idxIt->get(); - } - else - { - // Pattern T* -> indice unique [i] (cas char *ptr = test; ptr[i]) - if (GEP->getNumIndices() < 1) - continue; - auto idxIt = GEP->idx_begin(); - idxVal = idxIt->get(); - } - - if (!idxVal) - continue; - - // On normalise un peu la clé d'index en enlevant les casts SSA. - const Value* idxKey = idxVal; - while (auto* cast = dyn_cast(const_cast(idxKey))) - { - idxKey = cast->getOperand(0); - } - - auto& info = infoMap[AI]; - info.AI = AI; - info.storeCount++; - info.indexKeys.insert(idxKey); - } - } - - // Construction des warnings pour chaque buffer qui reçoit plusieurs stores. 
- for (auto& entry : infoMap) - { - const AllocaInst* AI = entry.first; - const Info& info = entry.second; - - if (info.storeCount <= 1) - continue; // un seul store -> pas de warning - - MultipleStoreIssue issue; - issue.funcName = F.getName().str(); - issue.varName = AI->hasName() ? AI->getName().str() : std::string(""); - issue.storeCount = info.storeCount; - issue.distinctIndexCount = info.indexKeys.size(); - issue.allocaInst = AI; - - out.push_back(std::move(issue)); - } - } - - // HELPERS - static const llvm::ConstantInt* tryGetConstFromValue(const llvm::Value* V, - const llvm::Function& F) - { - using namespace llvm; - - // On enlève d'abord les cast (sext/zext/trunc, etc.) pour arriver - // à la vraie valeur “de base”. - const Value* cur = V; - while (auto* cast = dyn_cast(cur)) - { - cur = cast->getOperand(0); - } - - // Cas trivial : c'est déjà une constante entière. - if (auto* C = dyn_cast(cur)) - return C; - - // Cas -O0 typique : on compare un load d'une variable locale. - auto* LI = dyn_cast(cur); - if (!LI) - return nullptr; - - const Value* ptr = LI->getPointerOperand(); - const ConstantInt* found = nullptr; - - // Version ultra-simple : on cherche un store de constante dans la fonction. - for (const BasicBlock& BB : F) - { - for (const Instruction& I : BB) - { - auto* SI = dyn_cast(&I); - if (!SI) - continue; - if (SI->getPointerOperand() != ptr) - continue; - if (auto* C = dyn_cast(SI->getValueOperand())) - { - // On garde la dernière constante trouvée (si plusieurs stores, c'est naïf). 
- found = C; - } - } - } - return found; - } - - // ============================================================================ - // API publique : analyzeModule / analyzeFile - // ============================================================================ - - namespace - { - static std::string getFunctionSourcePath(const llvm::Function& F) - { - if (auto* sp = F.getSubprogram()) - { - if (auto* file = sp->getFile()) - { - std::string dir = file->getDirectory().str(); - std::string name = file->getFilename().str(); - if (!dir.empty()) - return dir + "/" + name; - return name; - } - } - return {}; - } - - static bool getFunctionSourceLocation(const llvm::Function& F, unsigned& line, - unsigned& column) - { - line = 0; - column = 0; - - for (const llvm::BasicBlock& BB : F) - { - for (const llvm::Instruction& I : BB) - { - llvm::DebugLoc DL = I.getDebugLoc(); - if (!DL) - continue; - line = DL.getLine(); - column = DL.getCol(); - if (line != 0) - { - if (column == 0) - column = 1; - return true; - } - } - } - - if (auto* sp = F.getSubprogram()) - { - line = sp->getLine(); - if (line != 0) - { - column = 1; - return true; - } - } - - return false; - } - - static std::string buildMaxStackCallPath(const llvm::Function* F, const CallGraph& CG, - const InternalAnalysisState& state) - { - std::string path; - std::set visited; - const llvm::Function* current = F; - - while (current) - { - if (!visited.insert(current).second) - break; - - if (!path.empty()) - path += " -> "; - path += current->getName().str(); - - const llvm::Function* bestCallee = nullptr; - StackEstimate bestStack{}; - - auto itCG = CG.find(current); - if (itCG == CG.end()) - break; - - for (const llvm::Function* callee : itCG->second) - { - auto itTotal = state.TotalStack.find(callee); - StackEstimate est = - (itTotal != state.TotalStack.end()) ? 
itTotal->second : StackEstimate{}; - if (!bestCallee || est.bytes > bestStack.bytes) - { - bestCallee = callee; - bestStack = est; - } - } - - if (!bestCallee || bestStack.bytes == 0) - break; - - current = bestCallee; - } - - return path; - } - - static std::string normalizePathForMatch(const std::string& input) - { - std::string out = input; - for (char& c : out) - { - if (c == '\\') - c = '/'; - } - const bool isAbs = !out.empty() && out.front() == '/'; - std::vector parts; - std::string cur; - for (char c : out) - { - if (c == '/') - { - if (!cur.empty()) - { - if (cur == "..") - { - if (!parts.empty()) - parts.pop_back(); - } - else if (cur != ".") - { - parts.push_back(cur); - } - cur.clear(); - } - } - else - { - cur.push_back(c); - } - } - if (!cur.empty()) - { - if (cur == "..") - { - if (!parts.empty()) - parts.pop_back(); - } - else if (cur != ".") - { - parts.push_back(cur); - } - } - std::string norm = isAbs ? "/" : ""; - for (std::size_t i = 0; i < parts.size(); ++i) - { - norm += parts[i]; - if (i + 1 < parts.size()) - norm += "/"; - } - while (!norm.empty() && norm.back() == '/') - norm.pop_back(); - return norm; - } - - static std::string basenameOf(const std::string& path) - { - std::size_t pos = path.find_last_of('/'); - if (pos == std::string::npos) - return path; - if (pos + 1 >= path.size()) - return {}; - return path.substr(pos + 1); - } - - static bool pathHasSuffix(const std::string& path, const std::string& suffix) - { - if (suffix.empty()) - return false; - if (path.size() < suffix.size()) - return false; - if (path.compare(path.size() - suffix.size(), suffix.size(), suffix) != 0) - return false; - if (path.size() == suffix.size()) - return true; - return path[path.size() - suffix.size() - 1] == '/'; - } - - static bool pathHasPrefix(const std::string& path, const std::string& prefix) - { - if (prefix.empty()) - return false; - if (path.size() < prefix.size()) - return false; - if (path.compare(0, prefix.size(), prefix) != 0) - return 
false; - if (path.size() == prefix.size()) - return true; - return path[prefix.size()] == '/'; - } - - static bool shouldIncludePath(const std::string& path, const AnalysisConfig& config) - { - if (config.onlyFiles.empty() && config.onlyDirs.empty()) - return true; - if (path.empty()) - return false; - - const std::string normPath = normalizePathForMatch(path); - - for (const auto& file : config.onlyFiles) - { - const std::string normFile = normalizePathForMatch(file); - if (normPath == normFile || pathHasSuffix(normPath, normFile)) - return true; - const std::string fileBase = basenameOf(normFile); - if (!fileBase.empty() && basenameOf(normPath) == fileBase) - return true; - } - - for (const auto& dir : config.onlyDirs) - { - const std::string normDir = normalizePathForMatch(dir); - if (pathHasPrefix(normPath, normDir) || pathHasSuffix(normPath, normDir)) - return true; - const std::string needle = "/" + normDir + "/"; - if (normPath.find(needle) != std::string::npos) - return true; - } - - return false; - } - - static bool functionNameMatches(const llvm::Function& F, const AnalysisConfig& config) - { - if (config.onlyFunctions.empty()) - return true; - - auto itaniumBaseName = [](const std::string& symbol) -> std::string - { - if (symbol.rfind("_Z", 0) != 0) - return {}; - std::size_t i = 2; - if (i < symbol.size() && symbol[i] == 'L') - ++i; - if (i >= symbol.size() || !std::isdigit(static_cast(symbol[i]))) - return {}; - std::size_t len = 0; - while (i < symbol.size() && std::isdigit(static_cast(symbol[i]))) - { - len = len * 10 + static_cast(symbol[i] - '0'); - ++i; - } - if (len == 0 || i + len > symbol.size()) - return {}; - return symbol.substr(i, len); - }; - - std::string name = F.getName().str(); - std::string demangledName; - if (ctrace_tools::isMangled(name) || name.rfind("_Z", 0) == 0) - demangledName = ctrace_tools::demangle(name.c_str()); - std::string demangledBase; - if (!demangledName.empty()) - { - std::size_t pos = demangledName.find('('); - if 
(pos != std::string::npos && pos > 0) - demangledBase = demangledName.substr(0, pos); - } - std::string itaniumBase = itaniumBaseName(name); - - for (const auto& filter : config.onlyFunctions) - { - if (name == filter) - return true; - if (!demangledName.empty() && demangledName == filter) - return true; - if (!demangledBase.empty() && demangledBase == filter) - return true; - if (!itaniumBase.empty() && itaniumBase == filter) - return true; - if (ctrace_tools::isMangled(filter)) - { - std::string demangledFilter = ctrace_tools::demangle(filter.c_str()); - if (!demangledName.empty() && demangledName == demangledFilter) - return true; - std::size_t pos = demangledFilter.find('('); - if (pos != std::string::npos && pos > 0) - { - if (demangledBase == demangledFilter.substr(0, pos)) - return true; + // Le bloc "else" n'est jamais atteint. + isUnreachable = true; + } + } } } - } - - return false; - } - } // namespace - AnalysisResult analyzeModule(llvm::Module& mod, const AnalysisConfig& config) - { - const llvm::DataLayout& DL = mod.getDataLayout(); - const bool hasPathFilter = !config.onlyFiles.empty() || !config.onlyDirs.empty(); - const bool hasFuncFilter = !config.onlyFunctions.empty(); - const bool hasFilter = hasPathFilter || hasFuncFilter; - const std::string moduleSourcePath = mod.getSourceFileName(); + std::ostringstream body; + Diagnostic diag; - auto shouldAnalyzeFunction = [&](const llvm::Function& F) -> bool - { - if (!hasFilter) - return true; - if (hasFuncFilter && !functionNameMatches(F, config)) - { - if (config.dumpFilter) + if (issue.isLowerBoundViolation) { - llvm::errs() << "[filter] func=" << F.getName() - << " file= keep=no\n"; + diag.errCode = DescriptiveErrorCode::NegativeStackIndex; + body << " [!!] 
potential negative index on variable '" << issue.varName + << "' (size " << issue.arraySize << ")\n"; + if (!issue.aliasPath.empty()) + { + body << " alias path: " << issue.aliasPath << "\n"; + } + body << " inferred lower bound for index expression: " << issue.lowerBound + << " (index may be < 0)\n"; } - return false; - } - if (!hasPathFilter) - return true; - std::string path = getFunctionSourcePath(F); - std::string usedPath; - bool decision = false; - if (!path.empty()) - { - usedPath = path; - decision = shouldIncludePath(usedPath, config); - } - else - { - llvm::StringRef name = F.getName(); - if (name.starts_with("__") || name.starts_with("llvm.") || - name.starts_with("clang.")) + else { - decision = false; + diag.errCode = DescriptiveErrorCode::StackBufferOverflow; + body << " [!!] potential stack buffer overflow on variable '" << issue.varName + << "' (size " << issue.arraySize << ")\n"; + if (!issue.aliasPath.empty()) + { + body << " alias path: " << issue.aliasPath << "\n"; + } + if (issue.indexIsConstant) + { + body << " constant index " << issue.indexOrUpperBound + << " is out of bounds (0.." + << (issue.arraySize ? issue.arraySize - 1 : 0) << ")\n"; + } + else + { + body << " index variable may go up to " << issue.indexOrUpperBound + << " (array last valid index: " + << (issue.arraySize ? issue.arraySize - 1 : 0) << ")\n"; + } } - else if (!moduleSourcePath.empty()) + + if (issue.isWrite) { - usedPath = moduleSourcePath; - decision = shouldIncludePath(usedPath, config); + body << " (this is a write access)\n"; } else { - decision = false; + body << " (this is a read access)\n"; + } + if (isUnreachable) + { + body << " [info] this access appears unreachable at runtime " + "(condition is always false for this branch)\n"; } - } - if (config.dumpFilter) - { - llvm::errs() << "[filter] func=" << F.getName() << " file="; - if (usedPath.empty()) - llvm::errs() << ""; - else - llvm::errs() << usedPath; - llvm::errs() << " keep=" << (decision ? 
"yes" : "no") << "\n"; + diag.funcName = issue.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.startLine = haveLoc ? startLine : 0; + diag.startColumn = haveLoc ? startColumn : 0; + diag.endLine = haveLoc ? endLine : 0; + diag.endColumn = haveLoc ? endColumn : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.message = body.str(); + diag.variableAliasingVec = issue.aliasPathVec; + result.diagnostics.push_back(std::move(diag)); } - - return decision; - }; - - // 1) Stack locale par fonction - std::map LocalStack; - std::map> functionLocations; - std::map functionCallPaths; - std::map>> functionLocalAllocas; - - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - LocalStackInfo info = computeLocalStack(F, DL, config.mode); - LocalStack[&F] = info; } - // 2) Graphe d'appels - CallGraph CG; - if (!hasFilter) - { - CG = buildCallGraph(mod); - } - else + static void + appendDynamicAllocaDiagnostics(AnalysisResult& result, + const std::vector& issues) { - for (llvm::Function& F : mod) + for (const auto& d : issues) { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - - auto& vec = CG[&F]; - - for (llvm::BasicBlock& BB : F) + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (d.allocaInst) { - for (llvm::Instruction& I : BB) + llvm::DebugLoc DL = d.allocaInst->getDebugLoc(); + if (DL) { - const llvm::Function* Callee = nullptr; - if (auto* CI = llvm::dyn_cast(&I)) - { - Callee = CI->getCalledFunction(); - } - else if (auto* II = llvm::dyn_cast(&I)) - { - Callee = II->getCalledFunction(); - } - - if (Callee && !Callee->isDeclaration() && shouldAnalyzeFunction(*Callee)) - { - vec.push_back(Callee); - } + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; } } - } - } - // 3) Propagation + détection de récursivité - InternalAnalysisState state = computeGlobalStackUsage(CG, LocalStack); + 
std::ostringstream body; - // 4) Détection d’auto-récursion “infinie” pour les fonctions récursives - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - const llvm::Function* Fn = &F; - if (!state.RecursiveFuncs.count(Fn)) - continue; + body << " [!] dynamic stack allocation detected for variable '" << d.varName + << "'\n"; + body << " allocated type: " << d.typeName << "\n"; + body << " size of this allocation is not compile-time constant " + "(VLA / variable alloca) and may lead to unbounded stack usage\n"; - if (detectInfiniteSelfRecursion(F)) - { - state.InfiniteRecursionFuncs.insert(Fn); + Diagnostic diag; + diag.funcName = d.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::VLAUsage; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); } } - // 5) Construction du résultat public - AnalysisResult result; - result.config = config; - StackSize allocaLargeThreshold = computeAllocaLargeThreshold(config); - - for (llvm::Function& F : mod) + static void + appendAllocaUsageDiagnostics(AnalysisResult& result, const AnalysisConfig& config, + StackSize allocaLargeThreshold, + const std::vector& issues) { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - - const llvm::Function* Fn = &F; - - LocalStackInfo localInfo; - StackEstimate totalInfo; - - auto itLocal = LocalStack.find(Fn); - if (itLocal != LocalStack.end()) - localInfo = itLocal->second; - - auto itTotal = state.TotalStack.find(Fn); - if (itTotal != state.TotalStack.end()) - totalInfo = itTotal->second; - - FunctionResult fr; - fr.name = F.getName().str(); - fr.filePath = getFunctionSourcePath(F); - if (fr.filePath.empty() && !moduleSourcePath.empty()) - fr.filePath = moduleSourcePath; - fr.localStack = localInfo.bytes; - fr.localStackUnknown = localInfo.unknown; - 
fr.maxStack = totalInfo.bytes; - fr.maxStackUnknown = totalInfo.unknown; - fr.hasDynamicAlloca = localInfo.hasDynamicAlloca; - fr.isRecursive = state.RecursiveFuncs.count(Fn) != 0; - fr.hasInfiniteSelfRecursion = state.InfiniteRecursionFuncs.count(Fn) != 0; - fr.exceedsLimit = (!fr.maxStackUnknown && totalInfo.bytes > config.stackLimit); - - unsigned line = 0; - unsigned column = 0; - if (getFunctionSourceLocation(F, line, column)) + for (const auto& a : issues) { - functionLocations[fr.name] = {line, column}; - } - if (!fr.isRecursive && totalInfo.bytes > localInfo.bytes) - { - std::string path = buildMaxStackCallPath(Fn, CG, state); - if (!path.empty()) - functionCallPaths[fr.name] = path; - } - if (!localInfo.localAllocas.empty()) - { - functionLocalAllocas[fr.name] = localInfo.localAllocas; - } - - result.functions.push_back(std::move(fr)); - } + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (a.allocaInst) + { + llvm::DebugLoc DL = a.allocaInst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } + } - // 5b) Emit summary diagnostics for recursion/overflow flags (for JSON parity) - for (const auto& fr : result.functions) - { - if (fr.isRecursive) - { - Diagnostic diag; - diag.funcName = fr.name; - diag.filePath = fr.filePath; - diag.severity = DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::None; - diag.message = " [!] 
recursive or mutually recursive function detected\n"; - result.diagnostics.push_back(std::move(diag)); - } + bool isOversized = false; + if (a.sizeIsConst && a.sizeBytes >= allocaLargeThreshold) + isOversized = true; + else if (a.hasUpperBound && a.upperBoundBytes >= allocaLargeThreshold) + isOversized = true; + else if (a.sizeIsConst && config.stackLimit != 0 && + a.sizeBytes >= config.stackLimit) + isOversized = true; - if (fr.hasInfiniteSelfRecursion) - { + std::ostringstream body; Diagnostic diag; - diag.funcName = fr.name; - diag.filePath = fr.filePath; - diag.severity = DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::None; - diag.message = " [!!!] unconditional self recursion detected (no base case)\n" - " this will eventually overflow the stack at runtime\n"; - result.diagnostics.push_back(std::move(diag)); - } + diag.funcName = a.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; - if (fr.exceedsLimit) - { - Diagnostic diag; - diag.funcName = fr.name; - diag.filePath = fr.filePath; - diag.severity = DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::None; - auto it = functionLocations.find(fr.name); - if (it != functionLocations.end()) + if (isOversized) { - diag.line = it->second.first; - diag.column = it->second.second; + diag.severity = DiagnosticSeverity::Error; + diag.errCode = DescriptiveErrorCode::AllocaTooLarge; + body << " [!!] large alloca on the stack for variable '" << a.varName << "'\n"; } - std::string message; - bool suppressLocation = false; - StackSize maxCallee = - (fr.maxStack > fr.localStack) ? 
(fr.maxStack - fr.localStack) : 0; - auto itLocals = functionLocalAllocas.find(fr.name); - std::string aliasLine; - if (fr.localStack >= maxCallee && itLocals != functionLocalAllocas.end()) + else if (a.userControlled) { - std::string localsDetails; - std::string singleName; - StackSize singleSize = 0; - for (const auto& entry : itLocals->second) - { - if (entry.first == "") - continue; - if (entry.second >= config.stackLimit && entry.second > singleSize) - { - singleName = entry.first; - singleSize = entry.second; - } - } - if (!singleName.empty()) - { - aliasLine = " alias path: " + singleName + "\n"; - } - else if (!itLocals->second.empty()) - { - localsDetails += - " locals: " + std::to_string(itLocals->second.size()) + - " variables (total " + std::to_string(fr.localStack) + " bytes)\n"; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::AllocaUserControlled; + body << " [!!] user-controlled alloca size for variable '" << a.varName + << "'\n"; + } + else + { + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::AllocaUsageWarning; + body << " [!] 
dynamic alloca on the stack for variable '" << a.varName + << "'\n"; + } - std::vector> named = itLocals->second; - named.erase(std::remove_if(named.begin(), named.end(), [](const auto& v) - { return v.first == ""; }), - named.end()); - std::sort(named.begin(), named.end(), - [](const auto& a, const auto& b) - { - if (a.second != b.second) - return a.second > b.second; - return a.first < b.first; - }); - if (!named.empty()) - { - constexpr std::size_t kMaxLocalsForLocation = 5; - if (named.size() > kMaxLocalsForLocation) - suppressLocation = true; - std::string listLine = " locals list: "; - for (std::size_t idx = 0; idx < named.size(); ++idx) - { - if (idx > 0) - listLine += ", "; - listLine += named[idx].first + "(" + - std::to_string(named[idx].second) + ")"; - } - localsDetails += listLine + "\n"; - } - } - if (!localsDetails.empty()) - message += localsDetails; + body + << " allocation performed via alloca/VLA; stack usage grows with runtime " + "value\n"; + + if (a.sizeIsConst) + { + body << " requested stack size: " << a.sizeBytes << " bytes\n"; } - auto itPath = functionCallPaths.find(fr.name); - std::string suffix; - if (itPath != functionCallPaths.end()) + else if (a.hasUpperBound) { - suffix += " path: " + itPath->second + "\n"; + body << " inferred upper bound for size: " << a.upperBoundBytes + << " bytes\n"; } - std::string mainLine = " [!] 
potential stack overflow: exceeds limit of " + - std::to_string(config.stackLimit) + " bytes\n"; - message = mainLine + aliasLine + suffix + message; - if (suppressLocation) + else { - diag.line = 0; - diag.column = 0; + body << " size is unbounded at compile time\n"; } - diag.message = std::move(message); - result.diagnostics.push_back(std::move(diag)); - } - } - // 6) Détection des dépassements de buffer sur la stack (analyse intra-fonction) - std::vector bufferIssues; - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - analyzeStackBufferOverflowsInFunction(F, bufferIssues); - } - - // 7) Affichage des problèmes détectés (pour l'instant, sortie directe) - for (const auto& issue : bufferIssues) - { - unsigned line = 0; - unsigned column = 0; - unsigned startLine = 0; - unsigned startColumn = 0; - unsigned endLine = 0; - unsigned endColumn = 0; - bool haveLoc = false; - - if (issue.inst) - { - llvm::DebugLoc DL = issue.inst->getDebugLoc(); - if (DL) + if (a.isInfiniteRecursive) { - line = DL.getLine(); - startLine = DL.getLine(); - - startColumn = DL.getCol(); - column = DL.getCol(); - - // By default, same as start - endLine = DL.getLine(); - endColumn = DL.getCol(); - haveLoc = true; - if (auto* loc = DL.get()) - { - if (auto* scope = llvm::dyn_cast(loc)) - { - if (scope->getColumn() != 0) - { - endColumn = scope->getColumn() + 1; - } - } - } + // Any alloca inside infinite recursion will blow the stack. + diag.severity = DiagnosticSeverity::Error; + body << " function is infinitely recursive; this alloca runs at every " + "frame and guarantees stack overflow\n"; } - } - - bool isUnreachable = false; - { - using namespace llvm; - - if (issue.inst) + else if (a.isRecursive) { - auto* BB = issue.inst->getParent(); - - // Parcourt les prédécesseurs du bloc pour voir si certains - // ont une branche conditionnelle avec une condition constante. 
- for (auto* Pred : predecessors(BB)) + // Controlled recursion still compounds stack usage across frames. + if (diag.severity != DiagnosticSeverity::Error && + (isOversized || a.userControlled)) { - auto* BI = dyn_cast(Pred->getTerminator()); - if (!BI || !BI->isConditional()) - continue; - - auto* CI = dyn_cast(BI->getCondition()); - if (!CI) - continue; - - const llvm::Function& Func = *issue.inst->getFunction(); - - auto* C0 = tryGetConstFromValue(CI->getOperand(0), Func); - auto* C1 = tryGetConstFromValue(CI->getOperand(1), Func); - if (!C0 || !C1) - continue; - - // Évalue le résultat de l'ICmp pour ces constantes (implémentation maison). - bool condTrue = false; - auto pred = CI->getPredicate(); - const auto& v0 = C0->getValue(); - const auto& v1 = C1->getValue(); - - switch (pred) - { - case ICmpInst::ICMP_EQ: - condTrue = (v0 == v1); - break; - case ICmpInst::ICMP_NE: - condTrue = (v0 != v1); - break; - case ICmpInst::ICMP_SLT: - condTrue = v0.slt(v1); - break; - case ICmpInst::ICMP_SLE: - condTrue = v0.sle(v1); - break; - case ICmpInst::ICMP_SGT: - condTrue = v0.sgt(v1); - break; - case ICmpInst::ICMP_SGE: - condTrue = v0.sge(v1); - break; - case ICmpInst::ICMP_ULT: - condTrue = v0.ult(v1); - break; - case ICmpInst::ICMP_ULE: - condTrue = v0.ule(v1); - break; - case ICmpInst::ICMP_UGT: - condTrue = v0.ugt(v1); - break; - case ICmpInst::ICMP_UGE: - condTrue = v0.uge(v1); - break; - default: - // On ne traite pas d'autres prédicats exotiques ici - continue; - } - - // Branchement du type: - // br i1 %cond, label %then, label %else - // Successeur 0 pris si condTrue == true - // Successeur 1 pris si condTrue == false - if (BB == BI->getSuccessor(0) && condTrue == false) - { - // Le bloc "then" n'est jamais atteint. - isUnreachable = true; - } - else if (BB == BI->getSuccessor(1) && condTrue == true) - { - // Le bloc "else" n'est jamais atteint. 
- isUnreachable = true; - } + diag.severity = DiagnosticSeverity::Error; } + body << " function is recursive; this allocation repeats at each " + "recursion " + "depth and can exhaust the stack\n"; } - } - - std::ostringstream body; - Diagnostic diag; - if (issue.isLowerBoundViolation) - { - diag.errCode = DescriptiveErrorCode::NegativeStackIndex; - body << " [!!] potential negative index on variable '" << issue.varName - << "' (size " << issue.arraySize << ")\n"; - if (!issue.aliasPath.empty()) - { - body << " alias path: " << issue.aliasPath << "\n"; - } - body << " inferred lower bound for index expression: " << issue.lowerBound - << " (index may be < 0)\n"; - } - else - { - diag.errCode = DescriptiveErrorCode::StackBufferOverflow; - body << " [!!] potential stack buffer overflow on variable '" << issue.varName - << "' (size " << issue.arraySize << ")\n"; - if (!issue.aliasPath.empty()) + if (isOversized) { - body << " alias path: " << issue.aliasPath << "\n"; + body << " exceeds safety threshold of " << allocaLargeThreshold + << " bytes"; + if (config.stackLimit != 0) + { + body << " (stack limit: " << config.stackLimit << " bytes)"; + } + body << "\n"; } - if (issue.indexIsConstant) + else if (a.userControlled) { - body << " constant index " << issue.indexOrUpperBound - << " is out of bounds (0.." << (issue.arraySize ? issue.arraySize - 1 : 0) - << ")\n"; + body << " size depends on user-controlled input " + "(function argument or non-local value)\n"; } else { - body << " index variable may go up to " << issue.indexOrUpperBound - << " (array last valid index: " - << (issue.arraySize ? 
issue.arraySize - 1 : 0) << ")\n"; + body << " size does not appear user-controlled but remains " + "runtime-dependent\n"; } - } - if (issue.isWrite) - { - body << " (this is a write access)\n"; - } - else - { - body << " (this is a read access)\n"; - } - if (isUnreachable) - { - body << " [info] this access appears unreachable at runtime " - "(condition is always false for this branch)\n"; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); } - - diag.funcName = issue.funcName; - diag.line = haveLoc ? line : 0; - diag.column = haveLoc ? column : 0; - diag.startLine = haveLoc ? startLine : 0; - diag.startColumn = haveLoc ? startColumn : 0; - diag.endLine = haveLoc ? endLine : 0; - diag.endColumn = haveLoc ? endColumn : 0; - diag.severity = DiagnosticSeverity::Warning; - diag.message = body.str(); - diag.variableAliasingVec = issue.aliasPathVec; - result.diagnostics.push_back(std::move(diag)); - } - - // 8) Détection des allocations dynamiques sur la stack (VLA / alloca variable) - std::vector dynAllocaIssues; - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - analyzeDynamicAllocasInFunction(F, dynAllocaIssues); } - // 9) Affichage des allocations dynamiques détectées - for (const auto& d : dynAllocaIssues) + static void + appendMemIntrinsicDiagnostics(AnalysisResult& result, + const std::vector& issues) { - unsigned line = 0; - unsigned column = 0; - bool haveLoc = false; - if (d.allocaInst) + for (const auto& m : issues) { - llvm::DebugLoc DL = d.allocaInst->getDebugLoc(); - if (DL) + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (m.inst) { - line = DL.getLine(); - column = DL.getCol(); - haveLoc = true; + llvm::DebugLoc DL = m.inst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } } - } - - std::ostringstream body; - - body << " [!] 
dynamic stack allocation detected for variable '" << d.varName << "'\n"; - body << " allocated type: " << d.typeName << "\n"; - body << " size of this allocation is not compile-time constant " - "(VLA / variable alloca) and may lead to unbounded stack usage\n"; - Diagnostic diag; - diag.funcName = d.funcName; - diag.line = haveLoc ? line : 0; - diag.column = haveLoc ? column : 0; - diag.severity = DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::VLAUsage; - diag.message = body.str(); - result.diagnostics.push_back(std::move(diag)); - } - - // 10) Analyse des usages d'alloca (tainted / taille excessive) - std::vector allocaUsageIssues; - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - bool isRec = state.RecursiveFuncs.count(&F) != 0; - bool isInf = state.InfiniteRecursionFuncs.count(&F) != 0; - analyzeAllocaUsageInFunction(F, DL, isRec, isInf, allocaUsageIssues); - } + std::ostringstream body; - for (const auto& a : allocaUsageIssues) - { - unsigned line = 0; - unsigned column = 0; - bool haveLoc = false; - if (a.allocaInst) - { - llvm::DebugLoc DL = a.allocaInst->getDebugLoc(); - if (DL) + body << "Function: " << m.funcName; + if (haveLoc) { - line = DL.getLine(); - column = DL.getCol(); - haveLoc = true; + body << " (line " << line << ", column " << column << ")"; } - } - - bool isOversized = false; - if (a.sizeIsConst && a.sizeBytes >= allocaLargeThreshold) - isOversized = true; - else if (a.hasUpperBound && a.upperBoundBytes >= allocaLargeThreshold) - isOversized = true; - else if (a.sizeIsConst && config.stackLimit != 0 && a.sizeBytes >= config.stackLimit) - isOversized = true; + body << "\n"; - std::ostringstream body; - Diagnostic diag; - diag.funcName = a.funcName; - diag.line = haveLoc ? line : 0; - diag.column = haveLoc ? column : 0; + body << " [!!] 
potential stack buffer overflow in " << m.intrinsicName + << " on variable '" << m.varName << "'\n"; + body << " destination stack buffer size: " << m.destSizeBytes << " bytes\n"; + body << " requested " << m.lengthBytes << " bytes to be copied/initialized\n"; - if (isOversized) - { - diag.severity = DiagnosticSeverity::Error; - diag.errCode = DescriptiveErrorCode::AllocaTooLarge; - body << " [!!] large alloca on the stack for variable '" << a.varName << "'\n"; - } - else if (a.userControlled) - { - diag.severity = DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::AllocaUserControlled; - body << " [!!] user-controlled alloca size for variable '" << a.varName << "'\n"; - } - else - { + Diagnostic diag; + diag.funcName = m.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; diag.severity = DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::AllocaUsageWarning; - body << " [!] dynamic alloca on the stack for variable '" << a.varName << "'\n"; - } - - body << " allocation performed via alloca/VLA; stack usage grows with runtime " - "value\n"; - - if (a.sizeIsConst) - { - body << " requested stack size: " << a.sizeBytes << " bytes\n"; - } - else if (a.hasUpperBound) - { - body << " inferred upper bound for size: " << a.upperBoundBytes << " bytes\n"; - } - else - { - body << " size is unbounded at compile time\n"; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); } + } - if (a.isInfiniteRecursive) - { - // Any alloca inside infinite recursion will blow the stack. - diag.severity = DiagnosticSeverity::Error; - body << " function is infinitely recursive; this alloca runs at every frame " - "and guarantees stack overflow\n"; - } - else if (a.isRecursive) + static void + appendSizeMinusKDiagnostics(AnalysisResult& result, + const std::vector& issues) + { + for (const auto& s : issues) { - // Controlled recursion still compounds stack usage across frames. 
- if (diag.severity != DiagnosticSeverity::Error && (isOversized || a.userControlled)) + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (s.inst) { - diag.severity = DiagnosticSeverity::Error; + llvm::DebugLoc DL = s.inst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } } - body << " function is recursive; this allocation repeats at each recursion " - "depth and can exhaust the stack\n"; - } - if (isOversized) - { - body << " exceeds safety threshold of " << allocaLargeThreshold << " bytes"; - if (config.stackLimit != 0) + std::ostringstream body; + if (s.hasPointerDest) { - body << " (stack limit: " << config.stackLimit << " bytes)"; + body << " [!] potential unsafe write with length (size - " << s.k << ")"; } - body << "\n"; - } - else if (a.userControlled) - { - body << " size depends on user-controlled input " - "(function argument or non-local value)\n"; - } - else - { - body << " size does not appear user-controlled but remains " - "runtime-dependent\n"; - } - - diag.message = body.str(); - result.diagnostics.push_back(std::move(diag)); - } - - // 11) Détection des débordements via memcpy/memset sur des buffers de stack - std::vector memIssues; - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - analyzeMemIntrinsicOverflowsInFunction(F, DL, memIssues); - } - - for (const auto& m : memIssues) - { - unsigned line = 0; - unsigned column = 0; - bool haveLoc = false; - if (m.inst) - { - llvm::DebugLoc DL = m.inst->getDebugLoc(); - if (DL) + else { - line = DL.getLine(); - column = DL.getCol(); - haveLoc = true; + body << " [!] potential unsafe size-" << s.k << " argument passed"; } - } - - std::ostringstream body; - - body << "Function: " << m.funcName; - if (haveLoc) - { - body << " (line " << line << ", column " << column << ")"; - } - body << "\n"; - - body << " [!!] 
potential stack buffer overflow in " << m.intrinsicName - << " on variable '" << m.varName << "'\n"; - body << " destination stack buffer size: " << m.destSizeBytes << " bytes\n"; - body << " requested " << m.lengthBytes << " bytes to be copied/initialized\n"; - - Diagnostic diag; - diag.funcName = m.funcName; - diag.line = haveLoc ? line : 0; - diag.column = haveLoc ? column : 0; - diag.severity = DiagnosticSeverity::Warning; - diag.message = body.str(); - result.diagnostics.push_back(std::move(diag)); - } + if (!s.sinkName.empty()) + body << " in " << s.sinkName; + body << "\n"; + if (s.hasPointerDest && !s.ptrNonNull) + body << " destination pointer may be null\n"; + if (!s.sizeAboveK) + body << " size operand may be <= " << s.k << "\n"; - // 12) Détection de plusieurs stores dans un même buffer de stack - std::vector multiStoreIssues; - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - analyzeMultipleStoresInFunction(F, multiStoreIssues); + Diagnostic diag; + diag.funcName = s.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? 
column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::SizeMinusOneWrite; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } } - for (const auto& ms : multiStoreIssues) + static void + appendMultipleStoreDiagnostics(AnalysisResult& result, + const std::vector& issues) { - unsigned line = 0; - unsigned column = 0; - bool haveLoc = false; - if (ms.allocaInst) + for (const auto& ms : issues) { - llvm::DebugLoc DL = ms.allocaInst->getDebugLoc(); - if (DL) + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (ms.allocaInst) { - line = DL.getLine(); - column = DL.getCol(); - haveLoc = true; + llvm::DebugLoc DL = ms.allocaInst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } } - } - - std::ostringstream body; - Diagnostic diag; - body << " [!Info] multiple stores to stack buffer '" << ms.varName - << "' in this function (" << ms.storeCount << " store instruction(s)"; - diag.errCode = DescriptiveErrorCode::MultipleStoresToStackBuffer; - if (ms.distinctIndexCount > 0) - { - body << ", " << ms.distinctIndexCount << " distinct index expression(s)"; - } - body << ")\n"; + std::ostringstream body; + Diagnostic diag; - if (ms.distinctIndexCount == 1) - { - body << " all stores use the same index expression " - "(possible redundant or unintended overwrite)\n"; - } - else if (ms.distinctIndexCount > 1) - { - body << " stores use different index expressions; " - "verify indices are correct and non-overlapping\n"; - } + body << " [!Info] multiple stores to stack buffer '" << ms.varName + << "' in this function (" << ms.storeCount << " store instruction(s)"; + diag.errCode = DescriptiveErrorCode::MultipleStoresToStackBuffer; + if (ms.distinctIndexCount > 0) + { + body << ", " << ms.distinctIndexCount << " distinct index expression(s)"; + } + body << ")\n"; - diag.funcName = ms.funcName; - diag.line = haveLoc ? 
line : 0; - diag.column = haveLoc ? column : 0; - diag.severity = DiagnosticSeverity::Info; - diag.message = body.str(); - result.diagnostics.push_back(std::move(diag)); - } + if (ms.distinctIndexCount == 1) + { + body << " all stores use the same index expression " + "(possible redundant or unintended overwrite)\n"; + } + else if (ms.distinctIndexCount > 1) + { + body << " stores use different index expressions; " + "verify indices are correct and non-overlapping\n"; + } - // 13) Détection des reconstructions invalides de pointeur de base (offsetof/container_of) - std::vector baseReconIssues; - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - analyzeInvalidBaseReconstructionsInFunction(F, DL, baseReconIssues); + diag.funcName = ms.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.severity = DiagnosticSeverity::Info; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } } - for (const auto& br : baseReconIssues) + static void appendInvalidBaseReconstructionDiagnostics( + AnalysisResult& result, + const std::vector& issues) { - unsigned line = 0; - unsigned column = 0; - unsigned startLine = 0; - unsigned startColumn = 0; - unsigned endLine = 0; - unsigned endColumn = 0; - bool haveLoc = false; - - if (br.inst) + for (const auto& br : issues) { - llvm::DebugLoc DL = br.inst->getDebugLoc(); - if (DL) - { - line = DL.getLine(); - startLine = DL.getLine(); - startColumn = DL.getCol(); - column = DL.getCol(); - endLine = DL.getLine(); - endColumn = DL.getCol(); - haveLoc = true; + unsigned line = 0; + unsigned column = 0; + unsigned startLine = 0; + unsigned startColumn = 0; + unsigned endLine = 0; + unsigned endColumn = 0; + bool haveLoc = false; - if (auto* loc = DL.get()) + if (br.inst) + { + llvm::DebugLoc DL = br.inst->getDebugLoc(); + if (DL) { - if (auto* scope = llvm::dyn_cast(loc)) + line = DL.getLine(); + startLine = 
DL.getLine(); + startColumn = DL.getCol(); + column = DL.getCol(); + endLine = DL.getLine(); + endColumn = DL.getCol(); + haveLoc = true; + + if (auto* loc = DL.get()) { - if (scope->getColumn() != 0) + if (auto* scope = llvm::dyn_cast(loc)) { - endColumn = scope->getColumn() + 1; + if (scope->getColumn() != 0) + { + endColumn = scope->getColumn() + 1; + } } } } } - } - std::ostringstream body; + std::ostringstream body; - body << " [!!] potential UB: invalid base reconstruction via offsetof/container_of\n"; - body << " variable: '" << br.varName << "'\n"; - body << " source member: " << br.sourceMember << "\n"; - body << " offset applied: " << (br.offsetUsed >= 0 ? "+" : "") << br.offsetUsed - << " bytes\n"; - body << " target type: " << br.targetType << "\n"; - - if (br.isOutOfBounds) - { - body << " [ERROR] derived pointer points OUTSIDE the valid object range\n"; - body << " (this will cause undefined behavior if dereferenced)\n"; - } - else - { - body << " [WARNING] unable to verify that derived pointer points to a valid " - "object\n"; - body << " (potential undefined behavior if offset is incorrect)\n"; - } + body << " [!!] potential UB: invalid base reconstruction via " + "offsetof/container_of\n"; + body << " variable: '" << br.varName << "'\n"; + body << " source member: " << br.sourceMember << "\n"; + body << " offset applied: " << (br.offsetUsed >= 0 ? "+" : "") + << br.offsetUsed << " bytes\n"; + body << " target type: " << br.targetType << "\n"; - Diagnostic diag; - diag.funcName = br.funcName; - diag.line = haveLoc ? line : 0; - diag.column = haveLoc ? column : 0; - diag.startLine = haveLoc ? startLine : 0; - diag.startColumn = haveLoc ? startColumn : 0; - diag.endLine = haveLoc ? endLine : 0; - diag.endColumn = haveLoc ? endColumn : 0; - diag.severity = - br.isOutOfBounds ? 
DiagnosticSeverity::Error : DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::InvalidBaseReconstruction; - diag.message = body.str(); - result.diagnostics.push_back(std::move(diag)); - } + if (br.isOutOfBounds) + { + body + << " [ERROR] derived pointer points OUTSIDE the valid object range\n"; + body << " (this will cause undefined behavior if dereferenced)\n"; + } + else + { + body << " [WARNING] unable to verify that derived pointer points to a " + "valid " + "object\n"; + body << " (potential undefined behavior if offset is " + "incorrect)\n"; + } - // 14) Détection de fuite de pointeurs de stack (use-after-return potentiel) - std::vector escapeIssues; - for (llvm::Function& F : mod) - { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - analyzeStackPointerEscapesInFunction(F, escapeIssues); + Diagnostic diag; + diag.funcName = br.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.startLine = haveLoc ? startLine : 0; + diag.startColumn = haveLoc ? startColumn : 0; + diag.endLine = haveLoc ? endLine : 0; + diag.endColumn = haveLoc ? endColumn : 0; + diag.severity = + br.isOutOfBounds ? 
DiagnosticSeverity::Error : DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::InvalidBaseReconstruction; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } } - for (const auto& e : escapeIssues) + static void appendStackPointerEscapeDiagnostics( + AnalysisResult& result, const std::vector& issues) { - unsigned line = 0; - unsigned column = 0; - bool haveLoc = false; - if (e.inst) + for (const auto& e : issues) { - llvm::DebugLoc DL = e.inst->getDebugLoc(); - if (DL) + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (e.inst) { - line = DL.getLine(); - column = DL.getCol(); - haveLoc = true; + llvm::DebugLoc DL = e.inst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } } - } - std::ostringstream body; + std::ostringstream body; - body << " [!!] stack pointer escape: address of variable '" << e.varName - << "' escapes this function\n"; + body << " [!!] stack pointer escape: address of variable '" << e.varName + << "' escapes this function\n"; - if (e.escapeKind == "return") - { - body << " escape via return statement " - "(pointer to stack returned to caller)\n"; - } - else if (e.escapeKind == "store_global") - { - if (!e.targetName.empty()) + if (e.escapeKind == "return") { - body << " stored into global variable '" << e.targetName - << "' (pointer may be used after the function returns)\n"; + body << " escape via return statement " + "(pointer to stack returned to caller)\n"; } - else + else if (e.escapeKind == "store_global") { - body << " stored into a global variable " - "(pointer may be used after the function returns)\n"; + if (!e.targetName.empty()) + { + body << " stored into global variable '" << e.targetName + << "' (pointer may be used after the function returns)\n"; + } + else + { + body << " stored into a global variable " + "(pointer may be used after the function returns)\n"; + } } - } - else if (e.escapeKind == "store_unknown") - { 
- body << " stored through a non-local pointer " - "(e.g. via an out-parameter; pointer may outlive this function)\n"; - if (!e.targetName.empty()) + else if (e.escapeKind == "store_unknown") { - body << " destination pointer/value name: '" << e.targetName << "'\n"; + body << " stored through a non-local pointer " + "(e.g. via an out-parameter; pointer may outlive this function)\n"; + if (!e.targetName.empty()) + { + body << " destination pointer/value name: '" << e.targetName << "'\n"; + } } - } - else if (e.escapeKind == "call_callback") - { - body << " address passed as argument to an indirect call " - "(callback may capture the pointer beyond this function)\n"; - } - else if (e.escapeKind == "call_arg") - { - if (!e.targetName.empty()) + else if (e.escapeKind == "call_callback") { - body << " address passed as argument to function '" << e.targetName - << "' (callee may capture the pointer beyond this function)\n"; + body << " address passed as argument to an indirect call " + "(callback may capture the pointer beyond this function)\n"; } - else + else if (e.escapeKind == "call_arg") { - body << " address passed as argument to a function " - "(callee may capture the pointer beyond this function)\n"; + if (!e.targetName.empty()) + { + body << " address passed as argument to function '" << e.targetName + << "' (callee may capture the pointer beyond this function)\n"; + } + else + { + body << " address passed as argument to a function " + "(callee may capture the pointer beyond this function)\n"; + } } - } - Diagnostic diag; - diag.funcName = e.funcName; - diag.line = haveLoc ? line : 0; - diag.column = haveLoc ? column : 0; - diag.severity = DiagnosticSeverity::Warning; - diag.errCode = DescriptiveErrorCode::StackPointerEscape; - diag.message = body.str(); - result.diagnostics.push_back(std::move(diag)); + Diagnostic diag; + diag.funcName = e.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? 
column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::StackPointerEscape; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } } - // 15) Const-correctness: parameters that can be made const - std::vector constParamIssues; - for (llvm::Function& F : mod) + static void + appendConstParamDiagnostics(AnalysisResult& result, + const std::vector& issues) { - if (F.isDeclaration()) - continue; - if (!shouldAnalyzeFunction(F)) - continue; - analyzeConstParamsInFunction(F, constParamIssues); - } + for (const auto& cp : issues) + { + std::ostringstream body; + Diagnostic diag; + std::string displayFuncName = analysis::formatFunctionNameForMessage(cp.funcName); - for (const auto& cp : constParamIssues) - { - std::ostringstream body; - Diagnostic diag; - std::string displayFuncName = formatFunctionNameForMessage(cp.funcName); + diag.severity = DiagnosticSeverity::Info; + diag.errCode = DescriptiveErrorCode::ConstParameterNotModified; - diag.severity = DiagnosticSeverity::Info; - diag.errCode = DescriptiveErrorCode::ConstParameterNotModified; + const char* prefix = "[!]"; + if (diag.severity == DiagnosticSeverity::Warning) + prefix = "[!!]"; + else if (diag.severity == DiagnosticSeverity::Error) + prefix = "[!!!]"; - const char* prefix = "[!]"; - if (diag.severity == DiagnosticSeverity::Warning) - prefix = "[!!]"; - else if (diag.severity == DiagnosticSeverity::Error) - prefix = "[!!!]"; + const char* subLabel = "Pointer"; + if (cp.pointerConstOnly) + { + subLabel = "PointerConstOnly"; + } + else if (cp.isReference) + { + subLabel = cp.isRvalueRef ? "ReferenceRvaluePreferValue" : "Reference"; + } - const char* subLabel = "Pointer"; - if (cp.pointerConstOnly) - { - subLabel = "PointerConstOnly"; - } - else if (cp.isReference) - { - subLabel = cp.isRvalueRef ? "ReferenceRvaluePreferValue" : "Reference"; - } + if (cp.isRvalueRef) + { + body << " " << prefix << "ConstParameterNotModified." 
<< subLabel + << ": parameter '" << cp.paramName << "' in function '" << displayFuncName + << "' is an rvalue reference and is never used to modify the referred " + "object\n"; + body << " consider passing by value (" << cp.suggestedType + << ") or const reference (" << cp.suggestedTypeAlt << ")\n"; + body << " current type: " << cp.currentType << "\n"; + } + else if (cp.pointerConstOnly) + { + body << " " << prefix << "ConstParameterNotModified." << subLabel + << ": parameter '" << cp.paramName << "' in function '" << displayFuncName + << "' is declared '" << cp.currentType + << "' but the pointed object is never modified\n"; + body << " consider '" << cp.suggestedType + << "' for API const-correctness\n"; + } + else + { + body << " " << prefix << "ConstParameterNotModified." << subLabel + << ": parameter '" << cp.paramName << "' in function '" << displayFuncName + << "' is never used to modify the " + << (cp.isReference ? "referred" : "pointed") << " object\n"; + } - if (cp.isRvalueRef) - { - body - << " " << prefix << "ConstParameterNotModified." << subLabel << ": parameter '" - << cp.paramName << "' in function '" << displayFuncName - << "' is an rvalue reference and is never used to modify the referred object\n"; - body << " consider passing by value (" << cp.suggestedType - << ") or const reference (" << cp.suggestedTypeAlt << ")\n"; - body << " current type: " << cp.currentType << "\n"; - } - else if (cp.pointerConstOnly) - { - body << " " << prefix << "ConstParameterNotModified." << subLabel - << ": parameter '" << cp.paramName << "' in function '" << displayFuncName - << "' is declared '" << cp.currentType - << "' but the pointed object is never modified\n"; - body << " consider '" << cp.suggestedType << "' for API const-correctness\n"; - } - else - { - body << " " << prefix << "ConstParameterNotModified." << subLabel - << ": parameter '" << cp.paramName << "' in function '" << displayFuncName - << "' is never used to modify the " - << (cp.isReference ? 
"referred" : "pointed") << " object\n"; - } + if (!cp.isRvalueRef) + { + body << " current type: " << cp.currentType << "\n"; + body << " suggested type: " << cp.suggestedType << "\n"; + } - if (!cp.isRvalueRef) - { - body << " current type: " << cp.currentType << "\n"; - body << " suggested type: " << cp.suggestedType << "\n"; + diag.funcName = cp.funcName; + diag.line = cp.line; + diag.column = cp.column; + diag.startLine = cp.line; + diag.startColumn = cp.column; + diag.endLine = cp.line; + diag.endColumn = cp.column; + diag.message = body.str(); + diag.ruleId = std::string("ConstParameterNotModified.") + subLabel; + result.diagnostics.push_back(std::move(diag)); } - - diag.funcName = cp.funcName; - diag.line = cp.line; - diag.column = cp.column; - diag.startLine = cp.line; - diag.startColumn = cp.column; - diag.endLine = cp.line; - diag.endColumn = cp.column; - diag.message = body.str(); - diag.ruleId = std::string("ConstParameterNotModified.") + subLabel; - result.diagnostics.push_back(std::move(diag)); } + } // namespace - return result; - } + // ============================================================================ + // Types internes + // ============================================================================ + + // ============================================================================ + // API publique : analyzeModule / analyzeFile + // ============================================================================ - static LanguageType detectFromExtension(const std::string& path) + AnalysisResult analyzeModule(llvm::Module& mod, const AnalysisConfig& config) { - auto pos = path.find_last_of('.'); - if (pos == std::string::npos) - return LanguageType::Unknown; + runFunctionAttrsPass(mod); - std::string ext = path.substr(pos + 1); - std::transform(ext.begin(), ext.end(), ext.begin(), - [](unsigned char c) { return std::tolower(c); }); + ModuleAnalysisContext ctx = buildContext(mod, config); + const llvm::DataLayout& DL = *ctx.dataLayout; + 
auto shouldAnalyzeFunction = [&](const llvm::Function& F) -> bool + { return ctx.shouldAnalyze(F); }; - if (ext == "ll") - return LanguageType::LLVM_IR; + // 1) Stack locale par fonction + LocalStackMap localStack = computeLocalStacks(ctx); - if (ext == "c") - return LanguageType::C; + // 2) Graphe d'appels + analysis::CallGraph CG = buildCallGraphFiltered(ctx); - if (ext == "cpp" || ext == "cc" || ext == "cxx" || ext == "c++" || ext == "cp" || - ext == "C") - return LanguageType::CXX; + // 3) Propagation + détection de récursivité + analysis::InternalAnalysisState state = computeRecursionState(ctx, CG, localStack); - return LanguageType::Unknown; - } + // 4) Construction du résultat public + FunctionAuxData aux; + AnalysisResult result = buildResults(ctx, localStack, state, CG, aux); - LanguageType detectLanguageFromFile(const std::string& path, llvm::LLVMContext& ctx) - { - { - llvm::SMDiagnostic diag; - if (auto mod = llvm::parseIRFile(path, diag, ctx)) - { - return LanguageType::LLVM_IR; - } - } + // 4b) Emit summary diagnostics for recursion/overflow flags (for JSON parity) + emitSummaryDiagnostics(result, ctx, aux); - return detectFromExtension(path); - } + StackSize allocaLargeThreshold = analysis::computeAllocaLargeThreshold(config); - AnalysisResult analyzeFile(const std::string& filename, const AnalysisConfig& config, - llvm::LLVMContext& ctx, llvm::SMDiagnostic& err) - { + // 6) Détection des dépassements de buffer sur la stack (analyse intra-fonction) + std::vector bufferIssues = + analysis::analyzeStackBufferOverflows(mod, shouldAnalyzeFunction); + appendStackBufferDiagnostics(result, bufferIssues); - LanguageType lang = detectLanguageFromFile(filename, ctx); - std::unique_ptr mod; + // 8) Détection des allocations dynamiques sur la stack (VLA / alloca variable) + std::vector dynAllocaIssues = + analysis::analyzeDynamicAllocas(mod, shouldAnalyzeFunction); + appendDynamicAllocaDiagnostics(result, dynAllocaIssues); - if (lang == LanguageType::Unknown) - 
{ - std::cerr << "Unsupported input file type: " << filename << "\n"; - return AnalysisResult{config, {}}; - } + // 10) Analyse des usages d'alloca (tainted / taille excessive) + std::vector allocaUsageIssues = analysis::analyzeAllocaUsage( + mod, DL, state.RecursiveFuncs, state.InfiniteRecursionFuncs, shouldAnalyzeFunction); + appendAllocaUsageDiagnostics(result, config, allocaLargeThreshold, allocaUsageIssues); - // if (verboseLevel >= 1) - // std::cout << "Language: " << ctrace::stack::enumToString(lang) << "\n"; + // 11) Détection des débordements via memcpy/memset sur des buffers de stack + std::vector memIssues = + analysis::analyzeMemIntrinsicOverflows(mod, DL, shouldAnalyzeFunction); + appendMemIntrinsicDiagnostics(result, memIssues); - if (lang != LanguageType::LLVM_IR) - { - // if (verboseLevel >= 1) - // std::cout << "Compiling source file to LLVM IR...\n"; - std::vector args; - args.push_back("-emit-llvm"); - args.push_back("-S"); - args.push_back("-g"); - if (lang == LanguageType::CXX) - { - args.push_back("-x"); - args.push_back("c++"); - args.push_back("-std=gnu++20"); - } - for (const auto& extraArg : config.extraCompileArgs) - { - args.push_back(extraArg); - } - args.push_back("-fno-discard-value-names"); - args.push_back(filename); - compilerlib::OutputMode mode = compilerlib::OutputMode::ToMemory; - auto res = compilerlib::compile(args, mode); + // 11b) Détection d'écritures avec longueur "size-k" + std::vector sizeMinusKIssues = + analysis::analyzeSizeMinusKWrites(mod, DL, shouldAnalyzeFunction); + appendSizeMinusKDiagnostics(result, sizeMinusKIssues); - if (!res.success) - { - std::cerr << "Compilation failed:\n" << res.diagnostics << '\n'; - return AnalysisResult{config, {}}; - } + // 12) Détection de plusieurs stores dans un même buffer de stack + std::vector multiStoreIssues = + analysis::analyzeMultipleStores(mod, shouldAnalyzeFunction); + appendMultipleStoreDiagnostics(result, multiStoreIssues); - if (res.llvmIR.empty()) - { - std::cerr << 
"No LLVM IR produced by compilerlib::compile\n"; - return AnalysisResult{config, {}}; - } + // 13) Détection des reconstructions invalides de pointeur de base (offsetof/container_of) + std::vector baseReconIssues = + analysis::analyzeInvalidBaseReconstructions(mod, DL, shouldAnalyzeFunction); + appendInvalidBaseReconstructionDiagnostics(result, baseReconIssues); - auto buffer = llvm::MemoryBuffer::getMemBuffer(res.llvmIR, "in_memory_ll"); + // 14) Détection de fuite de pointeurs de stack (use-after-return potentiel) + std::vector escapeIssues = + analysis::analyzeStackPointerEscapes(mod, shouldAnalyzeFunction); + appendStackPointerEscapeDiagnostics(result, escapeIssues); - llvm::SMDiagnostic diag; - mod = llvm::parseIR(buffer->getMemBufferRef(), diag, ctx); + // 15) Const-correctness: parameters that can be made const + std::vector constParamIssues = + analysis::analyzeConstParams(mod, shouldAnalyzeFunction); + appendConstParamDiagnostics(result, constParamIssues); - if (!mod) - { - std::string msg; - llvm::raw_string_ostream os(msg); - diag.print("in_memory_ll", os); - std::cerr << "Failed to parse in-memory LLVM IR:\n" << os.str(); - return AnalysisResult{config, {}}; - } - } + return result; + } - if (lang == LanguageType::LLVM_IR) + AnalysisResult analyzeFile(const std::string& filename, const AnalysisConfig& config, + llvm::LLVMContext& ctx, llvm::SMDiagnostic& err) + { + analysis::ModuleLoadResult load = + analysis::loadModuleForAnalysis(filename, config, ctx, err); + if (!load.module) { - mod = llvm::parseIRFile(filename, err, ctx); - if (!mod) - { - // on laisse err.print au caller si besoin - return AnalysisResult{config, {}}; - } + if (!load.error.empty()) + std::cerr << load.error; + return AnalysisResult{config, {}}; } - AnalysisResult result = analyzeModule(*mod, config); + + AnalysisResult result = analyzeModule(*load.module, config); for (auto& f : result.functions) { if (f.filePath.empty()) @@ -5134,295 +1207,4 @@ namespace ctrace::stack return 
result; } - // --------------------------------------------------------------------------- - // JSON / SARIF serialization helpers - // --------------------------------------------------------------------------- - - namespace - { - - // Petit helper pour échapper les chaînes JSON. - static std::string jsonEscape(const std::string& s) - { - std::string out; - out.reserve(s.size() + 16); - for (char c : s) - { - switch (c) - { - case '\\': - out += "\\\\"; - break; - case '\"': - out += "\\\""; - break; - case '\n': - out += "\\n"; - break; - case '\r': - out += "\\r"; - break; - case '\t': - out += "\\t"; - break; - default: - if (static_cast(c) < 0x20) - { - char buf[7]; - std::snprintf(buf, sizeof(buf), "\\u%04x", c & 0xFF); - out += buf; - } - else - { - out += c; - } - break; - } - } - return out; - } - - // Old helper to convert DiagnosticSeverity to string, don't use it anymore. - static const char* severityToJsonString(DiagnosticSeverity sev) - { - switch (sev) - { - case DiagnosticSeverity::Info: - return "info"; - case DiagnosticSeverity::Warning: - return "warning"; - case DiagnosticSeverity::Error: - return "error"; - } - return "info"; - } - - static const char* severityToSarifLevel(DiagnosticSeverity sev) - { - // SARIF levels: "none", "note", "warning", "error" - switch (sev) - { - case DiagnosticSeverity::Info: - return "note"; - case DiagnosticSeverity::Warning: - return "warning"; - case DiagnosticSeverity::Error: - return "error"; - } - return "note"; - } - - } // anonymous namespace - - static std::string toJsonImpl(const AnalysisResult& result, const std::string* inputFile, - const std::vector* inputFiles) - { - std::ostringstream os; - os << "{\n"; - os << " \"meta\": {\n"; - os << " \"tool\": \"" - << "ctrace-stack-analyzer" - << "\",\n"; - if (inputFiles && !inputFiles->empty()) - { - os << " \"inputFiles\": ["; - for (std::size_t i = 0; i < inputFiles->size(); ++i) - { - os << "\"" << jsonEscape((*inputFiles)[i]) << "\""; - if (i + 1 < 
inputFiles->size()) - os << ", "; - } - os << "],\n"; - } - else if (inputFile) - { - os << " \"inputFile\": \"" << jsonEscape(*inputFile) << "\",\n"; - } - os << " \"mode\": \"" << (result.config.mode == AnalysisMode::IR ? "IR" : "ABI") - << "\",\n"; - os << " \"stackLimit\": " << result.config.stackLimit << ",\n"; - os << " \"analysisTimeMs\": " << -1 << "\n"; - os << " },\n"; - - // Fonctions - os << " \"functions\": [\n"; - for (std::size_t i = 0; i < result.functions.size(); ++i) - { - const auto& f = result.functions[i]; - os << " {\n"; - std::string filePath = f.filePath; - if (filePath.empty() && inputFile) - { - filePath = *inputFile; - } - os << " \"file\": \"" << jsonEscape(filePath) << "\",\n"; - os << " \"name\": \"" << jsonEscape(f.name) << "\",\n"; - os << " \"localStack\": "; - if (f.localStackUnknown) - { - os << "null"; - } - else - { - os << f.localStack; - } - os << ",\n"; - os << " \"localStackLowerBound\": "; - if (f.localStackUnknown && f.localStack > 0) - { - os << f.localStack; - } - else - { - os << "null"; - } - os << ",\n"; - os << " \"localStackUnknown\": " << (f.localStackUnknown ? "true" : "false") - << ",\n"; - os << " \"maxStack\": "; - if (f.maxStackUnknown) - { - os << "null"; - } - else - { - os << f.maxStack; - } - os << ",\n"; - os << " \"maxStackLowerBound\": "; - if (f.maxStackUnknown && f.maxStack > 0) - { - os << f.maxStack; - } - else - { - os << "null"; - } - os << ",\n"; - os << " \"maxStackUnknown\": " << (f.maxStackUnknown ? "true" : "false") << ",\n"; - os << " \"hasDynamicAlloca\": " << (f.hasDynamicAlloca ? "true" : "false") - << ",\n"; - os << " \"isRecursive\": " << (f.isRecursive ? "true" : "false") << ",\n"; - os << " \"hasInfiniteSelfRecursion\": " - << (f.hasInfiniteSelfRecursion ? "true" : "false") << ",\n"; - os << " \"exceedsLimit\": " << (f.exceedsLimit ? 
"true" : "false") << "\n"; - os << " }"; - if (i + 1 < result.functions.size()) - os << ","; - os << "\n"; - } - os << " ],\n"; - - // Diagnostics - os << " \"diagnostics\": [\n"; - for (std::size_t i = 0; i < result.diagnostics.size(); ++i) - { - const auto& d = result.diagnostics[i]; - os << " {\n"; - os << " \"id\": \"diag-" << (i + 1) << "\",\n"; - os << " \"severity\": \"" << ctrace::stack::enumToString(d.severity) << "\",\n"; - const std::string ruleId = - d.ruleId.empty() ? std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId; - os << " \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n"; - - std::string diagFilePath = d.filePath; - if (diagFilePath.empty() && inputFile) - { - diagFilePath = *inputFile; - } - os << " \"location\": {\n"; - os << " \"file\": \"" << jsonEscape(diagFilePath) << "\",\n"; - os << " \"function\": \"" << jsonEscape(d.funcName) << "\",\n"; - os << " \"startLine\": " << d.line << ",\n"; - os << " \"startColumn\": " << d.column << ",\n"; - os << " \"endLine\": " << d.endLine << ",\n"; - os << " \"endColumn\": " << d.endColumn << "\n"; - os << " },\n"; - - os << " \"details\": {\n"; - os << " \"message\": \"" << jsonEscape(d.message) << "\",\n"; - os << " \"variableAliasing\": ["; - for (std::size_t j = 0; j < d.variableAliasingVec.size(); ++j) - { - os << "\"" << jsonEscape(d.variableAliasingVec[j]) << "\""; - if (j + 1 < d.variableAliasingVec.size()) - os << ", "; - } - os << "]\n"; - os << " }\n"; // <-- ferme "details" - os << " }"; // <-- ferme le diagnostic - if (i + 1 < result.diagnostics.size()) - os << ","; - os << "\n"; - } - os << " ]\n"; - os << "}\n"; - return os.str(); - } - - std::string toJson(const AnalysisResult& result, const std::string& inputFile) - { - return toJsonImpl(result, &inputFile, nullptr); - } - - std::string toJson(const AnalysisResult& result, const std::vector& inputFiles) - { - return toJsonImpl(result, nullptr, &inputFiles); - } - - std::string toSarif(const AnalysisResult& result, const 
std::string& inputFile, - const std::string& toolName, const std::string& toolVersion) - { - std::ostringstream os; - os << "{\n"; - os << " \"version\": \"2.1.0\",\n"; - os << " \"$schema\": " - "\"https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0.json\",\n"; - os << " \"runs\": [\n"; - os << " {\n"; - os << " \"tool\": {\n"; - os << " \"driver\": {\n"; - os << " \"name\": \"" << jsonEscape(toolName) << "\",\n"; - os << " \"version\": \"" << jsonEscape(toolVersion) << "\"\n"; - os << " }\n"; - os << " },\n"; - os << " \"results\": [\n"; - - for (std::size_t i = 0; i < result.diagnostics.size(); ++i) - { - const auto& d = result.diagnostics[i]; - os << " {\n"; - // Pour le moment, un seul ruleId générique; tu pourras le spécialiser plus tard. - const std::string ruleId = - d.ruleId.empty() ? std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId; - os << " \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n"; - os << " \"level\": \"" << severityToSarifLevel(d.severity) << "\",\n"; - os << " \"message\": { \"text\": \"" << jsonEscape(d.message) << "\" },\n"; - os << " \"locations\": [\n"; - os << " {\n"; - os << " \"physicalLocation\": {\n"; - std::string diagFilePath = d.filePath.empty() ? 
inputFile : d.filePath; - os << " \"artifactLocation\": { \"uri\": \"" << jsonEscape(diagFilePath) - << "\" },\n"; - os << " \"region\": {\n"; - os << " \"startLine\": " << d.line << ",\n"; - os << " \"startColumn\": " << d.column << "\n"; - os << " }\n"; - os << " }\n"; - os << " }\n"; - os << " ]\n"; - os << " }"; - if (i + 1 < result.diagnostics.size()) - os << ","; - os << "\n"; - } - - os << " ]\n"; - os << " }\n"; - os << " ]\n"; - os << "}\n"; - - return os.str(); - } - } // namespace ctrace::stack diff --git a/src/analysis/AllocaUsage.cpp b/src/analysis/AllocaUsage.cpp new file mode 100644 index 0000000..373f2ef --- /dev/null +++ b/src/analysis/AllocaUsage.cpp @@ -0,0 +1,210 @@ +#include "analysis/AllocaUsage.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "analysis/IRValueUtils.hpp" +#include "analysis/IntRanges.hpp" + +namespace ctrace::stack::analysis +{ + namespace + { + static bool isValueUserControlledImpl(const llvm::Value* V, const llvm::Function& F, + llvm::SmallPtrSet& visited, + int depth = 0) + { + using namespace llvm; + + if (!V || depth > 20) + return false; + if (visited.contains(V)) + return false; + visited.insert(V); + + if (isa(V)) + return true; // function argument -> considered user-provided + + if (isa(V)) + return false; + + if (auto* LI = dyn_cast(V)) + { + const Value* ptr = LI->getPointerOperand()->stripPointerCasts(); + if (isa(ptr)) + return true; // load through pointer passed as argument + if (!isa(ptr)) + { + return true; // load from non-local memory (global / heap / unknown) + } + // If it's a local alloca, inspect what gets stored there. 
+ const AllocaInst* AI = cast(ptr); + for (const Use& U : AI->uses()) + { + if (auto* SI = dyn_cast(U.getUser())) + { + if (SI->getPointerOperand()->stripPointerCasts() != ptr) + continue; + if (isValueUserControlledImpl(SI->getValueOperand(), F, visited, depth + 1)) + { + return true; + } + } + } + } + + if (auto* CB = dyn_cast(V)) + { + // Value produced by a call: conservatively treat as external/user input. + (void)F; + (void)CB; + return true; + } + + if (auto* I = dyn_cast(V)) + { + for (const Value* Op : I->operands()) + { + if (isValueUserControlledImpl(Op, F, visited, depth + 1)) + return true; + } + } + else if (auto* CE = dyn_cast(V)) + { + for (const Value* Op : CE->operands()) + { + if (isValueUserControlledImpl(Op, F, visited, depth + 1)) + return true; + } + } + + return false; + } + + static bool isValueUserControlled(const llvm::Value* V, const llvm::Function& F) + { + llvm::SmallPtrSet visited; + return isValueUserControlledImpl(V, F, visited, 0); + } + + static std::optional + getAllocaUpperBoundBytes(const llvm::AllocaInst* AI, const llvm::DataLayout& DL, + const std::map& ranges) + { + using namespace llvm; + + const Value* sizeVal = AI->getArraySize(); + auto findRange = [&ranges](const Value* V) -> const IntRange* + { + auto it = ranges.find(V); + if (it != ranges.end()) + return &it->second; + return nullptr; + }; + + const IntRange* r = findRange(sizeVal); + if (!r) + { + if (auto* LI = dyn_cast(sizeVal)) + { + const Value* ptr = LI->getPointerOperand(); + r = findRange(ptr); + } + } + + if (r && r->hasUpper && r->upper > 0) + { + StackSize elemSize = DL.getTypeAllocSize(AI->getAllocatedType()); + return static_cast(r->upper) * elemSize; + } + + return std::nullopt; + } + + static void analyzeAllocaUsageInFunction(llvm::Function& F, const llvm::DataLayout& DL, + bool isRecursive, bool isInfiniteRecursive, + std::vector& out) + { + using namespace llvm; + + if (F.isDeclaration()) + return; + + auto ranges = computeIntRangesFromICmps(F); + + 
for (BasicBlock& BB : F) + { + for (Instruction& I : BB) + { + auto* AI = dyn_cast(&I); + if (!AI) + continue; + + // Only consider dynamic allocas: alloca(T, size) or VLA. + if (!AI->isArrayAllocation()) + continue; + + AllocaUsageIssue issue; + issue.funcName = F.getName().str(); + issue.varName = deriveAllocaName(AI); + issue.allocaInst = AI; + issue.userControlled = isValueUserControlled(AI->getArraySize(), F); + issue.isRecursive = isRecursive; + issue.isInfiniteRecursive = isInfiniteRecursive; + + StackSize elemSize = DL.getTypeAllocSize(AI->getAllocatedType()); + const Value* arraySizeVal = AI->getArraySize(); + + if (auto* C = dyn_cast(arraySizeVal)) + { + issue.sizeIsConst = true; + issue.sizeBytes = C->getZExtValue() * elemSize; + } + else if (auto* C = tryGetConstFromValue(arraySizeVal, F)) + { + issue.sizeIsConst = true; + issue.sizeBytes = C->getZExtValue() * elemSize; + } + else if (auto upper = getAllocaUpperBoundBytes(AI, DL, ranges)) + { + issue.hasUpperBound = true; + issue.upperBoundBytes = *upper; + } + + out.push_back(std::move(issue)); + } + } + } + } // namespace + + std::vector + analyzeAllocaUsage(llvm::Module& mod, const llvm::DataLayout& DL, + const std::set& recursiveFuncs, + const std::set& infiniteRecursionFuncs, + const std::function& shouldAnalyze) + { + std::vector out; + + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + + bool isRec = recursiveFuncs.count(&F) != 0; + bool isInf = infiniteRecursionFuncs.count(&F) != 0; + analyzeAllocaUsageInFunction(F, DL, isRec, isInf, out); + } + + return out; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/AnalyzerUtils.cpp b/src/analysis/AnalyzerUtils.cpp new file mode 100644 index 0000000..3676181 --- /dev/null +++ b/src/analysis/AnalyzerUtils.cpp @@ -0,0 +1,309 @@ +#include "analysis/AnalyzerUtils.hpp" + +#include +#include +#include +#include + +#include +#include + +#include "mangle.hpp" + +namespace 
ctrace::stack::analysis +{ + std::string formatFunctionNameForMessage(const std::string& name) + { + if (ctrace_tools::isMangled(name)) + return ctrace_tools::demangle(name.c_str()); + return name; + } + + std::string getFunctionSourcePath(const llvm::Function& F) + { + if (auto* sp = F.getSubprogram()) + { + if (auto* file = sp->getFile()) + { + std::string dir = file->getDirectory().str(); + std::string name = file->getFilename().str(); + if (!dir.empty()) + return dir + "/" + name; + return name; + } + } + return {}; + } + + bool getFunctionSourceLocation(const llvm::Function& F, unsigned& line, unsigned& column) + { + line = 0; + column = 0; + + for (const llvm::BasicBlock& BB : F) + { + for (const llvm::Instruction& I : BB) + { + llvm::DebugLoc DL = I.getDebugLoc(); + if (!DL) + continue; + line = DL.getLine(); + column = DL.getCol(); + if (line != 0) + { + if (column == 0) + column = 1; + return true; + } + } + } + + if (auto* sp = F.getSubprogram()) + { + line = sp->getLine(); + if (line != 0) + { + column = 1; + return true; + } + } + + return false; + } + + std::string buildMaxStackCallPath(const llvm::Function* F, const CallGraph& CG, + const InternalAnalysisState& state) + { + std::string path; + std::set visited; + const llvm::Function* current = F; + + while (current) + { + if (!visited.insert(current).second) + break; + + if (!path.empty()) + path += " -> "; + path += current->getName().str(); + + const llvm::Function* bestCallee = nullptr; + StackEstimate bestStack{}; + + auto itCG = CG.find(current); + if (itCG == CG.end()) + break; + + for (const llvm::Function* callee : itCG->second) + { + auto itTotal = state.TotalStack.find(callee); + StackEstimate est = + (itTotal != state.TotalStack.end()) ? 
itTotal->second : StackEstimate{}; + if (!bestCallee || est.bytes > bestStack.bytes) + { + bestCallee = callee; + bestStack = est; + } + } + + if (!bestCallee || bestStack.bytes == 0) + break; + + current = bestCallee; + } + + return path; + } + + static std::string normalizePathForMatch(const std::string& input) + { + std::string out = input; + for (char& c : out) + { + if (c == '\\') + c = '/'; + } + const bool isAbs = !out.empty() && out.front() == '/'; + std::vector parts; + std::string cur; + for (char c : out) + { + if (c == '/') + { + if (!cur.empty()) + { + if (cur == "..") + { + if (!parts.empty()) + parts.pop_back(); + } + else if (cur != ".") + { + parts.push_back(cur); + } + cur.clear(); + } + } + else + { + cur.push_back(c); + } + } + if (!cur.empty()) + { + if (cur == "..") + { + if (!parts.empty()) + parts.pop_back(); + } + else if (cur != ".") + { + parts.push_back(cur); + } + } + std::string norm = isAbs ? "/" : ""; + for (std::size_t i = 0; i < parts.size(); ++i) + { + norm += parts[i]; + if (i + 1 < parts.size()) + norm += "/"; + } + while (!norm.empty() && norm.back() == '/') + norm.pop_back(); + return norm; + } + + static std::string basenameOf(const std::string& path) + { + std::size_t pos = path.find_last_of('/'); + if (pos == std::string::npos) + return path; + if (pos + 1 >= path.size()) + return {}; + return path.substr(pos + 1); + } + + static bool pathHasSuffix(const std::string& path, const std::string& suffix) + { + if (suffix.empty()) + return false; + if (path.size() < suffix.size()) + return false; + if (path.compare(path.size() - suffix.size(), suffix.size(), suffix) != 0) + return false; + if (path.size() == suffix.size()) + return true; + return path[path.size() - suffix.size() - 1] == '/'; + } + + static bool pathHasPrefix(const std::string& path, const std::string& prefix) + { + if (prefix.empty()) + return false; + if (path.size() < prefix.size()) + return false; + if (path.compare(0, prefix.size(), prefix) != 0) + return 
false; + if (path.size() == prefix.size()) + return true; + return path[prefix.size()] == '/'; + } + + bool shouldIncludePath(const std::string& path, const AnalysisConfig& config) + { + if (config.onlyFiles.empty() && config.onlyDirs.empty()) + return true; + if (path.empty()) + return false; + + const std::string normPath = normalizePathForMatch(path); + + for (const auto& file : config.onlyFiles) + { + const std::string normFile = normalizePathForMatch(file); + if (normPath == normFile || pathHasSuffix(normPath, normFile)) + return true; + const std::string fileBase = basenameOf(normFile); + if (!fileBase.empty() && basenameOf(normPath) == fileBase) + return true; + } + + for (const auto& dir : config.onlyDirs) + { + const std::string normDir = normalizePathForMatch(dir); + if (pathHasPrefix(normPath, normDir) || pathHasSuffix(normPath, normDir)) + return true; + const std::string needle = "/" + normDir + "/"; + if (normPath.find(needle) != std::string::npos) + return true; + } + + return false; + } + + bool functionNameMatches(const llvm::Function& F, const AnalysisConfig& config) + { + if (config.onlyFunctions.empty()) + return true; + + auto itaniumBaseName = [](const std::string& symbol) -> std::string + { + if (symbol.rfind("_Z", 0) != 0) + return {}; + std::size_t i = 2; + if (i < symbol.size() && symbol[i] == 'L') + ++i; + if (i >= symbol.size() || !std::isdigit(static_cast(symbol[i]))) + return {}; + std::size_t len = 0; + while (i < symbol.size() && std::isdigit(static_cast(symbol[i]))) + { + len = len * 10 + static_cast(symbol[i] - '0'); + ++i; + } + if (len == 0 || i + len > symbol.size()) + return {}; + return symbol.substr(i, len); + }; + + std::string name = F.getName().str(); + std::string demangledName; + if (ctrace_tools::isMangled(name) || name.rfind("_Z", 0) == 0) + demangledName = ctrace_tools::demangle(name.c_str()); + std::string demangledBase; + if (!demangledName.empty()) + { + std::size_t pos = demangledName.find('('); + if (pos != 
std::string::npos && pos > 0) + demangledBase = demangledName.substr(0, pos); + } + std::string itaniumBase = itaniumBaseName(name); + + for (const auto& filter : config.onlyFunctions) + { + if (name == filter) + return true; + if (!demangledName.empty() && demangledName == filter) + return true; + if (!demangledBase.empty() && demangledBase == filter) + return true; + if (!itaniumBase.empty() && itaniumBase == filter) + return true; + if (ctrace_tools::isMangled(filter)) + { + std::string demangledFilter = ctrace_tools::demangle(filter.c_str()); + if (!demangledName.empty() && demangledName == demangledFilter) + return true; + std::size_t pos = demangledFilter.find('('); + if (pos != std::string::npos && pos > 0) + { + std::string demangledFilterBase = demangledFilter.substr(0, pos); + if (!demangledBase.empty() && demangledBase == demangledFilterBase) + return true; + } + } + } + + return false; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/ConstParamAnalysis.cpp b/src/analysis/ConstParamAnalysis.cpp new file mode 100644 index 0000000..2f95d33 --- /dev/null +++ b/src/analysis/ConstParamAnalysis.cpp @@ -0,0 +1,606 @@ +#include "analysis/ConstParamAnalysis.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + struct TypeQualifiers + { + bool isConst = false; + bool isVolatile = false; + bool isRestrict = false; + }; + + struct StrippedDIType + { + const llvm::DIType* type = nullptr; + TypeQualifiers quals; + }; + + struct ParamDebugInfo + { + std::string name; + const llvm::DIType* type = nullptr; + unsigned line = 0; + unsigned column = 0; + }; + + struct ParamTypeInfo + { + const llvm::DIType* originalType = nullptr; + const llvm::DIType* pointeeType = nullptr; // unqualified, typedefs stripped + const llvm::DIType* pointeeDisplayType = nullptr; // unqualified, typedefs preserved + bool isPointer = false; + 
bool isReference = false; + bool isRvalueReference = false; + bool pointerConst = false; + bool pointerVolatile = false; + bool pointerRestrict = false; + bool pointeeConst = false; + bool pointeeVolatile = false; + bool pointeeRestrict = false; + bool isDoublePointer = false; + bool isVoid = false; + bool isFunctionPointer = false; + }; + + static const llvm::DIType* stripTypedefs(const llvm::DIType* type) + { + using namespace llvm; + const DIType* cur = type; + while (cur) + { + auto* DT = dyn_cast(cur); + if (!DT) + break; + auto tag = DT->getTag(); + if (tag == dwarf::DW_TAG_typedef) + { + cur = DT->getBaseType(); + continue; + } + break; + } + return cur; + } + + static StrippedDIType stripQualifiers(const llvm::DIType* type) + { + using namespace llvm; + StrippedDIType out; + out.type = type; + + while (out.type) + { + auto* DT = dyn_cast(out.type); + if (!DT) + break; + auto tag = DT->getTag(); + if (tag == dwarf::DW_TAG_const_type) + { + out.quals.isConst = true; + out.type = DT->getBaseType(); + continue; + } + if (tag == dwarf::DW_TAG_volatile_type) + { + out.quals.isVolatile = true; + out.type = DT->getBaseType(); + continue; + } + if (tag == dwarf::DW_TAG_restrict_type) + { + out.quals.isRestrict = true; + out.type = DT->getBaseType(); + continue; + } + break; + } + + return out; + } + + static std::string formatDITypeName(const llvm::DIType* type) + { + using namespace llvm; + if (!type) + return std::string(""); + + if (auto* BT = dyn_cast(type)) + { + if (!BT->getName().empty()) + return BT->getName().str(); + } + + if (auto* CT = dyn_cast(type)) + { + if (!CT->getName().empty()) + return CT->getName().str(); + if (!CT->getIdentifier().empty()) + return CT->getIdentifier().str(); + } + + if (auto* DT = dyn_cast(type)) + { + auto tag = DT->getTag(); + if (tag == dwarf::DW_TAG_typedef && !DT->getName().empty()) + { + return DT->getName().str(); + } + if ((tag == dwarf::DW_TAG_const_type) || (tag == dwarf::DW_TAG_volatile_type) || + (tag == 
dwarf::DW_TAG_restrict_type)) + { + return formatDITypeName(DT->getBaseType()); + } + if (!DT->getName().empty()) + return DT->getName().str(); + } + + if (auto* ST = dyn_cast(type)) + { + (void)ST; + return std::string(""); + } + + return std::string(""); + } + + static bool buildParamTypeInfo(const llvm::DIType* type, ParamTypeInfo& info) + { + using namespace llvm; + if (!type) + return false; + + info.originalType = type; + + StrippedDIType top = stripQualifiers(type); + info.pointerConst = top.quals.isConst; + info.pointerVolatile = top.quals.isVolatile; + info.pointerRestrict = top.quals.isRestrict; + + const DIType* topType = stripTypedefs(top.type); + auto* derived = dyn_cast(topType); + if (!derived) + return false; + + auto tag = derived->getTag(); + if (tag == dwarf::DW_TAG_pointer_type) + { + info.isPointer = true; + } + else if (tag == dwarf::DW_TAG_reference_type) + { + info.isReference = true; + } + else if (tag == dwarf::DW_TAG_rvalue_reference_type) + { + info.isReference = true; + info.isRvalueReference = true; + } + else + { + return false; + } + + const DIType* baseType = derived->getBaseType(); + StrippedDIType base = stripQualifiers(baseType); + info.pointeeConst = base.quals.isConst; + info.pointeeVolatile = base.quals.isVolatile; + info.pointeeRestrict = base.quals.isRestrict; + info.pointeeDisplayType = base.type ? 
base.type : baseType; + + const DIType* baseNoTypedef = stripTypedefs(base.type); + info.pointeeType = baseNoTypedef; + + if (!baseNoTypedef) + return true; + + if (auto* baseDerived = dyn_cast(baseNoTypedef)) + { + auto baseTag = baseDerived->getTag(); + if (baseTag == dwarf::DW_TAG_pointer_type || + baseTag == dwarf::DW_TAG_reference_type || + baseTag == dwarf::DW_TAG_rvalue_reference_type) + { + info.isDoublePointer = true; + } + } + + if (isa(baseNoTypedef)) + info.isFunctionPointer = true; + + if (auto* basic = dyn_cast(baseNoTypedef)) + { + if (basic->getName() == "void") + info.isVoid = true; + } + + return true; + } + + static std::string buildTypeString(const ParamTypeInfo& info, const std::string& baseName, + bool addPointeeConst, bool includePointerConst, + const std::string& paramName) + { + std::string out; + if (info.pointeeConst || addPointeeConst) + out += "const "; + if (info.pointeeVolatile) + out += "volatile "; + out += baseName.empty() ? std::string("") : baseName; + + if (info.isReference) + { + out += info.isRvalueReference ? 
" &&" : " &"; + if (!paramName.empty()) + { + out += paramName; + } + return out; + } + + if (info.isPointer) + { + out += " *"; + if (includePointerConst && info.pointerConst) + out += " const"; + if (info.pointerVolatile) + out += " volatile"; + if (info.pointerRestrict) + out += " restrict"; + } + + if (!paramName.empty()) + { + if (!out.empty() && (out.back() == '*' || out.back() == '&')) + out += paramName; + else + out += " " + paramName; + } + + return out; + } + + static std::string buildPointeeQualPrefix(const ParamTypeInfo& info, bool addConst) + { + std::string out; + if (addConst) + out += "const "; + if (info.pointeeVolatile) + out += "volatile "; + if (info.pointeeRestrict) + out += "restrict "; + return out; + } + + static ParamDebugInfo getParamDebugInfo(const llvm::Function& F, const llvm::Argument& Arg) + { + using namespace llvm; + ParamDebugInfo info; + info.name = Arg.getName().str(); + + if (auto* SP = F.getSubprogram()) + { + for (DINode* node : SP->getRetainedNodes()) + { + auto* var = dyn_cast(node); + if (!var || !var->isParameter()) + continue; + if (var->getArg() != Arg.getArgNo() + 1) + continue; + if (!var->getName().empty()) + info.name = var->getName().str(); + info.type = var->getType(); + if (var->getLine() != 0) + info.line = var->getLine(); + break; + } + + if (!info.type) + { + if (auto* subTy = SP->getType()) + { + auto types = subTy->getTypeArray(); + if (types.size() > Arg.getArgNo() + 1) + info.type = types[Arg.getArgNo() + 1]; + } + } + + if (info.line == 0) + info.line = SP->getLine(); + } + + return info; + } + + static bool calleeParamIsReadOnly(const llvm::Function* callee, unsigned argIndex) + { + if (!callee || argIndex >= callee->arg_size()) + return false; + + const llvm::Argument& param = *callee->getArg(argIndex); + ParamDebugInfo dbg = getParamDebugInfo(*callee, param); + if (!dbg.type) + return false; + + ParamTypeInfo typeInfo; + if (!buildParamTypeInfo(dbg.type, typeInfo)) + return false; + + if 
(typeInfo.isDoublePointer || typeInfo.isVoid || typeInfo.isFunctionPointer) + return false; + + if (!typeInfo.isPointer && !typeInfo.isReference) + return false; + + return typeInfo.pointeeConst; + } + + static bool callArgMayWriteThrough(const llvm::CallBase& CB, unsigned argIndex) + { + using namespace llvm; + + const Function* callee = CB.getCalledFunction(); + if (!callee) + { + const Value* called = CB.getCalledOperand(); + if (called) + called = called->stripPointerCasts(); + callee = dyn_cast(called); + } + + if (!callee) + return true; + + if (auto* MI = dyn_cast(&CB)) + { + if (isa(MI)) + return argIndex == 0; + if (isa(MI)) + return argIndex == 0; + } + + if (callee->isIntrinsic()) + { + switch (callee->getIntrinsicID()) + { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::assume: + return false; + default: + break; + } + } + + if (callee->doesNotAccessMemory()) + return false; + if (callee->onlyReadsMemory()) + return false; + + if (argIndex >= callee->arg_size()) + return true; // varargs or unknown + + const AttributeList& attrs = callee->getAttributes(); + if (attrs.hasParamAttr(argIndex, Attribute::ReadOnly) || + attrs.hasParamAttr(argIndex, Attribute::ReadNone)) + { + return false; + } + if (attrs.hasParamAttr(argIndex, Attribute::WriteOnly)) + return true; + + if (calleeParamIsReadOnly(callee, argIndex)) + return false; + + return true; + } + + static bool valueMayBeWrittenThrough(const llvm::Value* root, const llvm::Function& F) + { + using namespace llvm; + (void)F; + + SmallPtrSet visited; + SmallVector worklist; + worklist.push_back(root); + + while (!worklist.empty()) + { + const Value* V = worklist.pop_back_val(); + if (!visited.insert(V).second) + continue; + + for (const Use& U : V->uses()) + { + const User* Usr = U.getUser(); + + if (auto* SI = 
dyn_cast(Usr)) + { + if (SI->getPointerOperand() == V) + return true; + if (SI->getValueOperand() == V) + { + const Value* dst = SI->getPointerOperand()->stripPointerCasts(); + if (auto* AI = dyn_cast(dst)) + { + for (const Use& AU : AI->uses()) + { + if (auto* LI = dyn_cast(AU.getUser())) + { + if (LI->getPointerOperand()->stripPointerCasts() == AI) + worklist.push_back(LI); + } + } + } + else + { + return true; // pointer escapes to non-local memory + } + } + continue; + } + + if (auto* AI = dyn_cast(Usr)) + { + if (AI->getPointerOperand() == V) + return true; + continue; + } + + if (auto* CX = dyn_cast(Usr)) + { + if (CX->getPointerOperand() == V) + return true; + continue; + } + + if (auto* CB = dyn_cast(Usr)) + { + for (unsigned i = 0; i < CB->arg_size(); ++i) + { + if (CB->getArgOperand(i) == V) + { + if (callArgMayWriteThrough(*CB, i)) + return true; + } + } + continue; + } + + if (auto* GEP = dyn_cast(Usr)) + { + worklist.push_back(GEP); + continue; + } + if (auto* BC = dyn_cast(Usr)) + { + worklist.push_back(BC); + continue; + } + if (auto* ASC = dyn_cast(Usr)) + { + worklist.push_back(ASC); + continue; + } + if (auto* PN = dyn_cast(Usr)) + { + if (PN->getType()->isPointerTy()) + worklist.push_back(PN); + continue; + } + if (auto* Sel = dyn_cast(Usr)) + { + if (Sel->getType()->isPointerTy()) + worklist.push_back(Sel); + continue; + } + if (auto* CI = dyn_cast(Usr)) + { + if (CI->getType()->isPointerTy()) + worklist.push_back(CI); + continue; + } + if (isa(Usr)) + return true; // unknown aliasing, be conservative + } + } + + return false; + } + + static void analyzeConstParamsInFunction(llvm::Function& F, + std::vector& out) + { + using namespace llvm; + + if (F.isDeclaration()) + return; + + for (Argument& Arg : F.args()) + { + ParamDebugInfo dbg = getParamDebugInfo(F, Arg); + if (!dbg.type) + continue; + + ParamTypeInfo typeInfo; + if (!buildParamTypeInfo(dbg.type, typeInfo)) + continue; + + if (!typeInfo.isPointer && !typeInfo.isReference) + continue; + 
        // Appends one DynamicAllocaIssue to `out` for every alloca in F whose
        // array size is neither an immediate constant nor traceable to a
        // constant by tryGetConstFromValue. Declarations are ignored.
        static void analyzeDynamicAllocasInFunction(llvm::Function& F,
                                                    std::vector& out)
        {
            using namespace llvm;

            if (F.isDeclaration())
                return;

            for (BasicBlock& BB : F)
            {
                for (Instruction& I : BB)
                {
                    auto* AI = dyn_cast(&I);
                    if (!AI)
                        continue;

                    // Allocation size: three cases are distinguished:
                    //   - immediate constant               -> not a VLA
                    //   - derived from a simple constant   -> not a VLA (heuristic)
                    //   - genuinely dependent on a value   -> VLA / variable alloca
                    Value* arraySizeVal = AI->getArraySize();

                    // 1) The size is directly a constant in the IR.
                    if (llvm::isa(arraySizeVal))
                        continue; // size known at compile time, OK

                    // 2) "Smart" heuristic: try to trace the size back to a constant
                    //    through the stores into a local variable (tryGetConstFromValue).
                    //    Typical example:
                    //        int n = 6;
                    //        char buf[n];   // a VLA in C, but n is effectively constant
                    //
                    //    In that case we do not want to spam a VLA warning:
                    //    the size is treated as effectively constant.
                    if (tryGetConstFromValue(arraySizeVal, F) != nullptr)
                        continue;

                    // 3) From here on this is considered a real VLA / dynamic alloca.
                    DynamicAllocaIssue issue;
                    issue.funcName = F.getName().str();
                    issue.varName = deriveAllocaName(AI);
                    if (AI->getAllocatedType())
                    {
                        // Render the allocated element type as text.
                        std::string tyStr;
                        llvm::raw_string_ostream rso(tyStr);
                        AI->getAllocatedType()->print(rso);
                        issue.typeName = rso.str();
                    }
                    else
                    {
                        issue.typeName = "";
                    }
                    issue.allocaInst = AI;
                    out.push_back(std::move(issue));
                }
            }
        }
    // Finds a human-readable name for the stack slot AI.
    //
    // Returns the IR name of AI when it has one. Otherwise walks the uses of
    // AI (and of values derived from it) and returns the first name found:
    // either the name of a debug variable attached to a visited value, or the
    // IR name of another stack slot into which a visited value is stored.
    // Falls through to the final return when nothing is found or AI is null.
    std::string deriveAllocaName(const llvm::AllocaInst* AI)
    {
        using namespace llvm;

        if (!AI)
            return std::string("");
        if (AI->hasName())
            return AI->getName().str();

        // Depth-first walk over derived values; `visited` prevents revisiting
        // a value reached through several paths (e.g. phi cycles).
        SmallPtrSet visited;
        SmallVector worklist;
        worklist.push_back(AI);

        while (!worklist.empty())
        {
            const Value* V = worklist.back();
            worklist.pop_back();
            if (!visited.insert(V).second)
                continue;

            for (const Use& U : V->uses())
            {
                const User* Usr = U.getUser();

                // A debug record describing this value: use the variable name.
                if (auto* DVI = dyn_cast(Usr))
                {
                    if (auto* var = DVI->getVariable())
                    {
                        if (!var->getName().empty())
                            return var->getName().str();
                    }
                    continue;
                }

                if (auto* SI = dyn_cast(Usr))
                {
                    // Only stores OF the tracked value matter, not stores
                    // THROUGH it.
                    if (SI->getValueOperand() != V)
                        continue;
                    const Value* dst = SI->getPointerOperand()->stripPointerCasts();
                    if (auto* dstAI = dyn_cast(dst))
                    {
                        if (dstAI->hasName())
                            return dstAI->getName().str();
                    }
                    // Unnamed destination: keep looking from there.
                    worklist.push_back(dst);
                    continue;
                }

                // Values that merely forward the pointer are followed.
                if (auto* BC = dyn_cast(Usr))
                {
                    worklist.push_back(BC);
                    continue;
                }
                if (auto* GEP = dyn_cast(Usr))
                {
                    worklist.push_back(GEP);
                    continue;
                }
                if (auto* PN = dyn_cast(Usr))
                {
                    if (PN->getType()->isPointerTy())
                        worklist.push_back(PN);
                    continue;
                }
                if (auto* Sel = dyn_cast(Usr))
                {
                    if (Sel->getType()->isPointerTy())
                        worklist.push_back(Sel);
                    continue;
                }
            }
        }

        return std::string("");
    }
+ found = C; + } + } + } + return found; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/InputPipeline.cpp b/src/analysis/InputPipeline.cpp new file mode 100644 index 0000000..5622006 --- /dev/null +++ b/src/analysis/InputPipeline.cpp @@ -0,0 +1,121 @@ +#include "analysis/InputPipeline.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "compilerlib/compiler.h" + +namespace ctrace::stack::analysis +{ + LanguageType detectFromExtension(const std::string& path) + { + auto pos = path.find_last_of('.'); + if (pos == std::string::npos) + return LanguageType::Unknown; + + std::string ext = path.substr(pos + 1); + std::transform(ext.begin(), ext.end(), ext.begin(), + [](unsigned char c) { return std::tolower(c); }); + + if (ext == "ll") + return LanguageType::LLVM_IR; + + if (ext == "c") + return LanguageType::C; + + if (ext == "cpp" || ext == "cc" || ext == "cxx" || ext == "c++" || ext == "cp" || + ext == "C") + return LanguageType::CXX; + + return LanguageType::Unknown; + } + + LanguageType detectLanguageFromFile(const std::string& path, llvm::LLVMContext& ctx) + { + { + llvm::SMDiagnostic diag; + if (auto mod = llvm::parseIRFile(path, diag, ctx)) + { + return LanguageType::LLVM_IR; + } + } + + return detectFromExtension(path); + } + + ModuleLoadResult loadModuleForAnalysis(const std::string& filename, + const AnalysisConfig& config, llvm::LLVMContext& ctx, + llvm::SMDiagnostic& err) + { + ModuleLoadResult result; + result.language = detectLanguageFromFile(filename, ctx); + + if (result.language == LanguageType::Unknown) + { + result.error = "Unsupported input file type: " + filename + "\n"; + return result; + } + + if (result.language != LanguageType::LLVM_IR) + { + std::vector args; + args.push_back("-emit-llvm"); + args.push_back("-S"); + args.push_back("-g"); + if (result.language == LanguageType::CXX) + { + args.push_back("-x"); + args.push_back("c++"); + 
    // Derives integer bounds for values of F from its integer comparisons
    // against constants.
    //
    // Every icmp with exactly one constant operand contributes a lower and/or
    // upper bound for the non-constant operand. Bounds for the same value are
    // intersected (largest lower bound, smallest upper bound). When the
    // compared value is a load, the same bound is also recorded for the
    // pointer it was loaded from.
    //
    // The bound is recorded as if the comparison held: no branch or dominance
    // information is consulted in this function.
    std::map computeIntRangesFromICmps(llvm::Function& F)
    {
        using namespace llvm;

        std::map ranges;

        // Tightens the range stored for V with the given bounds.
        auto applyConstraint =
            [&ranges](const Value* V, bool hasLB, long long newLB, bool hasUB, long long newUB)
        {
            auto& R = ranges[V];
            if (hasLB)
            {
                if (!R.hasLower || newLB > R.lower)
                {
                    R.hasLower = true;
                    R.lower = newLB;
                }
            }
            if (hasUB)
            {
                if (!R.hasUpper || newUB < R.upper)
                {
                    R.hasUpper = true;
                    R.upper = newUB;
                }
            }
        };

        for (BasicBlock& BB : F)
        {
            for (Instruction& I : BB)
            {
                auto* icmp = dyn_cast(&I);
                if (!icmp)
                    continue;

                Value* op0 = icmp->getOperand(0);
                Value* op1 = icmp->getOperand(1);

                ConstantInt* C = nullptr;
                Value* V = nullptr;

                // Look for a pattern "V ? C" or "C ? V".
                if ((C = dyn_cast(op1)) && !isa(op0))
                {
                    V = op0;
                }
                else if ((C = dyn_cast(op0)) && !isa(op1))
                {
                    V = op1;
                }
                else
                {
                    continue;
                }

                auto pred = icmp->getPredicate();

                bool hasLB = false, hasUB = false;
                long long lb = 0, ub = 0;

                // Signed predicates, plus EQ/NE, with the constant read as signed.
                auto updateForSigned = [&](bool valueIsOp0)
                {
                    long long c = C->getSExtValue();
                    if (valueIsOp0)
                    {
                        switch (pred)
                        {
                        case ICmpInst::ICMP_SLT: // V < C  => V <= C-1
                            hasUB = true;
                            ub = c - 1;
                            break;
                        case ICmpInst::ICMP_SLE: // V <= C => V <= C
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_SGT: // V > C  => V >= C+1
                            hasLB = true;
                            lb = c + 1;
                            break;
                        case ICmpInst::ICMP_SGE: // V >= C => V >= C
                            hasLB = true;
                            lb = c;
                            break;
                        case ICmpInst::ICMP_EQ: // V == C => [C, C]
                            hasLB = true;
                            lb = c;
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_NE:
                            // Approximation: V != C is treated as V <= C.
                            // NOTE(review): V != C does not imply V <= C, so this
                            // bound looks unsound rather than conservative — confirm
                            // that consumers rely on it before changing it.
                            hasUB = true;
                            ub = c;
                            break;
                        default:
                            break;
                        }
                    }
                    else
                    {
                        // "C ? V" is "V ? C" with the comparison mirrored.
                        switch (pred)
                        {
                        case ICmpInst::ICMP_SGT: // C > V  => V < C => V <= C-1
                            hasUB = true;
                            ub = c - 1;
                            break;
                        case ICmpInst::ICMP_SGE: // C >= V => V <= C
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_SLT: // C < V  => V > C => V >= C+1
                            hasLB = true;
                            lb = c + 1;
                            break;
                        case ICmpInst::ICMP_SLE: // C <= V => V >= C
                            hasLB = true;
                            lb = c;
                            break;
                        case ICmpInst::ICMP_EQ: // C == V => [C, C]
                            hasLB = true;
                            lb = c;
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_NE:
                            // Same approximation as the "V != C" case above.
                            hasUB = true;
                            ub = c;
                            break;
                        default:
                            break;
                        }
                    }
                };

                // Unsigned predicates, with the constant read as unsigned and
                // then stored in a signed long long.
                // NOTE(review): constants above LLONG_MAX, and "C-1" for C == 0,
                // end up as negative bounds here — confirm this is acceptable.
                auto updateForUnsigned = [&](bool valueIsOp0)
                {
                    unsigned long long cu = C->getZExtValue();
                    long long c = static_cast(cu);
                    if (valueIsOp0)
                    {
                        switch (pred)
                        {
                        case ICmpInst::ICMP_ULT: // V < C  => V <= C-1
                            hasUB = true;
                            ub = c - 1;
                            break;
                        case ICmpInst::ICMP_ULE: // V <= C
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_UGT: // V > C  => V >= C+1
                            hasLB = true;
                            lb = c + 1;
                            break;
                        case ICmpInst::ICMP_UGE: // V >= C
                            hasLB = true;
                            lb = c;
                            break;
                        case ICmpInst::ICMP_EQ:
                            hasLB = true;
                            lb = c;
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_NE:
                            hasUB = true;
                            ub = c;
                            break;
                        default:
                            break;
                        }
                    }
                    else
                    {
                        switch (pred)
                        {
                        case ICmpInst::ICMP_UGT: // C > V  => V < C
                            hasUB = true;
                            ub = c - 1;
                            break;
                        case ICmpInst::ICMP_UGE: // C >= V => V <= C
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_ULT: // C < V  => V > C
                            hasLB = true;
                            lb = c + 1;
                            break;
                        case ICmpInst::ICMP_ULE: // C <= V => V >= C
                            hasLB = true;
                            lb = c;
                            break;
                        case ICmpInst::ICMP_EQ:
                            hasLB = true;
                            lb = c;
                            hasUB = true;
                            ub = c;
                            break;
                        case ICmpInst::ICMP_NE:
                            hasUB = true;
                            ub = c;
                            break;
                        default:
                            break;
                        }
                    }
                };

                bool valueIsOp0 = (V == op0);

                // Pick the predicate group. EQ and NE are routed to the signed
                // handler, so the EQ/NE cases of the unsigned handler are not
                // reached from here.
                if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE ||
                    pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE ||
                    pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_NE)
                {
                    updateForSigned(valueIsOp0);
                }
                else if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE ||
                         pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE)
                {
                    updateForUnsigned(valueIsOp0);
                }

                if (!(hasLB || hasUB))
                    continue;

                // Apply the constraint to V itself.
                applyConstraint(V, hasLB, lb, hasUB, ub);

                // And, when V is a load, to the pointer it was loaded from.
                if (auto* LI = dyn_cast(V))
                {
                    const Value* ptr = LI->getPointerOperand();
                    applyConstraint(ptr, hasLB, lb, hasUB, ub);
                }
            }
        }

        return ranges;
    }
(valueDependsOnAlloca(Op, AI, visited)) + return true; + } + } + return false; + } + + static bool matchAllocaLoadAddSub(const llvm::Value* V, const llvm::AllocaInst* AI, + int64_t& deltaOut) + { + using namespace llvm; + + const Value* lhs = nullptr; + const Value* rhs = nullptr; + unsigned opcode = 0; + + if (auto* BO = dyn_cast(V)) + { + opcode = BO->getOpcode(); + lhs = BO->getOperand(0); + rhs = BO->getOperand(1); + } + else if (auto* CE = dyn_cast(V)) + { + opcode = CE->getOpcode(); + lhs = CE->getOperand(0); + rhs = CE->getOperand(1); + } + else + { + return false; + } + + if (opcode != Instruction::Add && opcode != Instruction::Sub) + return false; + + const auto* lhsC = dyn_cast(lhs); + const auto* rhsC = dyn_cast(rhs); + bool lhsIsLoad = isLoadFromAlloca(lhs, AI); + bool rhsIsLoad = isLoadFromAlloca(rhs, AI); + + if (opcode == Instruction::Add) + { + if (lhsIsLoad && rhsC) + { + deltaOut = rhsC->getSExtValue(); + return true; + } + if (rhsIsLoad && lhsC) + { + deltaOut = lhsC->getSExtValue(); + return true; + } + } + else if (opcode == Instruction::Sub) + { + if (lhsIsLoad && rhsC) + { + deltaOut = -rhsC->getSExtValue(); + return true; + } + } + + return false; + } + + struct PtrIntMatch + { + const llvm::Value* ptrOperand = nullptr; + int64_t offset = 0; + bool sawOffset = false; + }; + + static const llvm::Value* stripIntCasts(const llvm::Value* V) + { + using namespace llvm; + + const Value* Cur = V; + while (Cur) + { + if (auto* CI = dyn_cast(Cur)) + { + const Value* Op = CI->getOperand(0); + if (CI->getType()->isIntegerTy() && Op->getType()->isIntegerTy()) + { + Cur = Op; + continue; + } + } + else if (auto* CE = dyn_cast(Cur)) + { + if (CE->isCast()) + { + const Value* Op = CE->getOperand(0); + if (CE->getType()->isIntegerTy() && Op->getType()->isIntegerTy()) + { + Cur = Op; + continue; + } + } + } + break; + } + return Cur ? 
Cur : V; + } + + static const llvm::Value* getPtrToIntOperand(const llvm::Value* V) + { + using namespace llvm; + + if (auto* PTI = dyn_cast(V)) + return PTI->getOperand(0); + if (auto* CE = dyn_cast(V)) + { + if (CE->getOpcode() == Instruction::PtrToInt) + return CE->getOperand(0); + } + return nullptr; + } + + static void collectPtrToIntMatches(const llvm::Value* V, + llvm::SmallVectorImpl& out) + { + using namespace llvm; + + struct IntWorkItem + { + const Value* val = nullptr; + int64_t offset = 0; + bool sawOffset = false; + }; + + SmallVector worklist; + std::map> visited; + + auto recordVisited = [&](const Value* Val, int64_t offset, bool sawOffset) + { + unsigned bit = sawOffset ? 2u : 1u; + unsigned& flags = visited[Val][offset]; + if (flags & bit) + return false; + flags |= bit; + return true; + }; + + worklist.push_back({V, 0, false}); + recordVisited(V, 0, false); + + while (!worklist.empty()) + { + const Value* Cur = stripIntCasts(worklist.back().val); + int64_t curOffset = worklist.back().offset; + bool curSawOffset = worklist.back().sawOffset; + worklist.pop_back(); + + if (const Value* PtrOp = getPtrToIntOperand(Cur)) + { + out.push_back({PtrOp, curOffset, curSawOffset}); + continue; + } + + const Value* lhs = nullptr; + const Value* rhs = nullptr; + unsigned opcode = 0; + + if (auto* BO = dyn_cast(Cur)) + { + opcode = BO->getOpcode(); + lhs = BO->getOperand(0); + rhs = BO->getOperand(1); + } + else if (auto* CE = dyn_cast(Cur)) + { + opcode = CE->getOpcode(); + lhs = CE->getOperand(0); + rhs = CE->getOperand(1); + } + + if (opcode == Instruction::Add || opcode == Instruction::Sub) + { + const auto* lhsC = dyn_cast(lhs); + const auto* rhsC = dyn_cast(rhs); + if (rhsC) + { + int64_t delta = rhsC->getSExtValue(); + if (opcode == Instruction::Sub) + delta = -delta; + int64_t newOffset = curOffset + delta; + if (recordVisited(lhs, newOffset, true)) + worklist.push_back({lhs, newOffset, true}); + continue; + } + if (lhsC && opcode == Instruction::Add) + 
{ + int64_t delta = lhsC->getSExtValue(); + int64_t newOffset = curOffset + delta; + if (recordVisited(rhs, newOffset, true)) + worklist.push_back({rhs, newOffset, true}); + continue; + } + } + + if (auto* PN = dyn_cast(Cur)) + { + for (const Value* In : PN->incoming_values()) + { + if (recordVisited(In, curOffset, curSawOffset)) + worklist.push_back({In, curOffset, curSawOffset}); + } + continue; + } + if (auto* Sel = dyn_cast(Cur)) + { + const Value* T = Sel->getTrueValue(); + const Value* F = Sel->getFalseValue(); + if (recordVisited(T, curOffset, curSawOffset)) + worklist.push_back({T, curOffset, curSawOffset}); + if (recordVisited(F, curOffset, curSawOffset)) + worklist.push_back({F, curOffset, curSawOffset}); + continue; + } + + if (auto* LI = dyn_cast(Cur)) + { + const Value* PtrOp = LI->getPointerOperand()->stripPointerCasts(); + if (auto* AI = dyn_cast(PtrOp)) + { + Type* allocTy = AI->getAllocatedType(); + if (allocTy && allocTy->isIntegerTy()) + { + SmallVector seeds; + SmallVector deltas; + + for (const User* Usr : AI->users()) + { + auto* SI = dyn_cast(Usr); + if (!SI) + continue; + if (SI->getPointerOperand()->stripPointerCasts() != AI) + continue; + const Value* StoredVal = SI->getValueOperand(); + + int64_t delta = 0; + if (matchAllocaLoadAddSub(StoredVal, AI, delta)) + { + deltas.push_back(delta); + continue; + } + + llvm::SmallPtrSet depVisited; + if (!valueDependsOnAlloca(StoredVal, AI, depVisited)) + { + seeds.push_back(StoredVal); + } + } + + if (!seeds.empty()) + { + for (const Value* Seed : seeds) + { + if (recordVisited(Seed, curOffset, curSawOffset)) + worklist.push_back({Seed, curOffset, curSawOffset}); + for (int64_t delta : deltas) + { + int64_t newOffset = curOffset + delta; + if (recordVisited(Seed, newOffset, true)) + worklist.push_back({Seed, newOffset, true}); + } + } + } + else + { + for (const User* Usr : AI->users()) + { + auto* SI = dyn_cast(Usr); + if (!SI) + continue; + if (SI->getPointerOperand()->stripPointerCasts() != AI) + 
continue; + const Value* StoredVal = SI->getValueOperand(); + if (recordVisited(StoredVal, curOffset, curSawOffset)) + worklist.push_back({StoredVal, curOffset, curSawOffset}); + } + } + continue; + } + } + } + } + } + + static bool recordVisitedOffset(std::map>& visited, + const llvm::Value* V, int64_t offset) + { + auto& setRef = visited[V]; + return setRef.insert(offset).second; + } + + static bool getGEPConstantOffsetAndBase(const llvm::Value* V, const llvm::DataLayout& DL, + int64_t& outOffset, const llvm::Value*& outBase) + { + using namespace llvm; + + if (auto* GEP = dyn_cast(V)) + { + APInt offset(64, 0); + if (!GEP->accumulateConstantOffset(DL, offset)) + return false; + outOffset = offset.getSExtValue(); + outBase = GEP->getPointerOperand(); + return true; + } + + if (auto* CE = dyn_cast(V)) + { + if (CE->getOpcode() == Instruction::GetElementPtr) + { + auto* GEP = cast(CE); + APInt offset(64, 0); + if (!GEP->accumulateConstantOffset(DL, offset)) + return false; + outOffset = offset.getSExtValue(); + outBase = GEP->getPointerOperand(); + return true; + } + } + + return false; + } + + struct PtrOrigin + { + const llvm::AllocaInst* alloca = nullptr; + int64_t offset = 0; + }; + + static void collectPointerOrigins(const llvm::Value* V, const llvm::DataLayout& DL, + llvm::SmallVectorImpl& out) + { + using namespace llvm; + + SmallVector, 16> worklist; + std::map> visited; + + worklist.push_back({V, 0}); + recordVisitedOffset(visited, V, 0); + + while (!worklist.empty()) + { + const Value* Cur = worklist.back().first; + int64_t currentOffset = worklist.back().second; + worklist.pop_back(); + + if (auto* AI = dyn_cast(Cur)) + { + Type* allocaTy = AI->getAllocatedType(); + if (allocaTy->isPointerTy()) + { + for (const User* Usr : AI->users()) + { + if (auto* SI = dyn_cast(Usr)) + { + if (SI->getPointerOperand() != AI) + continue; + const Value* StoredVal = SI->getValueOperand(); + if (recordVisitedOffset(visited, StoredVal, currentOffset)) + { + 
worklist.push_back({StoredVal, currentOffset}); + } + } + } + continue; + } + + out.push_back({AI, currentOffset}); + continue; + } + + if (auto* BC = dyn_cast(Cur)) + { + const Value* Src = BC->getOperand(0); + if (recordVisitedOffset(visited, Src, currentOffset)) + worklist.push_back({Src, currentOffset}); + continue; + } + + if (auto* ASC = dyn_cast(Cur)) + { + const Value* Src = ASC->getOperand(0); + if (recordVisitedOffset(visited, Src, currentOffset)) + worklist.push_back({Src, currentOffset}); + continue; + } + + int64_t gepOffset = 0; + const Value* gepBase = nullptr; + if (getGEPConstantOffsetAndBase(Cur, DL, gepOffset, gepBase)) + { + int64_t newOffset = currentOffset + gepOffset; + if (recordVisitedOffset(visited, gepBase, newOffset)) + worklist.push_back({gepBase, newOffset}); + continue; + } + + if (auto* LI = dyn_cast(Cur)) + { + const Value* PtrOp = LI->getPointerOperand(); + if (recordVisitedOffset(visited, PtrOp, currentOffset)) + worklist.push_back({PtrOp, currentOffset}); + continue; + } + + if (auto* PN = dyn_cast(Cur)) + { + for (const Value* In : PN->incoming_values()) + { + if (recordVisitedOffset(visited, In, currentOffset)) + worklist.push_back({In, currentOffset}); + } + continue; + } + + if (auto* Sel = dyn_cast(Cur)) + { + const Value* T = Sel->getTrueValue(); + const Value* F = Sel->getFalseValue(); + if (recordVisitedOffset(visited, T, currentOffset)) + worklist.push_back({T, currentOffset}); + if (recordVisitedOffset(visited, F, currentOffset)) + worklist.push_back({F, currentOffset}); + continue; + } + + if (auto* CE = dyn_cast(Cur)) + { + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast) + { + const Value* Src = CE->getOperand(0); + if (recordVisitedOffset(visited, Src, currentOffset)) + worklist.push_back({Src, currentOffset}); + } + } + } + } + + static bool isPointerDereferencedOrUsed(const llvm::Value* V) + { + using namespace llvm; + + SmallVector worklist; + SmallPtrSet visited; + 
worklist.push_back(V); + + while (!worklist.empty()) + { + const Value* Cur = worklist.back(); + worklist.pop_back(); + if (!visited.insert(Cur).second) + continue; + + for (const Use& U : Cur->uses()) + { + const User* Usr = U.getUser(); + + if (auto* LI = dyn_cast(Usr)) + { + if (LI->getPointerOperand() == Cur) + return true; + continue; + } + if (auto* SI = dyn_cast(Usr)) + { + if (SI->getPointerOperand() == Cur) + return true; + if (SI->getValueOperand() == Cur) + { + const Value* dst = SI->getPointerOperand()->stripPointerCasts(); + if (auto* AI = dyn_cast(dst)) + { + Type* allocTy = AI->getAllocatedType(); + if (allocTy && allocTy->isPointerTy()) + { + for (const User* AUser : AI->users()) + { + if (auto* LI = dyn_cast(AUser)) + { + if (LI->getPointerOperand()->stripPointerCasts() == AI) + { + worklist.push_back(LI); + } + } + } + } + } + } + continue; + } + if (auto* RMW = dyn_cast(Usr)) + { + if (RMW->getPointerOperand() == Cur) + return true; + continue; + } + if (auto* CX = dyn_cast(Usr)) + { + if (CX->getPointerOperand() == Cur) + return true; + continue; + } + if (auto* MI = dyn_cast(Usr)) + { + if (MI->getRawDest() == Cur) + return true; + if (auto* MTI = dyn_cast(MI)) + { + if (MTI->getRawSource() == Cur) + return true; + } + continue; + } + + if (auto* BC = dyn_cast(Usr)) + { + worklist.push_back(BC); + continue; + } + if (auto* ASC = dyn_cast(Usr)) + { + worklist.push_back(ASC); + continue; + } + if (auto* GEP = dyn_cast(Usr)) + { + worklist.push_back(GEP); + continue; + } + if (auto* PN = dyn_cast(Usr)) + { + worklist.push_back(PN); + continue; + } + if (auto* Sel = dyn_cast(Usr)) + { + worklist.push_back(Sel); + continue; + } + if (auto* CE = dyn_cast(Usr)) + { + worklist.push_back(CE); + continue; + } + } + } + + return false; + } + + static std::optional getAllocaTotalSizeBytes(const llvm::AllocaInst* AI, + const llvm::DataLayout& DL) + { + using namespace llvm; + + Type* allocatedTy = AI->getAllocatedType(); + + if (!AI->isArrayAllocation()) + 
{ + return DL.getTypeAllocSize(allocatedTy); + } + + if (auto* C = dyn_cast(AI->getArraySize())) + { + uint64_t count = C->getZExtValue(); + uint64_t elemSize = DL.getTypeAllocSize(allocatedTy); + return count * elemSize; + } + + return std::nullopt; + } + + static void analyzeInvalidBaseReconstructionsInFunction( + llvm::Function& F, const llvm::DataLayout& DL, + std::vector& out) + { + using namespace llvm; + + if (F.isDeclaration()) + return; + + std::map> allocaInfo; + + for (BasicBlock& BB : F) + { + for (Instruction& I : BB) + { + auto* AI = dyn_cast(&I); + if (!AI) + continue; + + std::optional sizeOpt = getAllocaTotalSizeBytes(AI, DL); + if (!sizeOpt.has_value()) + continue; + + std::string varName = + AI->hasName() ? AI->getName().str() : std::string(""); + allocaInfo[AI] = {varName, sizeOpt.value()}; + } + } + + for (BasicBlock& BB : F) + { + for (Instruction& I : BB) + { + if (auto* ITP = dyn_cast(&I)) + { + if (!isPointerDereferencedOrUsed(ITP)) + continue; + + Value* IntVal = ITP->getOperand(0); + + SmallVector matches; + collectPtrToIntMatches(IntVal, matches); + if (matches.empty()) + continue; + + struct AggEntry + { + std::set memberOffsets; + bool anyOutOfBounds = false; + bool anyNonZeroResult = false; + std::string varName; + uint64_t allocaSize = 0; + std::string targetType; + }; + + std::map, AggEntry> agg; + + for (const auto& match : matches) + { + if (!match.sawOffset) + continue; + + SmallVector origins; + collectPointerOrigins(match.ptrOperand, DL, origins); + if (origins.empty()) + continue; + + for (const auto& origin : origins) + { + auto it = allocaInfo.find(origin.alloca); + if (it == allocaInfo.end()) + continue; + + const std::string& varName = it->second.first; + uint64_t allocaSize = it->second.second; + + int64_t resultOffset = origin.offset + match.offset; + bool isOutOfBounds = + (resultOffset < 0) || + (static_cast(resultOffset) >= allocaSize); + + std::string targetType; + Type* targetTy = ITP->getType(); + if (auto* PtrTy = 
dyn_cast(targetTy)) + { + raw_string_ostream rso(targetType); + PtrTy->print(rso); + } + + auto key = std::make_pair(origin.alloca, match.offset); + auto& entry = agg[key]; + entry.memberOffsets.insert(origin.offset); + entry.anyOutOfBounds |= isOutOfBounds; + if (resultOffset != 0) + entry.anyNonZeroResult = true; + entry.varName = varName; + entry.allocaSize = allocaSize; + entry.targetType = targetType.empty() ? "" : targetType; + } + } + + for (auto& kv : agg) + { + const auto& entry = kv.second; + if (entry.memberOffsets.empty()) + continue; + if (!entry.anyOutOfBounds && !entry.anyNonZeroResult) + continue; + + std::ostringstream memberStr; + if (entry.memberOffsets.size() == 1) + { + int64_t mo = *entry.memberOffsets.begin(); + memberStr << (mo != 0 ? "offset +" + std::to_string(mo) : "base"); + } + else + { + memberStr << "offsets "; + bool first = true; + for (int64_t mo : entry.memberOffsets) + { + if (!first) + memberStr << ", "; + memberStr << (mo != 0 ? "+" + std::to_string(mo) : "base"); + first = false; + } + } + + InvalidBaseReconstructionIssue issue; + issue.funcName = F.getName().str(); + issue.varName = entry.varName; + issue.sourceMember = memberStr.str(); + issue.offsetUsed = kv.first.second; + issue.targetType = entry.targetType; + issue.isOutOfBounds = entry.anyOutOfBounds; + issue.inst = &I; + + out.push_back(std::move(issue)); + } + } + + if (auto* GEP = dyn_cast(&I)) + { + if (!isPointerDereferencedOrUsed(GEP)) + continue; + + int64_t gepOffset = 0; + const Value* PtrOp = nullptr; + if (!getGEPConstantOffsetAndBase(GEP, DL, gepOffset, PtrOp)) + continue; + + SmallVector origins; + collectPointerOrigins(PtrOp, DL, origins); + if (origins.empty()) + continue; + + struct AggEntry + { + std::set memberOffsets; + bool anyOutOfBounds = false; + bool anyNonZeroResult = false; + std::string varName; + std::string targetType; + }; + + std::map agg; + + for (const auto& origin : origins) + { + if (origin.offset == 0 && gepOffset >= 0) + { + 
continue; + } + + auto it = allocaInfo.find(origin.alloca); + if (it == allocaInfo.end()) + continue; + + const std::string& varName = it->second.first; + uint64_t allocaSize = it->second.second; + + int64_t resultOffset = origin.offset + gepOffset; + bool isOutOfBounds = + (resultOffset < 0) || + (static_cast(resultOffset) >= allocaSize); + + std::string targetType; + Type* targetTy = GEP->getType(); + raw_string_ostream rso(targetType); + targetTy->print(rso); + + auto& entry = agg[origin.alloca]; + entry.memberOffsets.insert(origin.offset); + entry.anyOutOfBounds |= isOutOfBounds; + if (resultOffset != 0) + entry.anyNonZeroResult = true; + entry.varName = varName; + entry.targetType = targetType; + } + + for (auto& kv : agg) + { + const auto& entry = kv.second; + if (entry.memberOffsets.empty()) + continue; + if (!entry.anyOutOfBounds && !entry.anyNonZeroResult) + continue; + + std::ostringstream memberStr; + if (entry.memberOffsets.size() == 1) + { + int64_t mo = *entry.memberOffsets.begin(); + memberStr << (mo != 0 ? "offset +" + std::to_string(mo) : "base"); + } + else + { + memberStr << "offsets "; + bool first = true; + for (int64_t mo : entry.memberOffsets) + { + if (!first) + memberStr << ", "; + memberStr << (mo != 0 ? 
"+" + std::to_string(mo) : "base"); + first = false; + } + } + + InvalidBaseReconstructionIssue issue; + issue.funcName = F.getName().str(); + issue.varName = entry.varName; + issue.sourceMember = memberStr.str(); + issue.offsetUsed = gepOffset; + issue.targetType = entry.targetType; + issue.isOutOfBounds = entry.anyOutOfBounds; + issue.inst = &I; + + out.push_back(std::move(issue)); + } + } + } + } + } + } // namespace + + std::vector analyzeInvalidBaseReconstructions( + llvm::Module& mod, const llvm::DataLayout& DL, + const std::function& shouldAnalyze) + { + std::vector issues; + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + analyzeInvalidBaseReconstructionsInFunction(F, DL, issues); + } + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/MemIntrinsicOverflow.cpp b/src/analysis/MemIntrinsicOverflow.cpp new file mode 100644 index 0000000..9cb8494 --- /dev/null +++ b/src/analysis/MemIntrinsicOverflow.cpp @@ -0,0 +1,175 @@ +#include "analysis/MemIntrinsicOverflow.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + static std::optional getAllocaTotalSizeBytes(const llvm::AllocaInst* AI, + const llvm::DataLayout& DL) + { + using namespace llvm; + + Type* allocatedTy = AI->getAllocatedType(); + + if (!AI->isArrayAllocation()) + { + return DL.getTypeAllocSize(allocatedTy); + } + + if (auto* C = dyn_cast(AI->getArraySize())) + { + uint64_t count = C->getZExtValue(); + uint64_t elemSize = DL.getTypeAllocSize(allocatedTy); + return count * elemSize; + } + + return std::nullopt; + } + + static void analyzeMemIntrinsicOverflowsInFunction(llvm::Function& F, + const llvm::DataLayout& DL, + std::vector& out) + { + using namespace llvm; + + if (F.isDeclaration()) + return; + + for (BasicBlock& BB : F) + { + for (Instruction& I : BB) + { + auto* CB = dyn_cast(&I); + if 
(!CB) + continue; + + Function* callee = CB->getCalledFunction(); + if (!callee) + continue; + + StringRef name = callee->getName(); + + enum class MemKind + { + None, + MemCpy, + MemSet, + MemMove + }; + MemKind kind = MemKind::None; + + if (auto* II = dyn_cast(CB)) + { + switch (II->getIntrinsicID()) + { + case Intrinsic::memcpy: + kind = MemKind::MemCpy; + break; + case Intrinsic::memset: + kind = MemKind::MemSet; + break; + case Intrinsic::memmove: + kind = MemKind::MemMove; + break; + default: + break; + } + } + + if (kind == MemKind::None) + { + if (name == "memcpy" || name.contains("memcpy")) + kind = MemKind::MemCpy; + else if (name == "memset" || name.contains("memset")) + kind = MemKind::MemSet; + else if (name == "memmove" || name.contains("memmove")) + kind = MemKind::MemMove; + } + + if (kind == MemKind::None) + continue; + + if (CB->arg_size() < 3) + continue; + + Value* dest = CB->getArgOperand(0); + + const Value* cur = dest->stripPointerCasts(); + if (auto* GEP = dyn_cast(cur)) + { + cur = GEP->getPointerOperand(); + } + const AllocaInst* AI = dyn_cast(cur); + if (!AI) + continue; + + auto maybeSize = getAllocaTotalSizeBytes(AI, DL); + if (!maybeSize) + continue; + StackSize destBytes = *maybeSize; + + Value* lenV = CB->getArgOperand(2); + auto* lenC = dyn_cast(lenV); + if (!lenC) + continue; + + uint64_t len = lenC->getZExtValue(); + if (len <= destBytes) + continue; + + MemIntrinsicIssue issue; + issue.funcName = F.getName().str(); + issue.varName = AI->hasName() ? 
AI->getName().str() : std::string(""); + issue.destSizeBytes = destBytes; + issue.lengthBytes = len; + issue.inst = &I; + + switch (kind) + { + case MemKind::MemCpy: + issue.intrinsicName = "memcpy"; + break; + case MemKind::MemSet: + issue.intrinsicName = "memset"; + break; + case MemKind::MemMove: + issue.intrinsicName = "memmove"; + break; + default: + break; + } + + out.push_back(std::move(issue)); + } + } + } + } // namespace + + std::vector + analyzeMemIntrinsicOverflows(llvm::Module& mod, const llvm::DataLayout& DL, + const std::function& shouldAnalyze) + { + std::vector issues; + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + analyzeMemIntrinsicOverflowsInFunction(F, DL, issues); + } + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/SizeMinusKWrites.cpp b/src/analysis/SizeMinusKWrites.cpp new file mode 100644 index 0000000..767b9c1 --- /dev/null +++ b/src/analysis/SizeMinusKWrites.cpp @@ -0,0 +1,490 @@ +#include "analysis/SizeMinusKWrites.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + static llvm::Value* stripCasts(llvm::Value* v) + { + while (auto* cast = llvm::dyn_cast(v)) + v = cast->getOperand(0); + return v; + } + + static std::string canonicalizeSinkName(llvm::StringRef name) + { + if (name.starts_with("__") && name.ends_with("_chk") && name.size() > 6) + { + llvm::StringRef core = name.drop_front(2).drop_back(4); + if (!core.empty()) + return core.str(); + } + return name.str(); + } + + struct SizeMinusKMatch + { + llvm::Value* base = nullptr; + int64_t k = 0; + }; + + struct SizeMinusKSink + { + unsigned dstIdx = 0; + unsigned lenIdx = 0; + }; + + using SizeMinusKSummaryMap = + llvm::DenseMap>; + + template + static SizeMinusKMatch 
matchSizeMinusK(llvm::Value* v, Canonicalize canonicalize) + { + v = canonicalize(v); + if (auto* bin = llvm::dyn_cast(v)) + { + llvm::Value* lhs = canonicalize(bin->getOperand(0)); + llvm::Value* rhs = canonicalize(bin->getOperand(1)); + + if (bin->getOpcode() == llvm::Instruction::Sub) + { + if (auto* c = llvm::dyn_cast(rhs)) + { + int64_t k = c->getSExtValue(); + if (k > 0) + return {lhs, k}; + } + } + if (bin->getOpcode() == llvm::Instruction::Add) + { + if (auto* c = llvm::dyn_cast(rhs)) + { + int64_t k = -c->getSExtValue(); + if (k > 0) + return {lhs, k}; + } + } + } + return {}; + } + + static bool predicateAt(llvm::LazyValueInfo& lvi, llvm::CmpInst::Predicate pred, + llvm::Value* lhs, llvm::Value* rhs, llvm::Instruction* at) + { +#if LLVM_VERSION_MAJOR >= 17 + if (llvm::Constant* c = lvi.getPredicateAt(pred, lhs, rhs, at, false)) + { + if (auto* ci = llvm::dyn_cast(c)) + return ci->isOne(); + } + return false; +#else + return lvi.getPredicateAt(pred, lhs, rhs, at) == llvm::LazyValueInfo::True; +#endif + } + + static bool isNonNullAt(llvm::Value* v, llvm::Instruction* at, llvm::LazyValueInfo& lvi) + { + if (!v || !v->getType()->isPointerTy()) + return false; + if (auto* arg = llvm::dyn_cast(v)) + { + if (arg->hasNonNullAttr()) + return true; + } + if (auto* call = llvm::dyn_cast(v)) + { + if (call->hasRetAttr(llvm::Attribute::NonNull)) + return true; + } + auto* ptrTy = llvm::cast(v->getType()); + auto* nullPtr = llvm::ConstantPointerNull::get(ptrTy); + return predicateAt(lvi, llvm::CmpInst::ICMP_NE, v, nullPtr, at); + } + + static bool isGreaterThanAt(llvm::Value* v, int64_t bound, llvm::Instruction* at, + llvm::LazyValueInfo& lvi) + { + if (!v || !v->getType()->isIntegerTy()) + return false; + if (auto* c = llvm::dyn_cast(v)) + return c->getSExtValue() > bound; + auto* boundConst = llvm::ConstantInt::get(v->getType(), bound, true); + return predicateAt(lvi, llvm::CmpInst::ICMP_SGT, v, boundConst, at); + } + + static bool 
getKnownSinkCallInfo(llvm::CallBase* CB, llvm::TargetLibraryInfo& TLI, + unsigned& dstIdx, unsigned& lenIdx, std::string& sinkName) + { + using namespace llvm; + + if (auto* II = dyn_cast(CB)) + { + if ((II->getIntrinsicID() == Intrinsic::memcpy || + II->getIntrinsicID() == Intrinsic::memmove || + II->getIntrinsicID() == Intrinsic::memset) && + CB->arg_size() >= 3) + { + dstIdx = 0; + lenIdx = 2; + sinkName = "llvm.mem*"; + return true; + } + return false; + } + + int dst = -1; + int len = -1; + bool matched = false; + StringRef name; + + Value* callee = CB->getCalledOperand(); + if (callee) + { + callee = callee->stripPointerCasts(); + if (auto* fn = dyn_cast(callee)) + { + LibFunc lf; + if (TLI.getLibFunc(*fn, lf)) + { + switch (lf) + { + case LibFunc_memcpy: + case LibFunc_memmove: + case LibFunc_memset: + case LibFunc_strncpy: + case LibFunc_strncat: + case LibFunc_stpncpy: + dst = 0; + len = 2; + name = fn->getName(); + matched = true; + break; + default: + break; + } + } + + if (!matched) + { + StringRef fnName = fn->getName(); + if (fnName.contains("memcpy") || fnName.contains("memmove") || + fnName.contains("memset") || fnName.contains("strncpy") || + fnName.contains("strncat") || fnName.contains("stpncpy")) + { + dst = 0; + len = 2; + name = fnName; + matched = true; + } + } + } + } + + if (matched && dst >= 0 && len >= 0 && CB->arg_size() > static_cast(len)) + { + dstIdx = static_cast(dst); + lenIdx = static_cast(len); + sinkName = name.empty() ? 
"lib call" : name.str(); + return true; + } + return false; + } + + template + static std::optional getArgIndex(llvm::Value* v, Canonicalize canonicalize) + { + v = canonicalize(v); + if (auto* arg = llvm::dyn_cast(v)) + return arg->getArgNo(); + return std::nullopt; + } + + static bool addSummarySink(std::vector& sinks, unsigned dstIdx, + unsigned lenIdx) + { + for (const auto& s : sinks) + { + if (s.dstIdx == dstIdx && s.lenIdx == lenIdx) + return false; + } + sinks.push_back({dstIdx, lenIdx}); + return true; + } + + static SizeMinusKSummaryMap buildSizeMinusKSummaries(llvm::Module& mod) + { + using namespace llvm; + SizeMinusKSummaryMap summaries; + + auto buildCanonicalize = [&](Function& F) + { + DenseMap argSlots; + for (Instruction& inst : F.getEntryBlock()) + { + auto* store = dyn_cast(&inst); + if (!store) + continue; + auto* arg = dyn_cast(stripCasts(store->getValueOperand())); + if (!arg) + continue; + auto* slot = dyn_cast(stripCasts(store->getPointerOperand())); + if (!slot) + continue; + argSlots[slot] = arg; + } + + return [argSlots = std::move(argSlots)](Value* v) -> Value* + { + v = stripCasts(v); + if (auto* load = dyn_cast(v)) + { + Value* ptr = stripCasts(load->getPointerOperand()); + if (auto* slot = dyn_cast(ptr)) + { + auto it = argSlots.find(slot); + if (it != argSlots.end()) + return const_cast(it->second); + } + } + return v; + }; + }; + + // Pass 1: direct libc/intrinsic sinks mapped to arguments + for (Function& F : mod) + { + if (F.isDeclaration()) + continue; + + auto canonical = buildCanonicalize(F); + TargetLibraryInfoImpl TLII(Triple(F.getParent()->getTargetTriple())); + TargetLibraryInfo TLI(TLII, &F); + + for (Instruction& I : instructions(F)) + { + auto* CB = dyn_cast(&I); + if (!CB) + continue; + + unsigned dstIdx = 0; + unsigned lenIdx = 0; + std::string sinkName; + if (!getKnownSinkCallInfo(CB, TLI, dstIdx, lenIdx, sinkName)) + continue; + if (dstIdx >= CB->arg_size() || lenIdx >= CB->arg_size()) + continue; + auto dstArg = 
getArgIndex(CB->getArgOperand(dstIdx), canonical); + auto lenArg = getArgIndex(CB->getArgOperand(lenIdx), canonical); + if (!dstArg || !lenArg) + continue; + addSummarySink(summaries[&F], *dstArg, *lenArg); + } + } + + // Pass 2: propagate through wrappers until fixpoint + bool changed = true; + while (changed) + { + changed = false; + for (Function& F : mod) + { + if (F.isDeclaration()) + continue; + + auto canonical = buildCanonicalize(F); + + for (Instruction& I : instructions(F)) + { + auto* CB = dyn_cast(&I); + if (!CB) + continue; + Function* callee = CB->getCalledFunction(); + if (!callee || callee->isDeclaration()) + continue; + auto it = summaries.find(callee); + if (it == summaries.end()) + continue; + + for (const auto& sink : it->second) + { + if (sink.dstIdx >= CB->arg_size() || sink.lenIdx >= CB->arg_size()) + continue; + auto dstArg = getArgIndex(CB->getArgOperand(sink.dstIdx), canonical); + auto lenArg = getArgIndex(CB->getArgOperand(sink.lenIdx), canonical); + if (!dstArg || !lenArg) + continue; + if (addSummarySink(summaries[&F], *dstArg, *lenArg)) + changed = true; + } + } + } + } + + return summaries; + } + + static void analyzeSizeMinusKWritesInFunction(llvm::Function& F, const llvm::DataLayout& DL, + const SizeMinusKSummaryMap& summaries, + std::vector& out) + { + using namespace llvm; + + if (F.isDeclaration()) + return; + + AssumptionCache AC(F); + LazyValueInfo LVI(&AC, &DL); + TargetLibraryInfoImpl TLII(Triple(F.getParent()->getTargetTriple())); + TargetLibraryInfo TLI(TLII, &F); + + DenseMap argSlots; + for (Instruction& inst : F.getEntryBlock()) + { + auto* store = dyn_cast(&inst); + if (!store) + continue; + auto* arg = dyn_cast(stripCasts(store->getValueOperand())); + if (!arg) + continue; + auto* slot = dyn_cast(stripCasts(store->getPointerOperand())); + if (!slot) + continue; + argSlots[slot] = arg; + } + + auto canonical = [&](Value* v) -> Value* + { + v = stripCasts(v); + if (auto* load = dyn_cast(v)) + { + Value* ptr = 
stripCasts(load->getPointerOperand()); + if (auto* slot = dyn_cast(ptr)) + { + if (const Argument* arg = argSlots.lookup(slot)) + return const_cast(arg); + } + } + return v; + }; + + auto emitIssue = [&](Instruction* at, Value* dest, Value* sizeBase, StringRef sinkName, + bool hasPtrDest, int64_t k) + { + SizeMinusKWriteIssue issue; + issue.funcName = F.getName().str(); + issue.sinkName = sinkName.str(); + issue.hasPointerDest = hasPtrDest; + issue.ptrNonNull = hasPtrDest ? isNonNullAt(dest, at, LVI) : true; + issue.sizeAboveK = isGreaterThanAt(sizeBase, k, at, LVI); + issue.k = k; + issue.inst = at; + if (!issue.ptrNonNull || !issue.sizeAboveK) + out.push_back(std::move(issue)); + }; + + for (Instruction& I : instructions(F)) + { + if (auto* CB = dyn_cast(&I)) + { + unsigned dstIdx = 0; + unsigned lenIdx = 0; + std::string sinkName; + if (getKnownSinkCallInfo(CB, TLI, dstIdx, lenIdx, sinkName)) + { + SizeMinusKMatch match = + matchSizeMinusK(CB->getArgOperand(lenIdx), canonical); + if (match.base) + { + std::string label = canonicalizeSinkName(sinkName); + if (label == "llvm.mem*" || label == "lib call") + label += " (len = size-k)"; + emitIssue(&I, canonical(CB->getArgOperand(dstIdx)), match.base, label, + true, match.k); + } + continue; + } + + if (Function* calleeFn = CB->getCalledFunction()) + { + auto it = summaries.find(calleeFn); + if (it != summaries.end()) + { + for (const auto& sink : it->second) + { + if (sink.dstIdx >= CB->arg_size() || sink.lenIdx >= CB->arg_size()) + { + continue; + } + SizeMinusKMatch match = + matchSizeMinusK(CB->getArgOperand(sink.lenIdx), canonical); + if (!match.base) + continue; + emitIssue(&I, canonical(CB->getArgOperand(sink.dstIdx)), match.base, + calleeFn->getName(), true, match.k); + } + } + } + } + + if (auto* store = dyn_cast(&I)) + { + auto* gep = dyn_cast(store->getPointerOperand()); + if (!gep) + continue; + SizeMinusKMatch match; + for (unsigned idx = 1; idx < gep->getNumOperands(); ++idx) + { + match = 
matchSizeMinusK(gep->getOperand(idx), canonical); + if (match.base) + break; + } + if (!match.base) + continue; + emitIssue(&I, canonical(gep->getPointerOperand()), match.base, + "store (idx = size-k)", true, match.k); + } + } + } + } // namespace + + std::vector + analyzeSizeMinusKWrites(llvm::Module& mod, const llvm::DataLayout& DL, + const std::function& shouldAnalyzeFunction) + { + SizeMinusKSummaryMap summaries = buildSizeMinusKSummaries(mod); + std::vector issues; + + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyzeFunction(F)) + continue; + analyzeSizeMinusKWritesInFunction(F, DL, summaries, issues); + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/StackBufferAnalysis.cpp b/src/analysis/StackBufferAnalysis.cpp new file mode 100644 index 0000000..459673c --- /dev/null +++ b/src/analysis/StackBufferAnalysis.cpp @@ -0,0 +1,682 @@ +#include "analysis/StackBufferAnalysis.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "analysis/IntRanges.hpp" + +namespace ctrace::stack::analysis +{ + namespace + { + struct RecursionGuard + { + llvm::SmallPtrSetImpl& set; + const llvm::Value* value; + RecursionGuard(llvm::SmallPtrSetImpl& s, const llvm::Value* v) + : set(s), value(v) + { + set.insert(value); + } + ~RecursionGuard() + { + set.erase(value); + } + }; + + // Taille (en nombre d'éléments) pour une alloca de tableau sur la stack + static std::optional getAllocaElementCount(llvm::AllocaInst* AI) + { + using namespace llvm; + + Type* elemTy = AI->getAllocatedType(); + StackSize count = 1; + + // Cas "char test[10];" => alloca [10 x i8] + if (auto* arrTy = dyn_cast(elemTy)) + { + count *= arrTy->getNumElements(); + elemTy = arrTy->getElementType(); + } + + // Cas "alloca i8, i64 10" => alloca tableau avec taille constante + if (AI->isArrayAllocation()) + { + if (auto* C = dyn_cast(AI->getArraySize())) + { + count *= 
C->getZExtValue(); + } + else + { + // taille non constante - analyse plus compliquée, on ignore pour l'instant + return std::nullopt; + } + } + + return count; + } + + static const llvm::AllocaInst* resolveArrayAllocaFromPointerInternal( + const llvm::Value* V, llvm::Function& F, std::vector& path, + llvm::SmallPtrSetImpl& recursionStack, int depth) + { + using namespace llvm; + + if (!V) + return nullptr; + if (depth > 64) + return nullptr; + if (recursionStack.contains(V)) + return nullptr; + + RecursionGuard guard(recursionStack, V); + + auto isArrayAlloca = [](const AllocaInst* AI) -> bool + { + Type* T = AI->getAllocatedType(); + // On considère comme "buffer de stack" : + // - les vrais tableaux, + // - les allocas de type tableau (VLA côté IR), + // - les structs qui contiennent au moins un champ tableau. + if (T->isArrayTy() || AI->isArrayAllocation()) + return true; + + if (auto* ST = llvm::dyn_cast(T)) + { + for (unsigned i = 0; i < ST->getNumElements(); ++i) + { + if (ST->getElementType(i)->isArrayTy()) + return true; + } + } + return false; + }; + + // Pour éviter les boucles d'aliasing bizarres + SmallPtrSet visited; + const Value* cur = V; + + while (cur && !visited.contains(cur)) + { + visited.insert(cur); + if (cur->hasName()) + path.push_back(cur->getName().str()); + + // Cas 1 : on tombe sur une alloca. + if (auto* AI = dyn_cast(cur)) + { + if (isArrayAlloca(AI)) + { + // Alloca d'un buffer de stack (tableau) : cible finale. + return AI; + } + + // Sinon, c'est très probablement une variable locale de type pointeur + // (char *ptr; char **pp; etc.). On parcourt les stores vers cette + // variable pour voir quelles valeurs lui sont assignées, et on + // tente de remonter jusqu'à une vraie alloca de tableau. 
+ const AllocaInst* foundAI = nullptr; + + for (BasicBlock& BB : F) + { + for (Instruction& I : BB) + { + auto* SI = dyn_cast(&I); + if (!SI) + continue; + if (SI->getPointerOperand() != AI) + continue; + + const Value* storedPtr = SI->getValueOperand(); + std::vector subPath; + const AllocaInst* cand = resolveArrayAllocaFromPointerInternal( + storedPtr, F, subPath, recursionStack, depth + 1); + if (!cand) + continue; + + if (!foundAI) + { + foundAI = cand; + // Append subPath to path + path.insert(path.end(), subPath.begin(), subPath.end()); + } + else if (foundAI != cand) + { + // Plusieurs bases différentes : aliasing ambigu, + // on préfère abandonner plutôt que de se tromper. + return nullptr; + } + } + } + return foundAI; + } + + // Cas 2 : bitcast -> on remonte l'opérande. + if (auto* BC = dyn_cast(cur)) + { + cur = BC->getOperand(0); + continue; + } + + // Cas 3 : GEP -> on remonte sur le pointeur de base. + if (auto* GEP = dyn_cast(cur)) + { + cur = GEP->getPointerOperand(); + continue; + } + + // Cas 4 : load d'un pointeur. Exemple typique : + // char *ptr = test; + // char *p2 = ptr; + // char **pp = &ptr; + // (*pp)[i] = ... + // + // On remonte au "container" du pointeur (variable locale, ou autre valeur) + // en suivant l'opérande du load. + if (auto* LI = dyn_cast(cur)) + { + cur = LI->getPointerOperand(); + continue; + } + + // Cas 5 : PHI de pointeurs (fusion de plusieurs alias) : + // on tente de résoudre chaque incoming et on s'assure qu'ils + // pointent tous vers la même alloca de tableau. 
+ if (auto* PN = dyn_cast(cur)) + { + const AllocaInst* foundAI = nullptr; + std::vector phiPath; + for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) + { + const Value* inV = PN->getIncomingValue(i); + std::vector subPath; + const AllocaInst* cand = resolveArrayAllocaFromPointerInternal( + inV, F, subPath, recursionStack, depth + 1); + if (!cand) + continue; + if (!foundAI) + { + foundAI = cand; + phiPath = subPath; + } + else if (foundAI != cand) + { + // PHI mélange plusieurs bases différentes : trop ambigu. + return nullptr; + } + } + path.insert(path.end(), phiPath.begin(), phiPath.end()); + return foundAI; + } + + // Autres cas (arguments, globales complexes, etc.) : on arrête l'heuristique. + break; + } + + return nullptr; + } + + static const llvm::AllocaInst* resolveArrayAllocaFromPointer(const llvm::Value* V, + llvm::Function& F, + std::vector& path) + { + llvm::SmallPtrSet recursionStack; + return resolveArrayAllocaFromPointerInternal(V, F, path, recursionStack, 0); + }
+
+        /// Reports loads and stores that go through a GEP whose index into a stack array
+        /// is out of bounds (constant index) or may be out of bounds (ranged index).
+        ///
+        /// For every GEP in @p F whose base pointer resolves to an array-like alloca, the
+        /// array length and the indexing operand are taken from the GEP source type when
+        /// possible, otherwise from the alloca itself. A constant index is compared with
+        /// the array length directly; a variable index is compared with the interval
+        /// returned by computeIntRangesFromICmps(). One issue is appended per load/store
+        /// user of the offending GEP.
+        ///
+        /// @param F   Function to scan.
+        /// @param out Receives the issues, in instruction order.
+        static void
+        analyzeStackBufferOverflowsInFunction(llvm::Function& F,
+                                              std::vector<StackBufferOverflowIssue>& out)
+        {
+            using namespace llvm;
+
+            auto ranges = computeIntRangesFromICmps(F);
+
+            for (BasicBlock& BB : F)
+            {
+                for (Instruction& I : BB)
+                {
+                    auto* GEP = dyn_cast<GetElementPtrInst>(&I);
+                    if (!GEP)
+                        continue;
+
+                    // 1) Find the base of the pointer (test, &test[0], ptr, ...).
+                    std::vector<std::string> aliasPath;
+                    const AllocaInst* AI =
+                        resolveArrayAllocaFromPointer(GEP->getPointerOperand(), F, aliasPath);
+                    if (!AI)
+                        continue;
+
+                    // 2) Determine the logical size of the targeted array and fetch the index.
+                    //    First try the type traversed by the GEP
+                    //    (struct S { char buf[10]; }; s.buf[i]), then fall back to the size
+                    //    of the alloca for the simpler cases (char buf[10]).
+                    StackSize arraySize = 0;
+                    Value* idxVal = nullptr;
+
+                    Type* srcElemTy = GEP->getSourceElementType();
+
+                    if (auto* arrTy = dyn_cast<ArrayType>(srcElemTy))
+                    {
+                        // Direct case: alloca [N x T]; GEP indices [0, i].
+                        if (GEP->getNumIndices() < 2)
+                            continue;
+                        auto idxIt = GEP->idx_begin();
+                        ++idxIt; // skip the first index (usually 0)
+                        idxVal = idxIt->get();
+                        arraySize = arrTy->getNumElements();
+                    }
+                    else if (auto* ST = dyn_cast<StructType>(srcElemTy))
+                    {
+                        // Struct with an array field: indices are [0, field, i], so at
+                        // least three of them are expected.
+                        if (GEP->getNumIndices() >= 3)
+                        {
+                            auto idxIt = GEP->idx_begin();
+
+                            // First index (usually 0).
+                            auto* idx0 = dyn_cast<ConstantInt>(idxIt->get());
+                            ++idxIt;
+                            // Second index: field number inside the struct.
+                            auto* fieldIdxC = dyn_cast<ConstantInt>(idxIt->get());
+                            ++idxIt;
+
+                            if (idx0 && fieldIdxC)
+                            {
+                                unsigned fieldIdx =
+                                    static_cast<unsigned>(fieldIdxC->getZExtValue());
+                                if (fieldIdx < ST->getNumElements())
+                                {
+                                    Type* fieldTy = ST->getElementType(fieldIdx);
+                                    if (auto* fieldArrTy = dyn_cast<ArrayType>(fieldTy))
+                                    {
+                                        arraySize = fieldArrTy->getNumElements();
+                                        // Third index = position inside the inner array.
+                                        idxVal = idxIt->get();
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    // Nothing derived from the GEP type: fall back to the element count of
+                    // the alloca (char buf[10]; ptr = buf; ptr[i]).
+                    if (arraySize == 0 || !idxVal)
+                    {
+                        auto maybeCount = getAllocaElementCount(const_cast<AllocaInst*>(AI));
+                        if (!maybeCount)
+                            continue;
+                        arraySize = *maybeCount;
+                        if (arraySize == 0)
+                            continue;
+
+                        // In that case the first GEP index is taken as the logical index.
+                        if (GEP->getNumIndices() < 1)
+                            continue;
+                        auto idxIt = GEP->idx_begin();
+                        idxVal = idxIt->get();
+                    }
+
+                    std::string varName =
+                        AI->hasName() ? AI->getName().str() : std::string("");
+
+                    // 3) Render the alias chain once, walking the resolved path backwards.
+                    //    aliasPath itself is never modified, so every report emitted for
+                    //    this GEP carries the same vector and the same chain (the previous
+                    //    in-place std::reverse flipped the order on each extra report).
+                    std::string aliasChain;
+                    for (auto it = aliasPath.rbegin(); it != aliasPath.rend(); ++it)
+                    {
+                        if (it != aliasPath.rbegin())
+                            aliasChain += " -> ";
+                        aliasChain += *it;
+                    }
+
+                    // Appends one issue per load/store user of the GEP; `fill` sets the
+                    // fields that depend on the kind of violation.
+                    auto reportMemoryUsers = [&](auto&& fill)
+                    {
+                        for (User* GU : GEP->users())
+                        {
+                            const bool isStore = isa<StoreInst>(GU);
+                            if (!isStore && !isa<LoadInst>(GU))
+                                continue;
+
+                            StackBufferOverflowIssue report;
+                            report.funcName = F.getName().str();
+                            report.varName = varName;
+                            report.arraySize = arraySize;
+                            report.isWrite = isStore;
+                            report.inst = cast<Instruction>(GU);
+                            report.aliasPathVec = aliasPath;
+                            report.aliasPath = aliasChain;
+                            fill(report);
+                            out.push_back(std::move(report));
+                        }
+                    };
+
+                    // "baseIdxVal" = the index with its casts (sext/zext...) peeled off.
+                    Value* baseIdxVal = idxVal;
+                    while (auto* castInst = dyn_cast<CastInst>(baseIdxVal))
+                    {
+                        baseIdxVal = castInst->getOperand(0);
+                    }
+
+                    // 4) Constant index: test[11].
+                    if (auto* CIdx = dyn_cast<ConstantInt>(idxVal))
+                    {
+                        const auto idxValue = CIdx->getSExtValue();
+                        if (idxValue < 0 || static_cast<StackSize>(idxValue) >= arraySize)
+                        {
+                            reportMemoryUsers(
+                                [&](StackBufferOverflowIssue& report)
+                                {
+                                    report.indexOrUpperBound = static_cast<StackSize>(idxValue);
+                                    report.indexIsConstant = true;
+                                });
+                        }
+                        continue;
+                    }
+
+                    // 5) Variable index: test[i] / ptr[i].
+                    //    Look up an interval for the base value (i, not the cast).
+                    const Value* key = baseIdxVal;
+
+                    // When the index comes from a load (-O0 pattern: load i, icmp, load i,
+                    // gep), the loaded-from pointer (the alloca of i) is the key.
+                    if (auto* LI = dyn_cast<LoadInst>(baseIdxVal))
+                    {
+                        key = LI->getPointerOperand();
+                    }
+
+                    auto itRange = ranges.find(key);
+                    if (itRange == ranges.end())
+                    {
+                        // No known bound: stay silent.
+                        continue;
+                    }
+
+                    const IntRange& R = itRange->second;
+
+                    // 5.a) Upper bound outside the array: UB >= arraySize.
+                    if (R.hasUpper && R.upper >= 0 &&
+                        static_cast<StackSize>(R.upper) >= arraySize)
+                    {
+                        const StackSize ub = static_cast<StackSize>(R.upper);
+                        reportMemoryUsers(
+                            [&](StackBufferOverflowIssue& report)
+                            {
+                                report.indexOrUpperBound = ub;
+                                report.indexIsConstant = false;
+                            });
+                    }
+
+                    // 5.b) Negative lower bound: LB < 0 => index may be negative.
+                    if (R.hasLower && R.lower < 0)
+                    {
+                        reportMemoryUsers(
+                            [&](StackBufferOverflowIssue& report)
+                            {
+                                report.indexIsConstant = false;
+                                report.isLowerBoundViolation = true;
+                                report.lowerBound = R.lower;
+                            });
+                    }
+                    // If R.hasUpper && R.upper < arraySize and the lower bound is not
+                    // negative, the access is considered probably safe.
+                }
+            }
+        }
+
+ static void analyzeMultipleStoresInFunction(llvm::Function& F, + std::vector& out) + { + using namespace llvm; + + if (F.isDeclaration()) + return; + + struct Info + { + std::size_t storeCount = 0; + llvm::SmallPtrSet indexKeys; + const AllocaInst* AI = nullptr; + }; + + std::map infoMap; + + for (BasicBlock& BB : F) + { + for (Instruction& I : BB) + { + auto* S = dyn_cast(&I); + if (!S) + continue; + + Value* ptr = S->getPointerOperand(); + auto* GEP = dyn_cast(ptr); + if (!GEP) + continue; + + // On remonte à la base pour trouver une alloca de tableau sur la stack.
+ const Value* basePtr = GEP->getPointerOperand(); + std::vector dummyAliasPath; + const AllocaInst* AI = + resolveArrayAllocaFromPointer(basePtr, F, dummyAliasPath); + if (!AI) + continue; + + // On récupère l'expression d'index utilisée dans le GEP. + Value* idxVal = nullptr; + Type* srcElemTy = GEP->getSourceElementType(); + + if (auto* arrTy = dyn_cast(srcElemTy)) + { + // Pattern [N x T]* -> indices [0, i] + if (GEP->getNumIndices() < 2) + continue; + auto idxIt = GEP->idx_begin(); + ++idxIt; // saute le premier indice (souvent 0) + idxVal = idxIt->get(); + } + else + { + // Pattern T* -> indice unique [i] (cas char *ptr = test; ptr[i]) + if (GEP->getNumIndices() < 1) + continue; + auto idxIt = GEP->idx_begin(); + idxVal = idxIt->get(); + } + + if (!idxVal) + continue; + + // On normalise un peu la clé d'index en enlevant les casts SSA. + const Value* idxKey = idxVal; + while (auto* cast = dyn_cast(const_cast(idxKey))) + { + idxKey = cast->getOperand(0); + } + + auto& info = infoMap[AI]; + info.AI = AI; + info.storeCount++; + info.indexKeys.insert(idxKey); + } + } + + // Construction des warnings pour chaque buffer qui reçoit plusieurs stores. + for (auto& entry : infoMap) + { + const AllocaInst* AI = entry.first; + const Info& info = entry.second; + + if (info.storeCount <= 1) + continue; // un seul store -> pas de warning + + MultipleStoreIssue issue; + issue.funcName = F.getName().str(); + issue.varName = AI->hasName() ? 
AI->getName().str() : std::string(""); + issue.storeCount = info.storeCount; + issue.distinctIndexCount = info.indexKeys.size(); + issue.allocaInst = AI; + + out.push_back(std::move(issue)); + } + } + } // namespace + + std::vector + analyzeStackBufferOverflows(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector out; + + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + analyzeStackBufferOverflowsInFunction(F, out); + } + + return out; + } + + std::vector + analyzeMultipleStores(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector out; + + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + analyzeMultipleStoresInFunction(F, out); + } + + return out; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/StackComputation.cpp b/src/analysis/StackComputation.cpp new file mode 100644 index 0000000..28c7fb4 --- /dev/null +++ b/src/analysis/StackComputation.cpp @@ -0,0 +1,357 @@ +#include "analysis/StackComputation.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include "analysis/IRValueUtils.hpp" + +namespace ctrace::stack::analysis +{ + namespace + { + enum VisitState + { + NotVisited = 0, + Visiting = 1, + Visited = 2 + }; + + static bool hasNonSelfCall(const llvm::Function& F) + { + const llvm::Function* Self = &F; + + for (const llvm::BasicBlock& BB : F) + { + for (const llvm::Instruction& I : BB) + { + const llvm::Function* Callee = nullptr; + + if (auto* CI = llvm::dyn_cast(&I)) + { + Callee = CI->getCalledFunction(); + } + else if (auto* II = llvm::dyn_cast(&I)) + { + Callee = II->getCalledFunction(); + } + + if (Callee && !Callee->isDeclaration() && Callee != Self) + { + return true; // appel vers une autre fonction + } + } + } + return false; + } + + static LocalStackInfo computeLocalStackBase(llvm::Function& F, const llvm::DataLayout& DL) + { + 
LocalStackInfo info; + + for (llvm::BasicBlock& BB : F) + { + for (llvm::Instruction& I : BB) + { + auto* alloca = llvm::dyn_cast(&I); + if (!alloca) + continue; + + llvm::Type* ty = alloca->getAllocatedType(); + StackSize count = 1; + + if (auto* CI = llvm::dyn_cast(alloca->getArraySize())) + { + count = CI->getZExtValue(); + } + else if (auto* C = analysis::tryGetConstFromValue(alloca->getArraySize(), F)) + { + count = C->getZExtValue(); + } + else + { + info.hasDynamicAlloca = true; + info.unknown = true; + continue; + } + + StackSize size = DL.getTypeAllocSize(ty) * count; + info.bytes += size; + info.localAllocas.emplace_back(analysis::deriveAllocaName(alloca), size); + } + } + + return info; + } + + static LocalStackInfo computeLocalStackIR(llvm::Function& F, const llvm::DataLayout& DL) + { + LocalStackInfo info = computeLocalStackBase(F, DL); + + if (info.bytes == 0) + return info; + + llvm::MaybeAlign MA = DL.getStackAlignment(); + unsigned stackAlign = MA ? MA->value() : 1u; + + if (stackAlign > 1) + info.bytes = llvm::alignTo(info.bytes, stackAlign); + + return info; + } + + static LocalStackInfo computeLocalStackABI(llvm::Function& F, const llvm::DataLayout& DL) + { + LocalStackInfo info = computeLocalStackBase(F, DL); + + llvm::MaybeAlign MA = DL.getStackAlignment(); + unsigned stackAlign = MA ? 
MA->value() : 1u; // 16 sur beaucoup de cibles + + StackSize frameSize = info.bytes; + + if (stackAlign > 1) + frameSize = llvm::alignTo(frameSize, stackAlign); + + if (!F.isDeclaration() && stackAlign > 1 && frameSize < stackAlign) + { + frameSize = stackAlign; + } + + if (stackAlign > 1 && hasNonSelfCall(F)) + { + frameSize = llvm::alignTo(frameSize + stackAlign, stackAlign); + } + + info.bytes = frameSize; + return info; + } + + static StackEstimate + dfsComputeStack(const llvm::Function* F, const CallGraph& CG, + const std::map& LocalStack, + std::map& State, + InternalAnalysisState& Res) + { + auto itState = State.find(F); + if (itState != State.end()) + { + if (itState->second == Visiting) + { + // Cycle détecté : on marque tous les noeuds actuellement en "Visiting" + for (auto& p : State) + { + if (p.second == Visiting) + { + Res.RecursiveFuncs.insert(p.first); + } + } + auto itLocal = LocalStack.find(F); + if (itLocal != LocalStack.end()) + { + return StackEstimate{itLocal->second.bytes, itLocal->second.unknown}; + } + return {}; + } + else if (itState->second == Visited) + { + auto itTotal = Res.TotalStack.find(F); + return (itTotal != Res.TotalStack.end()) ? 
itTotal->second : StackEstimate{}; + } + } + + State[F] = Visiting; + + auto itLocal = LocalStack.find(F); + StackEstimate local = {}; + if (itLocal != LocalStack.end()) + { + local.bytes = itLocal->second.bytes; + local.unknown = itLocal->second.unknown; + } + StackEstimate maxCallee = {}; + + auto itCG = CG.find(F); + if (itCG != CG.end()) + { + for (const llvm::Function* Callee : itCG->second) + { + StackEstimate calleeStack = dfsComputeStack(Callee, CG, LocalStack, State, Res); + if (calleeStack.bytes > maxCallee.bytes) + maxCallee.bytes = calleeStack.bytes; + if (calleeStack.unknown) + maxCallee.unknown = true; + } + } + + StackEstimate total; + total.bytes = local.bytes + maxCallee.bytes; + total.unknown = local.unknown || maxCallee.unknown; + Res.TotalStack[F] = total; + State[F] = Visited; + return total; + } + } // namespace + + CallGraph buildCallGraph(llvm::Module& M) + { + CallGraph CG; + + for (llvm::Function& F : M) + { + if (F.isDeclaration()) + continue; + + auto& vec = CG[&F]; + + for (llvm::BasicBlock& BB : F) + { + for (llvm::Instruction& I : BB) + { + const llvm::Function* Callee = nullptr; + + if (auto* CI = llvm::dyn_cast(&I)) + { + Callee = CI->getCalledFunction(); + } + else if (auto* II = llvm::dyn_cast(&I)) + { + Callee = II->getCalledFunction(); + } + + if (Callee && !Callee->isDeclaration()) + { + vec.push_back(Callee); + } + } + } + } + + return CG; + } + + LocalStackInfo computeLocalStack(llvm::Function& F, const llvm::DataLayout& DL, + AnalysisMode mode) + { + switch (mode) + { + case AnalysisMode::IR: + return computeLocalStackIR(F, DL); + case AnalysisMode::ABI: + return computeLocalStackABI(F, DL); + } + return {}; + } + + InternalAnalysisState + computeGlobalStackUsage(const CallGraph& CG, + const std::map& LocalStack) + { + InternalAnalysisState Res; + std::map State; + + for (auto& p : LocalStack) + { + State[p.first] = NotVisited; + } + + for (auto& p : LocalStack) + { + const llvm::Function* F = p.first; + if (State[F] == 
NotVisited) + { + dfsComputeStack(F, CG, LocalStack, State, Res); + } + } + + return Res; + }
+
+    /// Heuristically detects unconditional direct self-recursion.
+    ///
+    /// @param F Function to inspect.
+    /// @return true when @p F contains at least one direct call/invoke of itself and
+    ///         every block ending in a `ret` is dominated by a block holding such a call
+    ///         (which also covers functions with no `ret` at all); false for
+    ///         declarations, for functions without a direct self-call, and as soon as one
+    ///         return is not dominated by a self-call block.
+    bool detectInfiniteSelfRecursion(llvm::Function& F)
+    {
+        if (F.isDeclaration())
+            return false;
+
+        const llvm::Function* Self = &F;
+
+        // Blocks that contain at least one direct call/invoke of F itself.
+        std::vector<llvm::BasicBlock*> SelfCallBlocks;
+
+        for (llvm::BasicBlock& BB : F)
+        {
+            for (llvm::Instruction& I : BB)
+            {
+                const llvm::Function* Callee = nullptr;
+
+                if (auto* CI = llvm::dyn_cast<llvm::CallInst>(&I))
+                {
+                    Callee = CI->getCalledFunction();
+                }
+                else if (auto* II = llvm::dyn_cast<llvm::InvokeInst>(&I))
+                {
+                    Callee = II->getCalledFunction();
+                }
+
+                if (Callee == Self)
+                {
+                    SelfCallBlocks.push_back(&BB);
+                    break; // one hit is enough for this block
+                }
+            }
+        }
+
+        if (SelfCallBlocks.empty())
+            return false;
+
+        llvm::DominatorTree DT(F);
+
+        for (llvm::BasicBlock& BB : F)
+        {
+            // `ret` is a terminator, so only the last instruction of a block matters.
+            const llvm::Instruction* Term = BB.getTerminator();
+            if (!Term || !llvm::isa<llvm::ReturnInst>(Term))
+                continue;
+
+            bool dominatedBySelfCall = false;
+            for (llvm::BasicBlock* SCB : SelfCallBlocks)
+            {
+                if (DT.dominates(SCB, &BB))
+                {
+                    dominatedBySelfCall = true;
+                    break;
+                }
+            }
+
+            // This return can be reached without passing through a self-call block.
+            if (!dominatedBySelfCall)
+                return false;
+        }
+
+        // Every return (possibly none) sits behind a self-call block.
+        return true;
+    }
+
+ StackSize computeAllocaLargeThreshold(const AnalysisConfig& config) + { + const StackSize defaultStack = 8ull * 1024ull * 1024ull; + const StackSize minThreshold = 64ull * 1024ull; // 64 KiB + + StackSize base = config.stackLimit ?
config.stackLimit : defaultStack; + StackSize derived = base / 8; + + if (derived < minThreshold) + derived = minThreshold; + + return derived; + } +} // namespace ctrace::stack::analysis diff --git a/src/analysis/StackPointerEscape.cpp b/src/analysis/StackPointerEscape.cpp new file mode 100644 index 0000000..01dfff3 --- /dev/null +++ b/src/analysis/StackPointerEscape.cpp @@ -0,0 +1,218 @@ +#include "analysis/StackPointerEscape.hpp" + +#include +#include +#include +#include +#include +#include + +namespace ctrace::stack::analysis +{ + namespace + { + static bool isStdLibCalleeName(llvm::StringRef name) + { + return name.starts_with("_ZNSt3__1") || name.starts_with("_ZSt") || + name.starts_with("_ZNSt") || name.starts_with("__cxx"); + } + + static bool isStdLibCallee(const llvm::Function* F) + { + if (!F) + return false; + return isStdLibCalleeName(F->getName()); + } + + static void analyzeStackPointerEscapesInFunction(llvm::Function& F, + std::vector& out) + { + using namespace llvm; + + if (F.isDeclaration()) + return; + + for (BasicBlock& BB : F) + { + for (Instruction& I : BB) + { + auto* AI = dyn_cast(&I); + if (!AI) + continue; + + SmallPtrSet visited; + SmallVector worklist; + worklist.push_back(AI); + + while (!worklist.empty()) + { + const Value* V = worklist.back(); + worklist.pop_back(); + if (visited.contains(V)) + continue; + visited.insert(V); + + for (const Use& U : V->uses()) + { + const User* Usr = U.getUser(); + + if (auto* RI = dyn_cast(Usr)) + { + StackPointerEscapeIssue issue; + issue.funcName = F.getName().str(); + issue.varName = + AI->hasName() ? 
AI->getName().str() : std::string(""); + issue.escapeKind = "return"; + issue.targetName = {}; + issue.inst = RI; + out.push_back(std::move(issue)); + continue; + } + + if (auto* SI = dyn_cast(Usr)) + { + if (SI->getValueOperand() == V) + { + const Value* dstRaw = SI->getPointerOperand(); + const Value* dst = dstRaw->stripPointerCasts(); + + if (auto* GV = dyn_cast(dst)) + { + StackPointerEscapeIssue issue; + issue.funcName = F.getName().str(); + issue.varName = AI->hasName() ? AI->getName().str() + : std::string(""); + issue.escapeKind = "store_global"; + issue.targetName = + GV->hasName() ? GV->getName().str() : std::string{}; + issue.inst = SI; + out.push_back(std::move(issue)); + continue; + } + + if (!isa(dst)) + { + StackPointerEscapeIssue issue; + issue.funcName = F.getName().str(); + issue.varName = AI->hasName() ? AI->getName().str() + : std::string(""); + issue.escapeKind = "store_unknown"; + issue.targetName = + dst->hasName() ? dst->getName().str() : std::string{}; + issue.inst = SI; + out.push_back(std::move(issue)); + continue; + } + + const AllocaInst* dstAI = cast(dst); + worklist.push_back(dstAI); + } + continue; + } + + if (auto* CB = dyn_cast(Usr)) + { + for (unsigned argIndex = 0; argIndex < CB->arg_size(); ++argIndex) + { + if (CB->getArgOperand(argIndex) != V) + continue; + + const Value* calledVal = CB->getCalledOperand(); + const Value* calledStripped = + calledVal ? calledVal->stripPointerCasts() : nullptr; + const Function* directCallee = + calledStripped ? 
dyn_cast(calledStripped) + : nullptr; + if (CB->paramHasAttr(argIndex, llvm::Attribute::NoCapture) || + CB->paramHasAttr(argIndex, llvm::Attribute::ByVal) || + CB->paramHasAttr(argIndex, llvm::Attribute::ByRef)) + { + continue; + } + if (directCallee) + { + llvm::StringRef calleeName = directCallee->getName(); + if (calleeName.contains("unique_ptr") || + calleeName.contains("make_unique")) + { + continue; + } + if (isStdLibCallee(directCallee)) + { + continue; + } + } + + StackPointerEscapeIssue issue; + issue.funcName = F.getName().str(); + issue.varName = AI->hasName() ? AI->getName().str() + : std::string(""); + issue.inst = cast(CB); + + if (!directCallee) + { + issue.escapeKind = "call_callback"; + issue.targetName.clear(); + out.push_back(std::move(issue)); + } + else + { +#ifdef CT_DISABLE_CALL_ARG + issue.escapeKind = "call_arg"; + issue.targetName = directCallee->hasName() + ? directCallee->getName().str() + : std::string{}; + out.push_back(std::move(issue)); +#endif + } + } + + continue; + } + + if (auto* BC = dyn_cast(Usr)) + { + if (BC->getType()->isPointerTy()) + worklist.push_back(BC); + continue; + } + if (auto* GEP = dyn_cast(Usr)) + { + worklist.push_back(GEP); + continue; + } + if (auto* PN = dyn_cast(Usr)) + { + if (PN->getType()->isPointerTy()) + worklist.push_back(PN); + continue; + } + if (auto* Sel = dyn_cast(Usr)) + { + if (Sel->getType()->isPointerTy()) + worklist.push_back(Sel); + continue; + } + } + } + } + } + } + } // namespace + + std::vector + analyzeStackPointerEscapes(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector issues; + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + analyzeStackPointerEscapesInFunction(F, issues); + } + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/src/passes/ModulePasses.cpp b/src/passes/ModulePasses.cpp new file mode 100644 index 0000000..606c7c2 --- /dev/null +++ b/src/passes/ModulePasses.cpp 
@@ -0,0 +1,89 @@
+#include "passes/ModulePasses.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace ctrace::stack
+{
+    // Collects every function argument in `mod` that currently carries the
+    // nocapture attribute.
+    static llvm::DenseSet collectNoCaptureArgs(const llvm::Module& mod)
+    {
+        llvm::DenseSet out;
+        for (const llvm::Function& F : mod)
+        {
+            for (const llvm::Argument& A : F.args())
+            {
+                if (A.hasNoCaptureAttr())
+                    out.insert(&A);
+            }
+        }
+        return out;
+    }
+
+    // Runs the "function-attrs" pass pipeline over `mod` and then counts the
+    // arguments that carry nocapture afterwards but did not before the run.
+    // If the pipeline text fails to parse, the error is consumed and the
+    // function returns without running anything.
+    void runFunctionAttrsPass(llvm::Module& mod)
+    {
+        // llvm::errs() << "[stack-analyzer] running function-attrs pass\n";
+        const llvm::DenseSet before = collectNoCaptureArgs(mod);
+
+        llvm::PassBuilder PB;
+        llvm::LoopAnalysisManager LAM;
+        llvm::FunctionAnalysisManager FAM;
+        llvm::CGSCCAnalysisManager CGAM;
+        llvm::ModuleAnalysisManager MAM;
+
+        // Target library info is built from the module's own target triple.
+        llvm::TargetLibraryInfoImpl TLII(llvm::Triple(mod.getTargetTriple()));
+        FAM.registerPass([&] { return llvm::TargetLibraryAnalysis(TLII); });
+
+        PB.registerModuleAnalyses(MAM);
+        PB.registerCGSCCAnalyses(CGAM);
+        PB.registerFunctionAnalyses(FAM);
+        PB.registerLoopAnalyses(LAM);
+        PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+        llvm::ModulePassManager MPM;
+        if (auto Err = PB.parsePassPipeline(MPM, "function-attrs"))
+        {
+            // Best effort: drop the parse error and skip the pass.
+            llvm::consumeError(std::move(Err));
+            return;
+        }
+        MPM.run(mod, MAM);
+
+        unsigned added = 0;
+        for (const llvm::Function& F : mod)
+        {
+            unsigned idx = 0;
+            for (const llvm::Argument& A : F.args())
+            {
+                if (A.hasNoCaptureAttr() && !before.contains(&A))
+                {
+                    // Debug trace. Keep it commented out as a whole: enabling only
+                    // part of it writes a stray " (<name>)" fragment and a newline
+                    // to llvm::errs() for every newly inferred nocapture argument.
+                    // llvm::errs() << "[stack-analyzer] nocapture added: " << F.getName()
+                    //              << " arg#" << idx;
+                    // if (A.hasName())
+                    //     llvm::errs() << " (" << A.getName() << ")";
+                    // llvm::errs() << "\n";
+                    ++added;
+                }
+                ++idx;
+            }
+        }
+        if (added == 0)
+        {
+            // llvm::errs() << "[stack-analyzer] nocapture added: none\n";
+        }
+    }
+} // namespace ctrace::stack
diff --git a/src/report/ReportSerialization.cpp b/src/report/ReportSerialization.cpp
new file mode 100644
index 0000000..38f0b19
--- /dev/null
+++ b/src/report/ReportSerialization.cpp
+#include "StackUsageAnalyzer.hpp" + +#include // std::snprintf +#include +#include +#include + +namespace ctrace::stack +{ + namespace + { + + // Petit helper pour échapper les chaînes JSON. + static std::string jsonEscape(const std::string& s) + { + std::string out; + out.reserve(s.size() + 16); + for (char c : s) + { + switch (c) + { + case '\\': + out += "\\\\"; + break; + case '\"': + out += "\\\""; + break; + case '\n': + out += "\\n"; + break; + case '\r': + out += "\\r"; + break; + case '\t': + out += "\\t"; + break; + default: + if (static_cast(c) < 0x20) + { + char buf[7]; + std::snprintf(buf, sizeof(buf), "\\u%04x", c & 0xFF); + out += buf; + } + else + { + out += c; + } + break; + } + } + return out; + } + + // Old helper to convert DiagnosticSeverity to string, don't use it anymore. + static const char* severityToJsonString(DiagnosticSeverity sev) + { + switch (sev) + { + case DiagnosticSeverity::Info: + return "info"; + case DiagnosticSeverity::Warning: + return "warning"; + case DiagnosticSeverity::Error: + return "error"; + } + return "info"; + } + + static const char* severityToSarifLevel(DiagnosticSeverity sev) + { + // SARIF levels: "none", "note", "warning", "error" + switch (sev) + { + case DiagnosticSeverity::Info: + return "note"; + case DiagnosticSeverity::Warning: + return "warning"; + case DiagnosticSeverity::Error: + return "error"; + } + return "note"; + } + + } // anonymous namespace + + static std::string toJsonImpl(const AnalysisResult& result, const std::string* inputFile, + const std::vector* inputFiles) + { + std::ostringstream os; + os << "{\n"; + os << " \"meta\": {\n"; + os << " \"tool\": \"" + << "ctrace-stack-analyzer" + << "\",\n"; + if (inputFiles && !inputFiles->empty()) + { + os << " \"inputFiles\": ["; + for (std::size_t i = 0; i < inputFiles->size(); ++i) + { + os << "\"" << jsonEscape((*inputFiles)[i]) << "\""; + if (i + 1 < inputFiles->size()) + os << ", "; + } + os << "],\n"; + } + else if (inputFile) + { + os << " 
\"inputFile\": \"" << jsonEscape(*inputFile) << "\",\n"; + } + os << " \"mode\": \"" << (result.config.mode == AnalysisMode::IR ? "IR" : "ABI") + << "\",\n"; + os << " \"stackLimit\": " << result.config.stackLimit << ",\n"; + os << " \"analysisTimeMs\": " << -1 << "\n"; + os << " },\n"; + + // Fonctions + os << " \"functions\": [\n"; + for (std::size_t i = 0; i < result.functions.size(); ++i) + { + const auto& f = result.functions[i]; + os << " {\n"; + std::string filePath = f.filePath; + if (filePath.empty() && inputFile) + { + filePath = *inputFile; + } + os << " \"file\": \"" << jsonEscape(filePath) << "\",\n"; + os << " \"name\": \"" << jsonEscape(f.name) << "\",\n"; + os << " \"localStack\": "; + if (f.localStackUnknown) + { + os << "null"; + } + else + { + os << f.localStack; + } + os << ",\n"; + os << " \"localStackLowerBound\": "; + if (f.localStackUnknown && f.localStack > 0) + { + os << f.localStack; + } + else + { + os << "null"; + } + os << ",\n"; + os << " \"localStackUnknown\": " << (f.localStackUnknown ? "true" : "false") + << ",\n"; + os << " \"maxStack\": "; + if (f.maxStackUnknown) + { + os << "null"; + } + else + { + os << f.maxStack; + } + os << ",\n"; + os << " \"maxStackLowerBound\": "; + if (f.maxStackUnknown && f.maxStack > 0) + { + os << f.maxStack; + } + else + { + os << "null"; + } + os << ",\n"; + os << " \"maxStackUnknown\": " << (f.maxStackUnknown ? "true" : "false") << ",\n"; + os << " \"hasDynamicAlloca\": " << (f.hasDynamicAlloca ? "true" : "false") + << ",\n"; + os << " \"isRecursive\": " << (f.isRecursive ? "true" : "false") << ",\n"; + os << " \"hasInfiniteSelfRecursion\": " + << (f.hasInfiniteSelfRecursion ? "true" : "false") << ",\n"; + os << " \"exceedsLimit\": " << (f.exceedsLimit ? 
"true" : "false") << "\n"; + os << " }"; + if (i + 1 < result.functions.size()) + os << ","; + os << "\n"; + } + os << " ],\n"; + + // Diagnostics + os << " \"diagnostics\": [\n"; + for (std::size_t i = 0; i < result.diagnostics.size(); ++i) + { + const auto& d = result.diagnostics[i]; + os << " {\n"; + os << " \"id\": \"diag-" << (i + 1) << "\",\n"; + os << " \"severity\": \"" << ctrace::stack::enumToString(d.severity) << "\",\n"; + const std::string ruleId = + d.ruleId.empty() ? std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId; + os << " \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n"; + + std::string diagFilePath = d.filePath; + if (diagFilePath.empty() && inputFile) + { + diagFilePath = *inputFile; + } + os << " \"location\": {\n"; + os << " \"file\": \"" << jsonEscape(diagFilePath) << "\",\n"; + os << " \"function\": \"" << jsonEscape(d.funcName) << "\",\n"; + os << " \"startLine\": " << d.line << ",\n"; + os << " \"startColumn\": " << d.column << ",\n"; + os << " \"endLine\": " << d.endLine << ",\n"; + os << " \"endColumn\": " << d.endColumn << "\n"; + os << " },\n"; + + os << " \"details\": {\n"; + os << " \"message\": \"" << jsonEscape(d.message) << "\",\n"; + os << " \"variableAliasing\": ["; + for (std::size_t j = 0; j < d.variableAliasingVec.size(); ++j) + { + os << "\"" << jsonEscape(d.variableAliasingVec[j]) << "\""; + if (j + 1 < d.variableAliasingVec.size()) + os << ", "; + } + os << "]\n"; + os << " }\n"; // <-- ferme "details" + os << " }"; // <-- ferme le diagnostic + if (i + 1 < result.diagnostics.size()) + os << ","; + os << "\n"; + } + os << " ]\n"; + os << "}\n"; + return os.str(); + } + + std::string toJson(const AnalysisResult& result, const std::string& inputFile) + { + return toJsonImpl(result, &inputFile, nullptr); + } + + std::string toJson(const AnalysisResult& result, const std::vector& inputFiles) + { + return toJsonImpl(result, nullptr, &inputFiles); + } + + std::string toSarif(const AnalysisResult& result, const 
std::string& inputFile, + const std::string& toolName, const std::string& toolVersion) + { + std::ostringstream os; + os << "{\n"; + os << " \"version\": \"2.1.0\",\n"; + os << " \"$schema\": " + "\"https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0.json\",\n"; + os << " \"runs\": [\n"; + os << " {\n"; + os << " \"tool\": {\n"; + os << " \"driver\": {\n"; + os << " \"name\": \"" << jsonEscape(toolName) << "\",\n"; + os << " \"version\": \"" << jsonEscape(toolVersion) << "\"\n"; + os << " }\n"; + os << " },\n"; + os << " \"results\": [\n"; + + for (std::size_t i = 0; i < result.diagnostics.size(); ++i) + { + const auto& d = result.diagnostics[i]; + os << " {\n"; + // Pour le moment, un seul ruleId générique; tu pourras le spécialiser plus tard. + const std::string ruleId = + d.ruleId.empty() ? std::string(ctrace::stack::enumToString(d.errCode)) : d.ruleId; + os << " \"ruleId\": \"" << jsonEscape(ruleId) << "\",\n"; + os << " \"level\": \"" << severityToSarifLevel(d.severity) << "\",\n"; + os << " \"message\": { \"text\": \"" << jsonEscape(d.message) << "\" },\n"; + os << " \"locations\": [\n"; + os << " {\n"; + os << " \"physicalLocation\": {\n"; + std::string diagFilePath = d.filePath.empty() ? 
inputFile : d.filePath; + os << " \"artifactLocation\": { \"uri\": \"" << jsonEscape(diagFilePath) + << "\" },\n"; + os << " \"region\": {\n"; + os << " \"startLine\": " << d.line << ",\n"; + os << " \"startColumn\": " << d.column << "\n"; + os << " }\n"; + os << " }\n"; + os << " }\n"; + os << " ]\n"; + os << " }"; + if (i + 1 < result.diagnostics.size()) + os << ","; + os << "\n"; + } + + os << " ]\n"; + os << " }\n"; + os << " ]\n"; + os << "}\n"; + + return os.str(); + } + +} // namespace ctrace::stack diff --git a/test/bound-storage/unreachable-detached.c b/test/bound-storage/unreachable-detached.c new file mode 100644 index 0000000..555f3af --- /dev/null +++ b/test/bound-storage/unreachable-detached.c @@ -0,0 +1,13 @@ +void unreachable_detached_region(void) +{ + int i = 11; + char test[10]; + + goto done; + + if (i <= 10) + test[11] = 'a'; + +done: + return; +} diff --git a/test/bound-storage/unreachable-multi-pred.c b/test/bound-storage/unreachable-multi-pred.c new file mode 100644 index 0000000..c921e3b --- /dev/null +++ b/test/bound-storage/unreachable-multi-pred.c @@ -0,0 +1,22 @@ + +void unreachable_multi_pred_mixed(int x) +{ + int zero = 0; + char test[10]; + + // at line 19, column 14 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test + // constant index 11 is out of bounds (0..9) + // (this is a write access) + if (zero) + goto L; + if (x > 0) + goto L; + return; + +L: + test[11] = 'a'; +} + +// not contains: [info] this access appears unreachable at runtime (condition is always false for this branch) diff --git a/test/bound-storage/unreachable-validation.c b/test/bound-storage/unreachable-validation.c new file mode 100644 index 0000000..6542848 --- /dev/null +++ b/test/bound-storage/unreachable-validation.c @@ -0,0 +1,15 @@ +void unreachable_validation_local_const(void) +{ + int i = 11; + char test[10]; + + // at line 14, column 18 + // [!!] 
potential stack buffer overflow on variable 'test' (size 10) + // alias path: test + // constant index 11 is out of bounds (0..9) + // (this is a write access) + // [info] this access appears unreachable at runtime (condition is always false for this branch) + + if (i <= 10) + test[11] = 'a'; +} diff --git a/test/escape-stack/direct-callback.c b/test/escape-stack/direct-callback.c index ce9cafc..5a05bf7 100644 --- a/test/escape-stack/direct-callback.c +++ b/test/escape-stack/direct-callback.c @@ -1,11 +1,16 @@ -// case_call_arg.c -void sink(char* p); +// // case_call_arg.c +// void sink(char* p); -void pass_to_sink(void) +// void pass_to_sink(void) +// { +// char buf[10]; +// // at line 10, column 5 +// // [!!] stack pointer escape: address of variable 'buf' escapes this function +// // address passed as argument to function 'sink' (callee may capture the pointer beyond this function) +// sink(buf); // le callee peut capturer le pointeur +// } + +void temporary(void) { - char buf[10]; - // at line 10, column 5 - // [!!] stack pointer escape: address of variable 'buf' escapes this function - // address passed as argument to function 'sink' (callee may capture the pointer beyond this function) - sink(buf); // le callee peut capturer le pointeur + // dummy function to prevent tail call optimization of pass_to_sink() } diff --git a/test/escape-stack/interproc-escape.c b/test/escape-stack/interproc-escape.c new file mode 100644 index 0000000..e3d4b37 --- /dev/null +++ b/test/escape-stack/interproc-escape.c @@ -0,0 +1,23 @@ +// static char* g_ptr; + +// static void store_global(char* p) +// { +// g_ptr = p; +// } + +// void escape_via_defined_callee(void) +// { +// char buf[10]; +// // at line 14, column 5 +// // [!!] 
stack pointer escape: address of variable 'buf' escapes this function +// // address passed as argument to function 'store_global' (callee may capture the pointer beyond this function) +// store_global(buf); +// } + +// This test checks that we can detect interprocedural escapes through defined callees, even when the callee is not marked with attributes like 'nocapture' (which is the case here since the callee is defined in the same translation unit and we don't want to rely on the pass adding 'nocapture' attributes). +// Note that this test is not about the precision of the analysis (we may have false positives, and in this case we do since 'store_global' does not actually capture the pointer beyond the function), but rather about the ability to detect that there is an escape through the call to 'store_global'. + +void temporary(void) +{ + // dummy function to prevent tail call optimization of escape_via_defined_callee() +} \ No newline at end of file diff --git a/test/size-arg/strncpy-size-minus-1.c b/test/size-arg/strncpy-size-minus-1.c new file mode 100644 index 0000000..5a84307 --- /dev/null +++ b/test/size-arg/strncpy-size-minus-1.c @@ -0,0 +1,19 @@ +#include +#include + +void foo(char* dst, const char* src, size_t n) +{ + // at line 10, column 5 + // [!] 
potential unsafe write with length (size - 1) in strncpy + // destination pointer may be null + // size operand may be <= 1 + strncpy(dst, src, n - 1); +} + +int main(void) +{ + char a[8] = {0}; + char b[8] = {0}; + foo(a, b, 8); + return 0; +} diff --git a/test/size-arg/wrapper-size-minus-1.c b/test/size-arg/wrapper-size-minus-1.c new file mode 100644 index 0000000..89ecf63 --- /dev/null +++ b/test/size-arg/wrapper-size-minus-1.c @@ -0,0 +1,21 @@ +#include +#include + +void test2(char* dst, const char* src, size_t n) +{ + strncpy(dst, src, n); +} + +void test(char* dst, const char* src, size_t n) +{ + test2(dst, src, n); +} + +void caller(char* dst, const char* src, size_t n) +{ + // at line 20, column 5 + // [!] potential unsafe write with length (size - 1) in test + // destination pointer may be null + // size operand may be <= 1 + test(dst, src, n - 1); +} diff --git a/test/size-arg/wrapper-size-minus-1.ll b/test/size-arg/wrapper-size-minus-1.ll new file mode 100644 index 0000000..03e6467 --- /dev/null +++ b/test/size-arg/wrapper-size-minus-1.ll @@ -0,0 +1,137 @@ +; ModuleID = '/tmp/coretrace-stack-analyzer/test/size-arg/wrapper-size-minus-1.c' +source_filename = "/tmp/coretrace-stack-analyzer/test/size-arg/wrapper-size-minus-1.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx15.0.0" + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @test2(ptr noundef %dst, ptr noundef %src, i64 noundef %n) #0 !dbg !10 { +entry: + %dst.addr = alloca ptr, align 8 + %src.addr = alloca ptr, align 8 + %n.addr = alloca i64, align 8 + store ptr %dst, ptr %dst.addr, align 8 + #dbg_declare(ptr %dst.addr, !24, !DIExpression(), !25) + store ptr %src, ptr %src.addr, align 8 + #dbg_declare(ptr %src.addr, !26, !DIExpression(), !27) + store i64 %n, ptr %n.addr, align 8 + #dbg_declare(ptr %n.addr, !28, !DIExpression(), !29) + %0 = load ptr, ptr %dst.addr, align 8, !dbg !30 + %1 = load ptr, ptr %src.addr, align 8, 
!dbg !30 + %2 = load i64, ptr %n.addr, align 8, !dbg !30 + %3 = load ptr, ptr %dst.addr, align 8, !dbg !30 + %4 = call i64 @llvm.objectsize.i64.p0(ptr %3, i1 false, i1 true, i1 false), !dbg !30 + %call = call ptr @__strncpy_chk(ptr noundef %0, ptr noundef %1, i64 noundef %2, i64 noundef %4) #3, !dbg !30 + ret void, !dbg !31 +} + +; Function Attrs: nounwind +declare ptr @__strncpy_chk(ptr noundef, ptr noundef, i64 noundef, i64 noundef) #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.objectsize.i64.p0(ptr, i1 immarg, i1 immarg, i1 immarg) #2 + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @test(ptr noundef %dst, ptr noundef %src, i64 noundef %n) #0 !dbg !32 { +entry: + %dst.addr = alloca ptr, align 8 + %src.addr = alloca ptr, align 8 + %n.addr = alloca i64, align 8 + store ptr %dst, ptr %dst.addr, align 8 + #dbg_declare(ptr %dst.addr, !33, !DIExpression(), !34) + store ptr %src, ptr %src.addr, align 8 + #dbg_declare(ptr %src.addr, !35, !DIExpression(), !36) + store i64 %n, ptr %n.addr, align 8 + #dbg_declare(ptr %n.addr, !37, !DIExpression(), !38) + %0 = load ptr, ptr %dst.addr, align 8, !dbg !39 + %1 = load ptr, ptr %src.addr, align 8, !dbg !40 + %2 = load i64, ptr %n.addr, align 8, !dbg !41 + call void @test2(ptr noundef %0, ptr noundef %1, i64 noundef %2), !dbg !42 + ret void, !dbg !43 +} + +; Function Attrs: noinline nounwind optnone ssp uwtable(sync) +define void @caller(ptr noundef %dst, ptr noundef %src, i64 noundef %n) #0 !dbg !44 { +entry: + %dst.addr = alloca ptr, align 8 + %src.addr = alloca ptr, align 8 + %n.addr = alloca i64, align 8 + store ptr %dst, ptr %dst.addr, align 8 + #dbg_declare(ptr %dst.addr, !45, !DIExpression(), !46) + store ptr %src, ptr %src.addr, align 8 + #dbg_declare(ptr %src.addr, !47, !DIExpression(), !48) + store i64 %n, ptr %n.addr, align 8 + #dbg_declare(ptr %n.addr, !49, !DIExpression(), !50) + %0 = load ptr, ptr %dst.addr, align 8, 
!dbg !51 + %1 = load ptr, ptr %src.addr, align 8, !dbg !52 + %2 = load i64, ptr %n.addr, align 8, !dbg !53 + %sub = sub i64 %2, 1, !dbg !54 + call void @test(ptr noundef %0, ptr noundef %1, i64 noundef %sub), !dbg !55 + ret void, !dbg !56 +} + +attributes #0 = { noinline nounwind optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "probe-stack"="__chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+bti,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "probe-stack"="__chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+bti,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} +!llvm.dbg.cu = !{!7} +!llvm.ident = !{!9} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 26, i32 2]} +!1 = !{i32 7, !"Dwarf Version", i32 5} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 1, !"wchar_size", i32 4} +!4 = !{i32 8, !"PIC Level", i32 2} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{i32 7, !"frame-pointer", i32 1} +!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "Apple clang version 17.0.0 (clang-1700.6.3.2)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: 
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk", sdk: "MacOSX.sdk") +!8 = !DIFile(filename: "/tmp/coretrace-stack-analyzer/test/size-arg/wrapper-size-minus-1.c", directory: "/private/tmp", checksumkind: CSK_MD5, checksum: "1a0b03385c639f79a4276f293c4423f0") +!9 = !{!"Apple clang version 17.0.0 (clang-1700.6.3.2)"} +!10 = distinct !DISubprogram(name: "test2", scope: !11, file: !11, line: 4, type: !12, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !23) +!11 = !DIFile(filename: "/tmp/coretrace-stack-analyzer/test/size-arg/wrapper-size-minus-1.c", directory: "", checksumkind: CSK_MD5, checksum: "1a0b03385c639f79a4276f293c4423f0") +!12 = !DISubroutineType(types: !13) +!13 = !{null, !14, !16, !18} +!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64) +!15 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) +!17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !15) +!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !19, line: 50, baseType: !20) +!19 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/sys/_types/_size_t.h", directory: "", checksumkind: CSK_MD5, checksum: "f7981334d28e0c246f35cd24042aa2a4") +!20 = !DIDerivedType(tag: DW_TAG_typedef, name: "__darwin_size_t", file: !21, line: 87, baseType: !22) +!21 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/arm/_types.h", directory: "", checksumkind: CSK_MD5, checksum: "b270144f57ae258d0ce80b8f87be068c") +!22 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned) +!23 = !{} +!24 = !DILocalVariable(name: "dst", arg: 1, scope: !10, file: !11, line: 4, type: !14) +!25 = !DILocation(line: 4, column: 18, scope: !10) +!26 = 
!DILocalVariable(name: "src", arg: 2, scope: !10, file: !11, line: 4, type: !16) +!27 = !DILocation(line: 4, column: 35, scope: !10) +!28 = !DILocalVariable(name: "n", arg: 3, scope: !10, file: !11, line: 4, type: !18) +!29 = !DILocation(line: 4, column: 47, scope: !10) +!30 = !DILocation(line: 6, column: 5, scope: !10) +!31 = !DILocation(line: 7, column: 1, scope: !10) +!32 = distinct !DISubprogram(name: "test", scope: !11, file: !11, line: 9, type: !12, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !23) +!33 = !DILocalVariable(name: "dst", arg: 1, scope: !32, file: !11, line: 9, type: !14) +!34 = !DILocation(line: 9, column: 17, scope: !32) +!35 = !DILocalVariable(name: "src", arg: 2, scope: !32, file: !11, line: 9, type: !16) +!36 = !DILocation(line: 9, column: 34, scope: !32) +!37 = !DILocalVariable(name: "n", arg: 3, scope: !32, file: !11, line: 9, type: !18) +!38 = !DILocation(line: 9, column: 46, scope: !32) +!39 = !DILocation(line: 11, column: 11, scope: !32) +!40 = !DILocation(line: 11, column: 16, scope: !32) +!41 = !DILocation(line: 11, column: 21, scope: !32) +!42 = !DILocation(line: 11, column: 5, scope: !32) +!43 = !DILocation(line: 12, column: 1, scope: !32) +!44 = distinct !DISubprogram(name: "caller", scope: !11, file: !11, line: 14, type: !12, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !23) +!45 = !DILocalVariable(name: "dst", arg: 1, scope: !44, file: !11, line: 14, type: !14) +!46 = !DILocation(line: 14, column: 19, scope: !44) +!47 = !DILocalVariable(name: "src", arg: 2, scope: !44, file: !11, line: 14, type: !16) +!48 = !DILocation(line: 14, column: 36, scope: !44) +!49 = !DILocalVariable(name: "n", arg: 3, scope: !44, file: !11, line: 14, type: !18) +!50 = !DILocation(line: 14, column: 48, scope: !44) +!51 = !DILocation(line: 20, column: 10, scope: !44) +!52 = !DILocation(line: 20, column: 15, scope: !44) +!53 = !DILocation(line: 20, column: 20, 
scope: !44) +!54 = !DILocation(line: 20, column: 22, scope: !44) +!55 = !DILocation(line: 20, column: 5, scope: !44) +!56 = !DILocation(line: 21, column: 1, scope: !44) diff --git a/test/vla/vla-read.c b/test/vla/vla-read.c index 3257323..c6261a8 100644 --- a/test/vla/vla-read.c +++ b/test/vla/vla-read.c @@ -5,16 +5,13 @@ int main(void) { char tmp[1024]; - // ----- at line 11, column 17 - // [!!] stack pointer escape: address of variable 'tmp' escapes this function - // address passed as argument to function '_read' (callee may capture the pointer beyond this function) ssize_t n = read(STDIN_FILENO, tmp, sizeof(tmp)); if (n <= 0) return 1; // char *buf = malloc(n); int len = (int)n; - // at line 21, column 5 + // at line 18, column 5 // [!] dynamic stack allocation detected for variable 'vla' // allocated type: i8 // size of this allocation is not compile-time constant (VLA / variable alloca) and may lead to unbounded stack usage @@ -25,9 +22,6 @@ int main(void) for (ssize_t i = 0; i < n; ++i) buf[i] = tmp[i]; - // at line 31, column 5 - // [!!] stack pointer escape: address of variable 'vla' escapes this function - // address passed as argument to function 'free' (callee may capture the pointer beyond this function) free(buf); return 0; }