diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a90049..2d4f9f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,7 @@ set(STACK_ANALYZER_SOURCES src/analysis/AnalyzerUtils.cpp src/analysis/CompileCommands.cpp src/analysis/ConstParamAnalysis.cpp + src/analysis/DuplicateIfCondition.cpp src/analysis/DynamicAlloca.cpp src/analysis/FunctionFilter.cpp src/analysis/IRValueUtils.cpp diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp index 8715115..5cdc106 100644 --- a/include/StackUsageAnalyzer.hpp +++ b/include/StackUsageAnalyzer.hpp @@ -118,12 +118,13 @@ namespace ctrace::stack AllocaUsageWarning = 9, InvalidBaseReconstruction = 10, ConstParameterNotModified = 11, - SizeMinusOneWrite = 12 + SizeMinusOneWrite = 12, + DuplicateIfCondition = 13 }; template <> struct EnumTraits { - static constexpr std::array names = {"None", + static constexpr std::array names = {"None", "StackBufferOverflow", "NegativeStackIndex", "VLAUsage", @@ -135,7 +136,8 @@ namespace ctrace::stack "AllocaUsageWarning", "InvalidBaseReconstruction", "ConstParameterNotModified", - "SizeMinusOneWrite"}; + "SizeMinusOneWrite", + "DuplicateIfCondition"}; }; /* diff --git a/include/analysis/DuplicateIfCondition.hpp b/include/analysis/DuplicateIfCondition.hpp new file mode 100644 index 0000000..a833d7d --- /dev/null +++ b/include/analysis/DuplicateIfCondition.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include + +namespace llvm +{ + class Function; + class Instruction; + class Module; +} // namespace llvm + +namespace ctrace::stack::analysis +{ + struct DuplicateIfConditionIssue + { + std::string funcName; + const llvm::Instruction* conditionInst = nullptr; + }; + + std::vector + analyzeDuplicateIfConditions(llvm::Module& mod, + const std::function& shouldAnalyze); +} // namespace ctrace::stack::analysis diff --git a/src/StackUsageAnalyzer.cpp b/src/StackUsageAnalyzer.cpp index f68cd22..bc9ca20 100644 --- a/src/StackUsageAnalyzer.cpp +++ b/src/StackUsageAnalyzer.cpp @@ -21,6 +21,7 @@ #include "analysis/AllocaUsage.hpp" #include "analysis/AnalyzerUtils.hpp" #include "analysis/ConstParamAnalysis.hpp" +#include "analysis/DuplicateIfCondition.hpp" #include "analysis/DynamicAlloca.hpp" #include "analysis/FunctionFilter.hpp" #include "analysis/InputPipeline.hpp" @@ -861,6 +862,66 @@ namespace ctrace::stack } } + static void appendDuplicateIfConditionDiagnostics( + AnalysisResult& result, const std::vector& issues) + { + for (const auto& issue : issues) + { + unsigned line = 0; + unsigned column = 0; + unsigned startLine = 0; + unsigned startColumn = 0; + unsigned endLine = 0; + unsigned endColumn = 0; + bool haveLoc = false; + + if (issue.conditionInst) + { + llvm::DebugLoc DL = issue.conditionInst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + startLine = DL.getLine(); + column = DL.getCol(); + startColumn = DL.getCol(); + endLine = DL.getLine(); + endColumn = DL.getCol(); + haveLoc = true; + + if (auto* loc = DL.get()) + { + if (auto* scope = llvm::dyn_cast(loc)) + { + if (scope->getColumn() != 0) + { + endColumn = scope->getColumn() + 1; + } + } + } + } + } + + std::ostringstream body; + body << " [!] unreachable else-if branch: condition is equivalent to a previous " + "'if' condition\n"; + body << " else branch implies previous condition is false\n"; + + Diagnostic diag; + diag.funcName = issue.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.startLine = haveLoc ? startLine : 0; + diag.startColumn = haveLoc ? startColumn : 0; + diag.endLine = haveLoc ? endLine : 0; + diag.endColumn = haveLoc ? endColumn : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.errCode = DescriptiveErrorCode::DuplicateIfCondition; + diag.ruleId = "DuplicateIfCondition"; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } + } + static void appendInvalidBaseReconstructionDiagnostics( AnalysisResult& result, const std::vector& issues) @@ -1200,6 +1261,11 @@ namespace ctrace::stack std::vector multiStoreIssues = analysis::analyzeMultipleStores(mod, shouldAnalyzeFunction); appendMultipleStoreDiagnostics(result, multiStoreIssues); + + // 12b) Détection de branches else-if inatteignables (condition dupliquée) + std::vector duplicateIfIssues = + analysis::analyzeDuplicateIfConditions(mod, shouldAnalyzeFunction); + appendDuplicateIfConditionDiagnostics(result, duplicateIfIssues); logDuration("Multiple stores", t0); // 13) Detect invalid base pointer reconstructions (offsetof/container_of) diff --git a/src/analysis/DuplicateIfCondition.cpp b/src/analysis/DuplicateIfCondition.cpp new file mode 100644 index 0000000..350dd1e --- /dev/null +++ b/src/analysis/DuplicateIfCondition.cpp @@ -0,0 +1,449 @@ +#include "analysis/DuplicateIfCondition.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "analysis/AnalyzerUtils.hpp" + +namespace ctrace::stack::analysis +{ + namespace + { + struct SourceLocation + { + std::string path; + unsigned line = 0; + unsigned column = 0; + }; + + struct SourceFileCache + { + std::unordered_map> files; + }; + + static SourceFileCache& getSourceCache() + { + static SourceFileCache cache; + return cache; + } + + static bool loadSourceFile(const std::string& path, std::vector& lines) + { + std::ifstream in(path); + if (!in) + return false; + std::string line; + while (std::getline(in, line)) + { + lines.push_back(line); + } + return true; + } + + static const std::vector* getSourceLines(const std::string& path) + { + if (path.empty()) + return nullptr; + auto& cache = getSourceCache().files; + auto it = cache.find(path); + if (it != cache.end()) + return &it->second; + std::vector lines; + if (!loadSourceFile(path, lines)) + return nullptr; + auto [inserted, _] = cache.emplace(path, std::move(lines)); + return &inserted->second; + } + + static bool isWordChar(char c) + { + return std::isalnum(static_cast(c)) || c == '_'; + } + + static std::string stripLineComment(const std::string& line) + { + bool inString = false; + bool escape = false; + for (std::size_t i = 0; i + 1 < line.size(); ++i) + { + char c = line[i]; + if (escape) + { + escape = false; + continue; + } + if (c == '\\' && inString) + { + escape = true; + continue; + } + if (c == '"') + { + inString = !inString; + continue; + } + if (!inString && c == '/' && line[i + 1] == '/') + { + return line.substr(0, i); + } + } + return line; + } + + static bool lineHasElseToken(const std::string& line) + { + std::size_t pos = 0; + while ((pos = line.find("else", pos)) != std::string::npos) + { + bool leftOk = (pos == 0) || !isWordChar(line[pos - 1]); + bool rightOk = (pos + 4 >= line.size()) || !isWordChar(line[pos + 4]); + if (leftOk && rightOk) + { + return true; + } + pos += 4; + } + return false; + } + + static bool hasElseBetween(const std::vector& lines, unsigned startLine, + unsigned endLine, unsigned endColumn) + { + if (lines.empty() || startLine == 0 || endLine == 0) + return false; + if (startLine > endLine) + std::swap(startLine, endLine); + endLine = std::min(endLine, static_cast(lines.size())); + startLine = std::min(startLine, endLine); + + for (unsigned lineNo = startLine; lineNo <= endLine; ++lineNo) + { + std::string view = stripLineComment(lines[lineNo - 1]); + if (lineNo == endLine && endColumn > 0 && endColumn - 1 < view.size()) + { + view = view.substr(0, endColumn - 1); + } + if (lineHasElseToken(view)) + return true; + } + return false; + } + + static bool getSourceLocation(const llvm::Instruction* inst, SourceLocation& out) + { + if (!inst) + return false; + + if (llvm::DebugLoc DL = inst->getDebugLoc()) + { + out.line = DL.getLine(); + out.column = DL.getCol(); + std::string dir = DL->getDirectory().str(); + std::string file = DL->getFilename().str(); + if (!file.empty()) + { + if (!dir.empty()) + out.path = dir + "/" + file; + else + out.path = file; + } + } + + if (out.path.empty()) + { + out.path = getFunctionSourcePath(*inst->getFunction()); + } + + return !out.path.empty() && out.line != 0; + } + + struct MemoryOperand + { + const llvm::Value* ptr = nullptr; + bool precise = false; // true if we can reason about direct stores only + }; + + struct ConditionKey + { + llvm::CmpInst::Predicate pred = llvm::CmpInst::BAD_ICMP_PREDICATE; + llvm::Value* lhs = nullptr; + llvm::Value* rhs = nullptr; + bool valid = false; + llvm::SmallVector memoryOperands; + }; + + static llvm::Value* stripCasts(llvm::Value* v) + { + while (auto* cast = llvm::dyn_cast(v)) + { + v = cast->getOperand(0); + } + return v; + } + + static bool isPrecisePointer(const llvm::Value* ptr) + { + using namespace llvm; + if (!ptr) + return false; + const Value* base = ptr->stripPointerCasts(); + auto* allocaInst = dyn_cast(base); + if (!allocaInst) + return false; + return !PointerMayBeCaptured(allocaInst, true, true); + } + + static llvm::Value* canonicalizeOperand(llvm::Value* v, ConditionKey& key) + { + v = stripCasts(v); + if (auto* load = llvm::dyn_cast(v)) + { + llvm::Value* ptr = load->getPointerOperand()->stripPointerCasts(); + key.memoryOperands.push_back({ptr, isPrecisePointer(ptr)}); + return ptr; + } + return v; + } + + static void dedupeMemoryOperands(ConditionKey& key) + { + llvm::SmallPtrSet seen; + llvm::SmallVector deduped; + deduped.reserve(key.memoryOperands.size()); + for (const auto& mem : key.memoryOperands) + { + if (!mem.ptr) + continue; + if (seen.insert(mem.ptr).second) + { + deduped.push_back(mem); + } + } + key.memoryOperands.swap(deduped); + } + + static ConditionKey buildConditionKey(llvm::Value* cond) + { + ConditionKey key; + auto* cmp = llvm::dyn_cast(cond); + if (!cmp) + return key; + + key.valid = true; + key.pred = cmp->getPredicate(); + key.lhs = canonicalizeOperand(cmp->getOperand(0), key); + key.rhs = canonicalizeOperand(cmp->getOperand(1), key); + + if (std::less{}(key.rhs, key.lhs)) + { + key.pred = llvm::CmpInst::getSwappedPredicate(key.pred); + std::swap(key.lhs, key.rhs); + } + + dedupeMemoryOperands(key); + return key; + } + + static bool conditionKeysEquivalent(const ConditionKey& a, const ConditionKey& b) + { + if (!a.valid || !b.valid) + return false; + return a.pred == b.pred && a.lhs == b.lhs && a.rhs == b.rhs; + } + + static bool isInterferingWrite(const llvm::Instruction& I, const MemoryOperand& mem) + { + using namespace llvm; + if (!I.mayWriteToMemory()) + return false; + + if (auto* store = dyn_cast(&I)) + { + const Value* ptr = store->getPointerOperand()->stripPointerCasts(); + return ptr == mem.ptr; + } + + if (auto* rmw = dyn_cast(&I)) + { + const Value* ptr = rmw->getPointerOperand()->stripPointerCasts(); + return ptr == mem.ptr; + } + + if (auto* cmpxchg = dyn_cast(&I)) + { + const Value* ptr = cmpxchg->getPointerOperand()->stripPointerCasts(); + return ptr == mem.ptr; + } + + if (auto* memIntrinsic = dyn_cast(&I)) + { + const Value* ptr = memIntrinsic->getRawDest()->stripPointerCasts(); + return ptr == mem.ptr; + } + + if (auto* call = dyn_cast(&I)) + { + if (!call->mayWriteToMemory()) + return false; + if (!mem.precise) + return true; + for (const Use& arg : call->args()) + { + const Value* argVal = arg.get(); + if (!argVal || !argVal->getType()->isPointerTy()) + continue; + if (argVal->stripPointerCasts() == mem.ptr) + return true; + } + return false; + } + + return !mem.precise; + } + + static bool hasInterveningWrites(const llvm::Function& F, const llvm::DominatorTree& DT, + const llvm::BasicBlock* pathBlock, + const llvm::Instruction* at, + const llvm::SmallVector& memoryOps) + { + if (memoryOps.empty() || !pathBlock || !at) + return false; + + const llvm::BasicBlock* atBlock = at->getParent(); + + for (const llvm::BasicBlock& BB : F) + { + if (!DT.dominates(pathBlock, &BB)) + continue; + + for (const llvm::Instruction& I : BB) + { + if (&BB == atBlock && &I == at) + break; + if (llvm::isa(&I)) + continue; + if (!I.mayWriteToMemory()) + continue; + if (!llvm::isPotentiallyReachable(&I, at, nullptr, &DT)) + continue; + + for (const auto& mem : memoryOps) + { + if (isInterferingWrite(I, mem)) + return true; + } + } + } + + return false; + } + + static bool findDuplicateElseCondition(const llvm::BranchInst* branch, + const llvm::DominatorTree& DT, + DuplicateIfConditionIssue& out) + { + if (!branch || !branch->isConditional()) + return false; + + const llvm::BasicBlock* curBlock = branch->getParent(); + auto* node = DT.getNode(curBlock); + if (!node) + return false; + + ConditionKey curKey = buildConditionKey(branch->getCondition()); + if (!curKey.valid) + return false; + + SourceLocation currentLoc; + getSourceLocation(branch, currentLoc); + + for (auto* dom = node->getIDom(); dom; dom = dom->getIDom()) + { + const llvm::BasicBlock* domBlock = dom->getBlock(); + if (!domBlock) + continue; + auto* domTerm = llvm::dyn_cast(domBlock->getTerminator()); + if (!domTerm || !domTerm->isConditional()) + continue; + + const llvm::BasicBlock* falseSucc = domTerm->getSuccessor(1); + if (!falseSucc || !DT.dominates(falseSucc, curBlock)) + continue; + + ConditionKey domKey = buildConditionKey(domTerm->getCondition()); + if (!conditionKeysEquivalent(domKey, curKey)) + continue; + + SourceLocation domLoc; + if (!getSourceLocation(domTerm, domLoc)) + continue; + if (currentLoc.path.empty() || domLoc.path.empty() || + currentLoc.path != domLoc.path) + continue; + + const auto* lines = getSourceLines(currentLoc.path); + if (!lines || + !hasElseBetween(*lines, domLoc.line, currentLoc.line, currentLoc.column)) + continue; + + if (hasInterveningWrites(*curBlock->getParent(), DT, falseSucc, branch, + curKey.memoryOperands)) + continue; + + out.funcName = branch->getFunction()->getName().str(); + out.conditionInst = branch; + return true; + } + + return false; + } + + } // namespace + + std::vector + analyzeDuplicateIfConditions(llvm::Module& mod, + const std::function& shouldAnalyze) + { + std::vector issues; + + for (llvm::Function& F : mod) + { + if (F.isDeclaration()) + continue; + if (!shouldAnalyze(F)) + continue; + + llvm::DominatorTree DT(F); + + for (llvm::BasicBlock& BB : F) + { + auto* br = llvm::dyn_cast(BB.getTerminator()); + if (!br || !br->isConditional()) + continue; + + DuplicateIfConditionIssue issue; + if (findDuplicateElseCondition(br, DT, issue)) + { + issues.push_back(std::move(issue)); + } + } + } + + return issues; + } +} // namespace ctrace::stack::analysis diff --git a/test/diagnostics/duplicate-else-if-basic.c b/test/diagnostics/duplicate-else-if-basic.c new file mode 100644 index 0000000..1fa0abe --- /dev/null +++ b/test/diagnostics/duplicate-else-if-basic.c @@ -0,0 +1,20 @@ +#include + +int main(int argc, char* argv[]) +{ + int num = argc - 1; + + if (num == 0) + { + printf("Num is zero 1\n"); + } + // at line 14, column 18 + // [!] unreachable else-if branch: condition is equivalent to a previous 'if' condition + // else branch implies previous condition is false + else if (num == 0) + { + printf("Num is zero 2\n"); + } + + return 0; +} diff --git a/test/diagnostics/duplicate-else-if-commutative.c b/test/diagnostics/duplicate-else-if-commutative.c new file mode 100644 index 0000000..290effd --- /dev/null +++ b/test/diagnostics/duplicate-else-if-commutative.c @@ -0,0 +1,18 @@ +int main(int argc, char* argv[]) +{ + int num = argc - 1; + + if (0 == num) + { + return 0; + } + // at line 12, column 18 + // [!] unreachable else-if branch: condition is equivalent to a previous 'if' condition + // else branch implies previous condition is false + else if (num == 0) + { + return 1; + } + + return 2; +} diff --git a/test/diagnostics/duplicate-else-if-distinct.c b/test/diagnostics/duplicate-else-if-distinct.c new file mode 100644 index 0000000..e3cfed5 --- /dev/null +++ b/test/diagnostics/duplicate-else-if-distinct.c @@ -0,0 +1,16 @@ +// not contains: [!] unreachable else-if branch: condition is equivalent to a previous 'if' condition +int main(int argc, char* argv[]) +{ + int num = argc - 1; + + if (num == 0) + { + return 0; + } + else if (num == 1) + { + return 1; + } + + return 2; +} diff --git a/test/diagnostics/duplicate-else-if-nested-loop.c b/test/diagnostics/duplicate-else-if-nested-loop.c new file mode 100644 index 0000000..68c6d69 --- /dev/null +++ b/test/diagnostics/duplicate-else-if-nested-loop.c @@ -0,0 +1,26 @@ +#define IS_ZERO(x) ((x) == 0) + +int main(int argc, char* argv[]) +{ + int num = argc - 1; + + for (int i = 0; i < 1; ++i) + { + if (IS_ZERO(num)) + { + num += 1; + } + else + { + // at line 18, column 17 + // [!] unreachable else-if branch: condition is equivalent to a previous 'if' condition + // else branch implies previous condition is false + if (IS_ZERO(num)) + { + return 1; + } + } + } + + return 0; +} diff --git a/test/diagnostics/duplicate-else-if-separate.c b/test/diagnostics/duplicate-else-if-separate.c new file mode 100644 index 0000000..1352356 --- /dev/null +++ b/test/diagnostics/duplicate-else-if-separate.c @@ -0,0 +1,19 @@ +#include + +// not contains: [!] unreachable else-if branch: condition is equivalent to a previous 'if' condition +int main(int argc, char* argv[]) +{ + int num = argc - 1; + + if (num == 0) + { + return 0; + } + + if (num == 0) + { + return 1; + } + + return 2; +} diff --git a/test/diagnostics/duplicate-nested-if.c b/test/diagnostics/duplicate-nested-if.c new file mode 100644 index 0000000..bc5576f --- /dev/null +++ b/test/diagnostics/duplicate-nested-if.c @@ -0,0 +1,32 @@ +int main(int argc, char* argv[]) +{ + int num = argc - 1; + + if (0 == num) + { + if (num == 0) + { + if (num == 0) + { + return 0; + } + // at line 16, column 26 + // [!] unreachable else-if branch: condition is equivalent to a previous 'if' condition + // else branch implies previous condition is false + else if (num == 0) + { + return 0; + } + } + return 0; + } + // at line 26, column 18 + // [!] unreachable else-if branch: condition is equivalent to a previous 'if' condition + // else branch implies previous condition is false + else if (num == 0) + { + return 1; + } + + return 2; +} \ No newline at end of file diff --git a/test/diagnostics/duplicate-nested-if_2.c b/test/diagnostics/duplicate-nested-if_2.c new file mode 100644 index 0000000..5bf8dde --- /dev/null +++ b/test/diagnostics/duplicate-nested-if_2.c @@ -0,0 +1,26 @@ +int main(int argc, char* argv[]) +{ + int num = argc - 1; + + if (0 == num) + { + if (num != 0) + { + if (num == 0) + { + return 0; + } + else if (num == 0) + { + return 0; + } + } + return 0; + } + else if (num == 0) + { + return 1; + } + + return 2; +} \ No newline at end of file